解析 dex 文件结构 - DexHeader

简介

dex 文件是 dalvik 虚拟机的可执行文件。

dex 文件结构

该结构位于系统源码 dalvik\libdex\DexFile.h 中，描述的是 dex 文件被映射到内存中的结构。

/*
 * Structure representing a DEX file.
 *
 * Code should regard DexFile as opaque, using the API calls provided here
 * to access specific structures.
 *
 * Apart from the `overhead` counter, every member is a pointer; the struct
 * holds no inline copy of any section.
 */
struct DexFile {
    /* directly-mapped "opt" header */
    const DexOptHeader* pOptHeader;

    /* pointers to directly-mapped structs and arrays in base DEX */
    const DexHeader*    pHeader;
    const DexStringId*  pStringIds;
    const DexTypeId*    pTypeIds;
    const DexFieldId*   pFieldIds;
    const DexMethodId*  pMethodIds;
    const DexProtoId*   pProtoIds;
    const DexClassDef*  pClassDefs;
    const DexLink*      pLinkData;

    /*
     * These are mapped out of the "auxiliary" section, and may not be
     * included in the file.
     */
    const DexClassLookup* pClassLookup;
    const void*         pRegisterMapPool;       // RegisterMapClassPool

    /* points to start of DEX file data */
    const u1*           baseAddr;

    /* track memory overhead for auxiliary structures */
    int                 overhead;

    /* additional app-specific data structures associated with the DEX */
    /* (the member below is commented out, so it is not part of the struct) */
    //void*               auxData;
};

基本的文件结构只需关注:

/*
 * Simplified on-disk layout of a DEX file.
 *
 * This is pseudo-C: the array bounds are the counts stored in DexHeader, not
 * compile-time constants.
 *
 * Sections are listed in ascending file-offset order, which is the order the
 * offsets in the sample header follow: typeIdsOff < protoIdsOff <
 * fieldIdsOff < methodIdsOff < classDefsOff < dataOff.
 */
struct DexFile{
    DexHeader    Header;
    DexStringId  StringIds[stringIdsSize];
    DexTypeId    TypeIds[typeIdsSize];
    DexProtoId   ProtoIds[protoIdsSize];
    DexFieldId   FieldIds[fieldIdsSize];
    DexMethodId  MethodIds[methodIdsSize];
    DexClassDef  ClassDefs[classDefsSize];
    DexData      Data[];
    DexLink      LinkData;
};

大体结构图

DexHeader 结构

/*
 * 160-bit SHA-1 digest.
 *
 * kSHA1DigestLen is the digest length in bytes (20 * 8 = 160 bits).
 * kSHA1DigestOutputLen is twice that plus one -- presumably room for the
 * digest written as hex digits plus a terminating NUL (TODO confirm).
 */
enum { kSHA1DigestLen = 20,
       kSHA1DigestOutputLen = kSHA1DigestLen*2 +1 };

/*
 * Direct-mapped "header_item" struct.
 *
 * Each *Size member of an ID table is an entry count, and the matching *Off
 * member is that table's offset from the start of the file.
 */
struct DexHeader {
    u1  magic[8];           /* magic / version identifier */
    u4  checksum;           /* adler32 checksum */
    u1  signature[kSHA1DigestLen]; /* SHA-1 hash */
    u4  fileSize;           /* size of the entire file */
    u4  headerSize;         /* size of DexHeader */
    u4  endianTag;          /* byte-order tag */
    u4  linkSize;           /* size of the link section */
    u4  linkOff;            /* file offset of the link section */
    u4  mapOff;             /* file offset of the DexMapList */
    u4  stringIdsSize;      /* number of DexStringId entries */
    u4  stringIdsOff;       /* file offset of the DexStringId table */
    u4  typeIdsSize;        /* number of DexTypeId entries */
    u4  typeIdsOff;         /* file offset of the DexTypeId table */
    u4  protoIdsSize;       /* number of DexProtoId entries */
    u4  protoIdsOff;        /* file offset of the DexProtoId table */
    u4  fieldIdsSize;       /* number of DexFieldId entries */
    u4  fieldIdsOff;        /* file offset of the DexFieldId table */
    u4  methodIdsSize;      /* number of DexMethodId entries */
    u4  methodIdsOff;       /* file offset of the DexMethodId table */
    u4  classDefsSize;      /* number of DexClassDef entries */
    u4  classDefsOff;       /* file offset of the DexClassDef table */
    u4  dataSize;           /* size of the data section */
    u4  dataOff;            /* file offset of the data section */
};

其中：

magic ： dex文件标识，值必须为常量 DEX_FILE_MAGIC；

ubyte[8] DEX_FILE_MAGIC = { 0x64 0x65 0x78 0x0a 0x30 0x33 0x35 0x00 } = "dex\n035\0"

checkSum ：对除 magic 和 checkSum 外的剩余文件计算 adler32 校验值，目的是检测文件是否损坏；

signature：对除 magic、checkSum 和 signature 外的剩余文件计算 SHA-1 校验值，用来确定文件的唯一性；

fileSize：以字节为单位，整个文件（包括头部）的大小；

headerSize：头部大小，0x70 字节，已经考虑到兼容性；

endianTag：两种字节序取值；

uint ENDIAN_CONSTANT = 0x12345678;
uint REVERSE_ENDIAN_CONSTANT = 0x78563412;

linkSize：链接段的大小，如果没有使用静态链接，值为0；

linkOff：链接段的文件偏移，指向链接数据段内，如果 linkSize 为 0，则为 0；

mapOff：map item 的文件偏移，指向数据段内，数据结构为 mapList，如果没有 map，值为 0；

stringIdsSize：字符串 id 的个数；

stringIdsOff：字符串 id 清单的文件偏移，指向 stringIds 的起始地址，如果 stringIdsSize 为 0，值为0；

typeIdsSize：类型标识符的个数；

typeIdsOff：类型标识符清单的文件偏移，指向 typeIds 的起始地址，如果 typeIdsSize 为 0，值为 0 (这就很奇怪了哟)；

protoIdsSize：原型标识符的个数；

protoIdsOff：原型标识符清单的文件偏移，指向 protoIds 的起始地址，如果 protoIdsSize 为 0，值为 0 (这就很奇怪了哟)；

fieldIdsSize：字段标识符的个数；

fieldIdsOff：字段标识符清单的文件偏移，指向 fieldIds 的起始地址，如果 fieldIdsSize 为 0，值为 0；

methodIdsSize：方法标识符的个数；

methodIdsOff：方法标识符清单的文件偏移，指向 methodIds 的起始地址，如果 methodIdsSize 为 0，值为 0；

classDefsSize：类的个数；

classDefsOff：类清单的文件偏移，指向 classDefs 的起始地址，如果 classDefsSize 为 0，值为 0 (这就很奇怪了哟)；

dataSize：数据段的大小，以字节为单位，并且是 sizeof(uint) 的偶数倍；

dataOff：数据段的文件偏移。

手工查找

00000000  64 65 78 0a 30 33 35 00  3b ba fe c3 83 7e aa be  |dex.035.;....~..|
00000010  09 97 71 1e 17 96 9f e9  0c bd 01 60 b4 2a 1a c9  |..q........`.*..|
00000020  c4 10 00 00 70 00 00 00  78 56 34 12 00 00 00 00  |....p...xV4.....|
00000030  00 00 00 00 00 10 00 00  5c 00 00 00 70 00 00 00  |........\...p...|
00000040  19 00 00 00 e0 01 00 00  12 00 00 00 44 02 00 00  |............D...|
00000050  01 00 00 00 1c 03 00 00  2b 00 00 00 24 03 00 00  |........+...$...|
00000060  02 00 00 00 7c 04 00 00  08 0c 00 00 bc 04 00 00  |....|...........|

binary	field
64 65 78 0a 30 33 35 00	magic
3b ba fe c3	checksum
83 7e aa be 09 97 71 1e 17 96 9f e9 0c bd 01 60 b4 2a 1a c9	signature
c4 10 00 00	fileSize
70 00 00 00	headerSize
78 56 34 12	endianTag
00 00 00 00	linkSize
00 00 00 00	linkOff
00 10 00 00	mapOff
5c 00 00 00	stringIdsSize
70 00 00 00	stringIdsOff
19 00 00 00	typeIdsSize
e0 01 00 00	typeIdsOff
12 00 00 00	protoIdsSize
44 02 00 00	protoIdsOff
01 00 00 00	fieldIdsSize
1c 03 00 00	fieldIdsOff
2b 00 00 00	methodIdsSize
24 03 00 00	methodIdsOff
02 00 00 00	classDefsSize
7c 04 00 00	classDefsOff
08 0c 00 00	dataSize
bc 04 00 00	dataOff

写程序解析 DexHeader

import struct

class DexStruct(object):
    """Namespace holding the parsed fields of a DEX header.

    ``DexHeader`` maps each header field name to its value. Every field
    starts out as 0 until something overwrites it.
    """

    # Field names appear in the same order as the members of the C
    # ``struct DexHeader``.
    DexHeader = dict.fromkeys(
        (
            "magic",
            "checkSum",
            "signature",
            "fileSize",
            "headerSize",
            "endianTag",
            "linkSize",
            "linkOff",
            "mapOff",
            "stringIdsSize",
            "stringIdsOff",
            "typeIdsSize",
            "typeIdsOff",
            "protoIdsSize",
            "protoIdsOff",
            "fieldIdsSize",
            "fieldIdsOff",
            "methodIdsSize",
            "methodIdsOff",
            "classDefsSize",
            "classDefsOff",
            "dataSize",
            "dataOff",
        ),
        0,
    )


def parseHeader(header_data):
    """Decode the fixed-size DEX header into ``DexStruct.DexHeader``.

    Args:
        header_data: Byte string holding at least the first 0x70 bytes of a
            DEX file.

    Returns:
        The updated ``DexStruct.DexHeader`` dict. ``magic`` and ``signature``
        are byte strings; every other field is an unsigned integer.

    Raises:
        SystemExit: With code -1, after printing a message, when the data is
            shorter than one header or does not start with the DEX magic.
    """
    # '<' pins little-endian, standard-size fields with no padding, so the
    # decoded values do not depend on the byte order of the host.
    # 8 + 4 + 20 + 20 * 4 = 0x70 bytes.
    layout = struct.Struct('<8sI20s20I')
    field_names = (
        'magic', 'checkSum', 'signature',
        'fileSize', 'headerSize', 'endianTag',
        'linkSize', 'linkOff', 'mapOff',
        'stringIdsSize', 'stringIdsOff',
        'typeIdsSize', 'typeIdsOff',
        'protoIdsSize', 'protoIdsOff',
        'fieldIdsSize', 'fieldIdsOff',
        'methodIdsSize', 'methodIdsOff',
        'classDefsSize', 'classDefsOff',
        'dataSize', 'dataOff',
    )

    # The magic is compared as bytes: data read in 'rb' mode is never equal
    # to a text literal on Python 3.
    if len(header_data) < layout.size or header_data[:8] != b"dex\n035\0":
        print('invalid dex file.')
        raise SystemExit(-1)

    values = layout.unpack_from(header_data)
    DexStruct.DexHeader.update(zip(field_names, values))
    return DexStruct.DexHeader


if __name__ == '__main__':
    import binascii

    # Only the fixed-size header (0x70 bytes) at the start of the file is read.
    with open("classes.dex", 'rb') as f:
        parseHeader(f.read(0x70))

    for name, value in DexStruct.DexHeader.items():
        if isinstance(value, bytes):
            # magic and signature are raw byte strings; hex() only accepts
            # integers, so show them as hex digits instead.
            text = binascii.hexlify(value).decode('ascii')
        else:
            text = hex(value)
        print('%s %s' % (name, text))

Reference

《Android 软件安全与逆向分析》

Android安全–Dex文件格式详解