原文: https://kiya-z.github.io/2015/11/17/parse-dex-file-part-dex-header/
解析 dex 文件结构 - DexHeader
简介
dex 文件是 dalvik 虚拟机的可执行文件。
dex 文件结构
该结构位于系统源码 `dalvik\libdex\DexFile.h` 中,描述的是 dex 文件被映射到内存中的结构。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
/*
 * Structure representing a DEX file.
 *
 * Code should regard DexFile as opaque, using the API calls provided here
 * to access specific structures.
 *
 * Every member below is a pointer (or a plain counter); the struct itself
 * holds no copies of the DEX sections it refers to.
 */
struct DexFile {
    /* directly-mapped "opt" header */
    const DexOptHeader* pOptHeader;

    /* pointers to directly-mapped structs and arrays in base DEX */
    const DexHeader*    pHeader;
    const DexStringId*  pStringIds;
    const DexTypeId*    pTypeIds;
    const DexFieldId*   pFieldIds;
    const DexMethodId*  pMethodIds;
    const DexProtoId*   pProtoIds;
    const DexClassDef*  pClassDefs;
    const DexLink*      pLinkData;

    /*
     * These are mapped out of the "auxiliary" section, and may not be
     * included in the file.
     */
    const DexClassLookup* pClassLookup;
    const void*         pRegisterMapPool;       // RegisterMapClassPool

    /* points to start of DEX file data */
    const u1*           baseAddr;

    /* track memory overhead for auxiliary structures */
    int                 overhead;

    /* additional app-specific data structures associated with the DEX */
    //void*               auxData;
};
基本的文件结构只需关注:
1 2 3 4 5 6 7 8 9 10 11 |
/*
 * Conceptual layout of a DEX file (illustrative pseudo-declaration, not
 * compilable C: the array lengths come from the *Size fields of DexHeader).
 *
 * Members are listed in the same order as the corresponding size/offset
 * pairs in DexHeader: strings, types, protos, fields, methods, class defs,
 * followed by the data section and the link data.
 */
struct DexFile{
    DexHeader    Header;
    DexStringId  StringIds[stringIdsSize];
    DexTypeId    TypeIds[typeIdsSize];
    DexProtoId   ProtoIds[protoIdsSize];
    DexFieldId   FieldIds[fieldIdsSize];
    DexMethodId  MethodIds[methodIdsSize];
    DexClassDef  ClassDefs[classDefsSize];
    DexData      Data[];
    DexLink      LinkData;
};
大体结构图
DexHeader 结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
/*
 * 160-bit SHA-1 digest.
 *
 * kSHA1DigestLen is the digest length in bytes. kSHA1DigestOutputLen is
 * twice that plus one (presumably room for a hex string and its
 * terminator -- confirm against the code that uses it).
 */
enum { kSHA1DigestLen = 20,
       kSHA1DigestOutputLen = kSHA1DigestLen*2 +1 };

/*
 * Direct-mapped "header_item" struct.
 *
 * Field order and widths mirror the bytes at the start of the file, so
 * they must not be rearranged.
 */
struct DexHeader {
    u1  magic[8];                   /* magic / version identifier */
    u4  checksum;                   /* adler32 checksum */
    u1  signature[kSHA1DigestLen];  /* SHA-1 hash */
    u4  fileSize;                   /* size of the entire file */
    u4  headerSize;                 /* size of DexHeader */
    u4  endianTag;                  /* byte-order tag */
    u4  linkSize;                   /* size of the link section */
    u4  linkOff;                    /* offset of the link section */
    u4  mapOff;                     /* file offset of the DexMapList */
    u4  stringIdsSize;              /* number of DexStringId entries */
    u4  stringIdsOff;               /* file offset of the DexStringId list */
    u4  typeIdsSize;                /* number of DexTypeId entries */
    u4  typeIdsOff;                 /* file offset of the DexTypeId list */
    u4  protoIdsSize;               /* number of DexProtoId entries */
    u4  protoIdsOff;                /* file offset of the DexProtoId list */
    u4  fieldIdsSize;               /* number of DexFieldId entries */
    u4  fieldIdsOff;                /* file offset of the DexFieldId list */
    u4  methodIdsSize;              /* number of DexMethodId entries */
    u4  methodIdsOff;               /* file offset of the DexMethodId list */
    u4  classDefsSize;              /* number of DexClassDef entries */
    u4  classDefsOff;               /* file offset of the DexClassDef list */
    u4  dataSize;                   /* size of the data section */
    u4  dataOff;                    /* file offset of the data section */
};
其中:
magic:dex 文件标识,值必须为常量 `DEX_FILE_MAGIC`;
1 2 |
ubyte[8] DEX_FILE_MAGIC = { 0x64 0x65 0x78 0x0a 0x30 0x33 0x35 0x00 } = "dex\n035\0" |
checkSum : 对除 magic 和 checkSum 外的剩余文件计算 adler32 校验值,目的是检测文件是否损坏;
signature:对除 magic、checkSum 和 signature 外的剩余文件计算 SHA-1 哈希值,用来确定文件的唯一性;
fileSize:以字节为单位,整个文件(包括头部)的大小;
headerSize:头部大小,0x70 字节,已经考虑到兼容性;
endianTag:两种字节序取值;
1 2 |
uint ENDIAN_CONSTANT = 0x12345678; uint REVERSE_ENDIAN_CONSTANT = 0x78563412; |
linkSize:链接段的大小,如果没有使用静态链接,值为0;
linkOff:链接段的文件偏移,指向链接数据段内,如果 `linkSize` 为 0,则为 0;
mapOff:map item 的文件偏移,指向数据段内,数据结构为 `mapList`,如果没有 map,值为 0;
stringIdsSize:字符串 id 的个数;
stringIdsOff:字符串 id 清单的文件偏移,指向 `stringIds` 的起始地址,如果 `stringIdsSize` 为 0,值为 0;
typeIdsSize:类型标识符的个数;
typeIdsOff:类型标识符清单的文件偏移,指向 `typeIds` 的起始地址,如果 `typeIdsSize` 为 0,值为 0 (这就很奇怪了哟);
protoIdsSize:原型标识符的个数;
protoIdsOff:原型标识符清单的文件偏移,指向 `protoIds` 的起始地址,如果 `protoIdsSize` 为 0,值为 0 (这就很奇怪了哟);
fieldIdsSize:字段标识符的个数;
fieldIdsOff:字段标识符清单的文件偏移,指向 `fieldIds` 的起始地址,如果 `fieldIdsSize` 为 0,值为 0;
methodIdsSize:方法标识符的个数;
methodIdsOff:方法标识符清单的文件偏移,指向 `methodIds` 的起始地址,如果 `methodIdsSize` 为 0,值为 0;
classDefsSize:类的个数;
classDefsOff:类清单的文件偏移,指向 `classDefs` 的起始地址,如果 `classDefsSize` 为 0,值为 0 (这就很奇怪了哟);
dataSize:数据段的大小,以字节为单位,并且是 sizeof(uint) 的偶数倍;
dataOff:数据段的文件偏移。
手工查找
1 2 3 4 5 6 7 |
00000000 64 65 78 0a 30 33 35 00 3b ba fe c3 83 7e aa be |dex.035.;....~..| 00000010 09 97 71 1e 17 96 9f e9 0c bd 01 60 b4 2a 1a c9 |..q........`.*..| 00000020 c4 10 00 00 70 00 00 00 78 56 34 12 00 00 00 00 |....p...xV4.....| 00000030 00 00 00 00 00 10 00 00 5c 00 00 00 70 00 00 00 |........\...p...| 00000040 19 00 00 00 e0 01 00 00 12 00 00 00 44 02 00 00 |............D...| 00000050 01 00 00 00 1c 03 00 00 2b 00 00 00 24 03 00 00 |........+...$...| 00000060 02 00 00 00 7c 04 00 00 08 0c 00 00 bc 04 00 00 |....|...........| |
binary | field |
---|---|
64 65 78 0a 30 33 35 00 | magic |
3b ba fe c3 | checksum |
83 7e aa be 09 97 71 1e 17 96 9f e9 0c bd 01 60 b4 2a 1a c9 | signature |
c4 10 00 00 | fileSize |
70 00 00 00 | headerSize |
78 56 34 12 | endianTag |
00 00 00 00 | linkSize |
00 00 00 00 | linkOff |
00 10 00 00 | mapOff |
5c 00 00 00 | stringIdsSize |
70 00 00 00 | stringIdsOff |
19 00 00 00 | typeIdsSize |
e0 01 00 00 | typeIdsOff |
12 00 00 00 | protoIdsSize |
44 02 00 00 | protoIdsOff |
01 00 00 00 | fieldIdsSize |
1c 03 00 00 | fieldIdsOff |
2b 00 00 00 | methodIdsSize |
24 03 00 00 | methodIdsOff |
02 00 00 00 | classDefsSize |
7c 04 00 00 | classDefsOff |
08 0c 00 00 | dataSize |
bc 04 00 00 | dataOff |
写程序解析 DexHeader
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import binascii
import struct
import sys


class DexStruct(object):
    """Container for the most recently parsed DEX header.

    The header lives in a class-level dict, so every call to
    parseHeader() overwrites the previous result.
    """

    # Field name -> parsed value; all entries start at 0 until
    # parseHeader() fills them in.
    DexHeader = {
        "magic": 0,
        "checkSum": 0,
        'signature': 0,
        'fileSize': 0,
        "headerSize": 0,
        "endianTag": 0,
        "linkSize": 0,
        "linkOff": 0,
        "mapOff": 0,
        "stringIdsSize": 0,
        "stringIdsOff": 0,
        "typeIdsSize": 0,
        "typeIdsOff": 0,
        "protoIdsSize": 0,
        "protoIdsOff": 0,
        "fieldIdsSize": 0,
        "fieldIdsOff": 0,
        "methodIdsSize": 0,
        "methodIdsOff": 0,
        "classDefsSize": 0,
        "classDefsOff": 0,
        "dataSize": 0,
        "dataOff": 0,
    }


# Number of header bytes consumed by parseHeader().
_HEADER_SIZE = 0x70

# Only this magic/version string is accepted.
_DEX_MAGIC = b"dex\n035\0"

# Header fields in the order they appear in the file.
_FIELD_ORDER = (
    'magic', 'checkSum', 'signature',
    'fileSize', 'headerSize', 'endianTag',
    'linkSize', 'linkOff', 'mapOff',
    'stringIdsSize', 'stringIdsOff',
    'typeIdsSize', 'typeIdsOff',
    'protoIdsSize', 'protoIdsOff',
    'fieldIdsSize', 'fieldIdsOff',
    'methodIdsSize', 'methodIdsOff',
    'classDefsSize', 'classDefsOff',
    'dataSize', 'dataOff',
)

# '<' forces little-endian with no padding, so the result does not depend
# on the host byte order: 8-byte magic, u4 checksum, 20-byte signature,
# then twenty u4 fields (8 + 4 + 20 + 80 == 0x70 bytes).
_HEADER_FORMAT = '<8sI20s20I'


def _invalidDexFile():
    """Report an unusable input file and terminate with exit status -1."""
    print('invalid dex file.')
    sys.exit(-1)


def _formatValue(value):
    """Render a header value for display.

    Byte-string fields (magic, signature) are shown as hex digits because
    hex() only accepts integers; integer fields go through hex().
    """
    if isinstance(value, bytes):
        return binascii.hexlify(value).decode('ascii')
    return hex(value)


def parseHeader(header_data):
    """Parse the first 0x70 bytes of a DEX file into DexStruct.DexHeader.

    header_data -- byte string holding at least the 0x70-byte header;
                   any extra bytes are ignored.

    Prints 'invalid dex file.' and exits with status -1 when the data is
    shorter than the header or the magic is not "dex\\n035\\0".
    Returns nothing; the result is stored in DexStruct.DexHeader.
    """
    if len(header_data) < _HEADER_SIZE:
        _invalidDexFile()

    values = struct.unpack(_HEADER_FORMAT, header_data[:_HEADER_SIZE])
    if values[0] != _DEX_MAGIC:
        _invalidDexFile()

    for name, value in zip(_FIELD_ORDER, values):
        DexStruct.DexHeader[name] = value


if __name__ == '__main__':
    with open("classes.dex", 'rb') as f:
        parseHeader(f.read(_HEADER_SIZE))
    # Print in header order rather than dict order.
    for x in _FIELD_ORDER:
        print('%s %s' % (x, _formatValue(DexStruct.DexHeader[x])))