- sevaa/dwex: DWARF Explorer - a GUI utility for navigating the DWARF debug information (github.com)
- eliben/pyelftools: Parsing ELF and DWARF in Python (github.com)
- 8 调试信息标准: DWARF · GitBook (hitzhangjie.pro)
1.需求
通过ELF文件获取源文件中的相关数据定义,例如C语言的结构体(struct)、枚举(enum)、联合体(union)等。
调查后发现,这些定义可以通过ELF中的DWARF信息获取;DWARF的类型信息存放在ELF的.debug_info节(section)中。
2.DWARF简介
DWARF 是一种广泛使用的标准调试信息格式,最初DWARF的设计初衷是配合ELF格式使用,不过DWARF与具体的文件格式是没有依赖关系的。DWARF这个词是中世纪幻想小说中的用语,也没有什么官方含义,后来才提出 “Debugging With Attributed Record Formats” 这个术语来作为DWARF的另一种定义。
DWARF使用DIE(Debugging Information Entry)来描述变量、数据类型、代码等,DIE中包含了标签(Tag)和一系列属性(Attributes)。
DWARF还定义了一些关键的数据结构,如行号表(Line Number Table)、调用栈信息(Call Frame Information)等,有了这些关键数据结构之后,开发者就可以在源码级别动态添加断点、显示完整的调用栈信息、查看调用栈中指定栈帧的信息。
CU - Compilation Unit
DIE - Debugging Information Entry
- DW_TAG_xxxx: DIE的类型
- DW_AT_XXX: DIE的属性
- DIE也有父子关系,兄弟关系,例如:一个structure的成员变量就是structure DIE的子DIE.
3.如何解析
1.通过readelf tool
readelf -w xxx.elf
2. 通过python lib - pyelftools
使用示例:获取linux kernel module的各种structure定义。
import argparse
import json
import os
from collections import defaultdict
from typing import Optional
from elftools.dwarf.die import DIE
from elftools.elf.elffile import ELFFile
from loguru import logger
# Register 'test.log' as an additional loguru sink so the debug output below is also written to that file.
logger.add('test.log')
# DIE tag -> textual prefix placed in front of a *named* type when its
# display string is built (e.g. a named structure becomes "struct <name>").
Map_TypePrefix = {
    "DW_TAG_base_type": "",
    "DW_TAG_structure_type": "struct ",
    "DW_TAG_union_type": "union ",
    "DW_TAG_pointer_type": "pointer ",
}

# DIE tag -> placeholder used as the whole type string when the DIE that
# terminates a type chain carries no DW_AT_name.
Map_AnonTypes = {
    "DW_TAG_subroutine_type": "subroutine",
    "DW_TAG_pointer_type": "pointer",
    "DW_TAG_union_type": "union",
}
# recursive function to get type of a DIE node
def die_type_rec(die: DIE, prev: Optional[DIE]):
    """Follow the DW_AT_type chain starting at ``die`` and describe its end.

    The chain is walked until a DIE without a DW_AT_type attribute is
    reached; that DIE (together with the DIE that referenced it) determines
    the returned string.

    Args:
        die: DIE whose type should be resolved.
        prev: DIE that referenced ``die`` through DW_AT_type, or None for the
            first call of the recursion.

    Returns:
        A type string. It starts with '* ' when the DIE that terminates the
        chain was referenced directly by a DW_TAG_pointer_type DIE.
    """
    type_attr = die.attributes.get("DW_AT_type")
    if type_attr is not None:
        # Let pyelftools resolve the reference: besides the CU-relative
        # DW_FORM_ref4 it also understands the other reference forms, so an
        # unexpected form no longer falls through and returns None.
        return die_type_rec(die.get_DIE_from_attribute("DW_AT_type"), die)

    # End of the chain. `prev` is None when the very first DIE has no
    # DW_AT_type, so its tag must not be dereferenced blindly.
    prev_tag = prev.tag if prev is not None else None
    prefix = '* ' if prev_tag == 'DW_TAG_pointer_type' else ''

    name_attr = die.attributes.get("DW_AT_name")
    if name_attr:
        # common named type with prefix
        return prefix + Map_TypePrefix.get(die.tag, f'unknown: {die.tag}') \
            + name_attr.value.decode()
    if die.tag == 'DW_TAG_structure_type' and prev_tag == 'DW_TAG_typedef':
        # typedef-ed anonymous struct: borrow the name of the typedef
        return prefix + 'struct ' + prev.attributes.get("DW_AT_name").value.decode()
    # no name types
    return prefix + Map_AnonTypes.get(die.tag, f'unknown: {die.tag}')
# recursive function to get all struct members
def die_info_rec(die: DIE, name=''):
    """Walk ``die`` and its descendants, recording named structure members.

    Every DW_TAG_member DIE that has a name is logged and stored in the
    module-level ``struct_data`` mapping under the key ``name``. A named
    DW_TAG_structure_type DIE replaces ``name`` with 'struct <name>' for its
    own subtree; pure declarations are logged and not descended into.

    Args:
        die: DIE to inspect.
        name: Key in ``struct_data`` under which members found at this level
            are stored (the enclosing structure as seen by the caller).
    """
    attrs = die.attributes

    if die.tag == 'DW_TAG_member' and attrs.get("DW_AT_name"):
        member_name = attrs.get("DW_AT_name").value.decode()
        member_type = die_type_rec(die, None)
        if member_type is None:
            # The type could not be resolved; keep the member with a
            # placeholder instead of failing on the string operations below.
            member_type = 'unknown'

        location = attrs.get("DW_AT_data_member_location")
        bit_size = attrs.get("DW_AT_bit_size")
        bit_offset = attrs.get("DW_AT_data_bit_offset")
        if location:
            logger.debug(' > .{}, type: {}, offset: {}'.format(member_name, member_type, location.value))
        elif bit_size and bit_offset:
            # Arguments follow the order of the labels in the message
            # (bit_offset first, then bit_size).
            logger.debug(' > .{}, type: {}, bit_offset: {}, bit_size: {}'.format(member_name,
                                                                                 member_type,
                                                                                 bit_offset.value,
                                                                                 bit_size.value))

        # save to return data
        if member_type.startswith('*'):
            # pointer member, change to *name -> type
            # (only the '*' is removed, the rest of the string is kept as is)
            struct_data[name]['*' + member_name] = member_type[1:]
        else:
            struct_data[name][member_name] = member_type

    if die.tag == 'DW_TAG_structure_type' and attrs.get("DW_AT_name"):
        name = 'struct ' + attrs.get("DW_AT_name").value.decode()
        declaration = attrs.get("DW_AT_declaration")
        if declaration and declaration.value == 1:
            # `name` already carries the 'struct ' prefix.
            logger.debug("{}: just a declaration".format(name))
            return
        size_attr = attrs.get("DW_AT_byte_size")
        # Tolerate a structure DIE without DW_AT_byte_size.
        size = size_attr.value if size_attr is not None else None
        logger.debug("{}, size:{}".format(name, size))

    # recursion into all children DIE
    for child in die.iter_children():
        die_info_rec(child, name)
def parse_top_die_by_cu(dwarfinfo):
    """Visit every compile unit of ``dwarfinfo`` and scan its top-level DIEs.

    For each compile unit the top DIE is logged between a start and an end
    banner, and every direct child of the top DIE is handed to
    ``die_info_rec``. Compile units and children are numbered from 1 in the
    log output.

    Args:
        dwarfinfo: object providing ``iter_CUs()``, as obtained from
            ``ELFFile.get_dwarf_info()`` by the script below.
    """
    for cu_number, unit in enumerate(dwarfinfo.iter_CUs(), start=1):
        logger.debug(' Found a compile unit at offset %s, length %s' % (unit.cu_offset, unit['unit_length']))

        # The top DIE is the root of this compile unit's DIE tree.
        root_die = unit.get_top_DIE()
        logger.debug("------------------------Top Die[{}] start-----------------------------------------".format(cu_number))
        logger.debug(root_die)

        # Process each direct child of the root DIE.
        for child_number, child_die in enumerate(root_die.iter_children(), start=1):
            logger.debug("Top Die[{}]->child[{}]:", cu_number, child_number)
            die_info_rec(child_die)

        logger.debug("------------------------Top Die[{}] end-----------------------------------------".format(cu_number))
# dict for all struct members, filled by die_info_rec:
# {'struct <name>': {member name: type string}}
struct_data = defaultdict(dict)

elf_file = ".\\test.ko"
print('Processing file:', elf_file)

# The context manager closes the file even when parsing raises or the
# script exits early because no DWARF data is present.
with open(elf_file, 'rb') as f:
    elffile = ELFFile(f)
    if not elffile.has_dwarf_info():
        print(f'ERROR: input file {elf_file} has no DWARF info')
        # Same exit status as before, without relying on the `exit` helper
        # that is only installed by the `site` module.
        raise SystemExit(1)
    # Kept as a module-level name; parsing happens while the file is open.
    dwarfinfo = elffile.get_dwarf_info()
    parse_top_die_by_cu(dwarfinfo)
运行结果