1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import re
import sys
25
class DWARFParser(object):
    """A parser for DWARF files.

    Lines of a textual DWARF dump are consumed one at a time through
    feed_line(). Structure/union layouts, enumerations and variables are
    collected per compile unit and merged into the ``all_*`` attributes each
    time a new compile unit starts or finalize() is called. print_output()
    renders the merged result as Python source text.
    """

    # Header with a decimal statement id: <level><statement_id><kind>.
    dwarf_header_regex = re.compile(
        r'<(?P<level>\d+)><(?P<statement_id>[0-9+]+)><(?P<kind>\w+)>')
    # A single attribute of the form keyname<val>.
    dwarf_key_val_regex = re.compile(
        r'\s*(?P<keyname>\w+)<(?P<val>[^>]*)>')

    # Header with a hexadecimal statement id (optionally "0x..+0x..").
    dwarf_header_regex2 = re.compile(
        r'<(?P<level>\d+)>'
        r'<(?P<statement_id>0x[0-9a-fA-F]+([+]0x[0-9a-fA-F]+)?)>'
        r'<(?P<kind>\w+)>')

    # Byte size -> integer type name, used for unnamed base types and enums.
    sz2tp = {8: 'long long', 4: 'int', 2: 'short', 1: 'char'}
    # DW_AT_name of a base type -> type name emitted in the output.
    tp2vol = {
        '_Bool': 'unsigned char',
        'char': 'char',
        'float': 'float',
        'double': 'double',
        'long double': 'double',
        'int': 'int',
        'long int': 'long',
        'long long int': 'long long',
        'long long unsigned int': 'unsigned long long',
        'long unsigned int': 'unsigned long',
        'short int': 'short',
        'short unsigned int': 'unsigned short',
        'signed char': 'signed char',
        'unsigned char': 'unsigned char',
        'unsigned int': 'unsigned int',
        'sizetype': 'unsigned long',
    }

    def __init__(self, data=None):
        """Create a parser and optionally feed it a complete dump.

        Args:
            data: Optional string holding the whole dump; every line of it
                is passed to feed_line().
        """
        self.current_level = -1
        # One [kind, name-or-id] entry per nesting level of the current path.
        self.name_stack = []
        # statement id -> type description (a list) or a '<id' reference.
        self.id_to_name = {}
        # The all_* containers accumulate across compile units; the plain
        # ones only hold the compile unit currently being read.
        self.all_vtypes = {}
        self.vtypes = {}
        self.enums = {}
        self.all_vars = {}
        self.vars = {}
        self.all_local_vars = []
        self.local_vars = []
        self.anons = 0
        # Radix for numeric attributes; switched to 16 by feed_line() once a
        # hexadecimal header has been seen.
        self.base = 10

        if data:
            for line in data.splitlines():
                self.feed_line(line)

    @staticmethod
    def _type_name(data, statement_id):
        """Return the quoted DW_AT_name without quotes, or a synthetic name."""
        return data.get('DW_AT_name', "__unnamed_%s" % statement_id).strip('"')

    @staticmethod
    def _member_offset(location):
        """Extract the integer offset from a DW_AT_data_member_location."""
        try:
            # Form "<operation> <offset>": the offset is the second token.
            return int(location.split()[1])
        except (IndexError, ValueError):
            # Form "<offset>" or "<offset> (<annotation>)".
            idx = location.find("(")
            if idx != -1:
                location = location[:idx]
            return int(location)

    def resolve(self, memb):
        """Lookup anonymous member and replace it with a well known one.

        Args:
            memb: A string starting with '<' is treated as a reference to a
                statement id and looked up (recursively) in id_to_name. A
                list is resolved element by element. Anything else is
                returned unchanged.

        Returns:
            The value with every reference replaced.

        Raises:
            KeyError: a referenced statement id has not been recorded.
        """
        if isinstance(memb, str) and memb.startswith('<'):
            if memb[1:3] == "0x":
                # Drop the zero padding before the lookup; presumably headers
                # print hexadecimal ids unpadded while references pad them.
                memb = "<0x" + memb[3:].lstrip('0')

            return self.resolve(self.id_to_name[memb[1:]])

        if isinstance(memb, list):
            return [self.resolve(r) for r in memb]

        return memb

    def resolve_refs(self):
        """Replace references with types in the current unit's vtypes.

        Returns:
            self.vtypes, with every member entry passed through resolve().
        """
        for entry in self.vtypes.values():
            members = entry[1]
            for name in list(members):
                members[name] = self.resolve(members[name])

        return self.vtypes

    def deep_replace(self, t, search, repl):
        """Recursively replace anonymous references.

        Returns repl where t equals search; lists are rebuilt with the
        replacement applied to every element; other values pass through.
        """
        if t == search:
            return repl

        if isinstance(t, list):
            return [self.deep_replace(x, search, repl) for x in t]

        return t

    def get_deepest(self, t):
        """Return the innermost type name of a nested type description.

        A one-element list yields its element. A longer list is searched
        depth-first and the first truthy result wins. Non-lists yield None.
        """
        if not isinstance(t, list):
            return None

        if len(t) == 1:
            return t[0]

        for part in t:
            res = self.get_deepest(part)
            if res:
                return res

        return None

    def base_type_name(self, data):
        """Replace references to base types.

        Args:
            data: Attribute dict of a DW_TAG_base_type statement.

        Returns:
            The tp2vol entry for DW_AT_name when a name is present, otherwise
            a name derived from DW_AT_byte_size and DW_AT_encoding.

        Raises:
            KeyError: the name or byte size is not in the lookup tables.
        """
        if 'DW_AT_name' in data:
            return self.tp2vol[data['DW_AT_name'].strip('"')]

        sz = int(data['DW_AT_byte_size'], self.base)
        if data['DW_AT_encoding'] == 'DW_ATE_unsigned':
            return 'unsigned ' + self.sz2tp[sz]

        return self.sz2tp[sz]

    def feed_line(self, line):
        """Accepts another line from the input.

        A DWARF line looks like:
        <2><1442><DW_TAG_member> DW_AT_name<fs> ...

        The header is level, statement_id, and kind followed by key value pairs.
        Lines that do not start with such a header are ignored.
        """
        header = self.dwarf_header_regex2.match(line)
        if header:
            # A hexadecimal header switches numeric parsing to base 16 for
            # the rest of the input.
            self.base = 16
        else:
            header = self.dwarf_header_regex.match(line)

        if not header:
            return

        parsed = header.groupdict()
        parsed['data'] = dict(
            (attr.group('keyname'), attr.group('val'))
            for attr in self.dwarf_key_val_regex.finditer(line, header.end()))

        # Every DW_TAG_variable is routed to process_variable(), so the
        # DW_TAG_variable branch of process_statement() is only reached by
        # callers that invoke process_statement() directly.
        if parsed['kind'] in ('DW_TAG_formal_parameter', 'DW_TAG_variable'):
            self.process_variable(parsed['data'])
        else:
            self.process_statement(**parsed)

    def process_statement(self, kind, level, data, statement_id):
        """Process a single parsed statement.

        Args:
            kind: The DW_TAG_* name from the header.
            level: Nesting level from the header, as a string of digits.
            data: Dict mapping attribute names to their raw text values.
            statement_id: The statement id from the header.
        """
        new_level = int(level)
        if new_level > self.current_level:
            self.current_level = new_level
            self.name_stack.append([])
        elif new_level < self.current_level:
            self.name_stack = self.name_stack[:new_level + 1]
            self.current_level = new_level

        self.name_stack[-1] = [kind, statement_id]

        try:
            parent_kind, parent_name = self.name_stack[-2]
        except IndexError:
            parent_kind, parent_name = (None, None)

        if kind == 'DW_TAG_compile_unit':
            # finalize() merges the previous unit, including its local
            # variables, into the all_* containers; merging local_vars a
            # second time here would record every local twice.
            self.finalize()
            self.vtypes = {}
            self.vars = {}
            self.local_vars = []
            self.id_to_name = {}

        elif kind == 'DW_TAG_structure_type':
            name = self._type_name(data, statement_id)
            self.name_stack[-1][1] = name
            self.id_to_name[statement_id] = [name]

            # Forward declarations carry no layout.
            if 'DW_AT_declaration' not in data:
                self.vtypes[name] = [int(data['DW_AT_byte_size'], self.base), {}]

        elif kind == 'DW_TAG_union_type':
            name = self._type_name(data, statement_id)
            self.name_stack[-1][1] = name
            self.id_to_name[statement_id] = [name]

            # Same guard as for structures and enumerations.
            if 'DW_AT_declaration' not in data:
                self.vtypes[name] = [int(data['DW_AT_byte_size'], self.base), {}]

        elif kind == 'DW_TAG_array_type':
            self.name_stack[-1][1] = statement_id
            # Holds the element type until the subrange child supplies a size.
            self.id_to_name[statement_id] = data['DW_AT_type']

        elif kind == 'DW_TAG_enumeration_type':
            name = self._type_name(data, statement_id)
            self.name_stack[-1][1] = name
            self.id_to_name[statement_id] = [name]

            if 'DW_AT_declaration' not in data:
                sz = int(data['DW_AT_byte_size'], self.base)
                self.enums[name] = [sz, {}]

        elif kind == 'DW_TAG_pointer_type':
            self.id_to_name[statement_id] = [
                'pointer', data.get('DW_AT_type', ['void'])]

        elif kind == 'DW_TAG_base_type':
            self.id_to_name[statement_id] = [self.base_type_name(data)]

        elif kind in ('DW_TAG_volatile_type', 'DW_TAG_const_type'):
            # Qualifiers are transparent: they alias the qualified type.
            self.id_to_name[statement_id] = data.get('DW_AT_type', ['void'])

        elif kind == 'DW_TAG_typedef':
            self.id_to_name[statement_id] = data['DW_AT_type']

        elif kind == 'DW_TAG_subroutine_type':
            self.id_to_name[statement_id] = ['void']

        elif kind == 'DW_TAG_variable' and level == '1':
            if 'DW_AT_location' in data:
                split = data['DW_AT_location'].split()
                if len(split) > 1:
                    loc = int(split[1], 0)
                    self.vars[data['DW_AT_name']] = [loc, data['DW_AT_type']]

        elif kind == 'DW_TAG_member' and parent_kind == 'DW_TAG_structure_type':
            name = self._type_name(data, statement_id)
            off = self._member_offset(data['DW_AT_data_member_location'])

            if 'DW_AT_bit_size' in data and 'DW_AT_bit_offset' in data:
                full_size = int(data['DW_AT_byte_size'], self.base) * 8
                bit_offset = int(data['DW_AT_bit_offset'], self.base)
                bit_size = int(data['DW_AT_bit_size'], self.base)
                # Mirror the range within the storage unit: the emitted bit
                # positions count from the opposite end to DW_AT_bit_offset.
                stbit = full_size - (bit_offset + bit_size)
                edbit = full_size - bit_offset
                assert stbit < edbit
                memb_tp = ['BitField', dict(start_bit=stbit, end_bit=edbit)]
            else:
                memb_tp = data['DW_AT_type']

            self.vtypes[parent_name][1][name] = [off, memb_tp]

        elif kind == 'DW_TAG_member' and parent_kind == 'DW_TAG_union_type':
            name = self._type_name(data, statement_id)
            # Every union member is recorded at offset 0.
            self.vtypes[parent_name][1][name] = [0, data['DW_AT_type']]

        elif (kind == 'DW_TAG_enumerator'
              and parent_kind == 'DW_TAG_enumeration_type'):
            name = data['DW_AT_name'].strip('"')

            try:
                val = int(data['DW_AT_const_value'])
            except ValueError:
                val = int(data['DW_AT_const_value'].split('(')[0], self.base)

            self.enums[parent_name][1][name] = val

        elif (kind == 'DW_TAG_subrange_type'
              and parent_kind == 'DW_TAG_array_type'):
            if 'DW_AT_upper_bound' in data:
                bound = data['DW_AT_upper_bound']
                head = bound.split('(')[0]
                try:
                    sz = int(bound)
                except ValueError:
                    try:
                        sz = int(head)
                    except ValueError:
                        try:
                            # Like enumerator values, fall back to the dump's
                            # radix so hexadecimal bounds are not lost.
                            sz = int(head, self.base)
                        except ValueError:
                            # Unparseable bound: give up.
                            sz = 0
                # The upper bound is the last valid index.
                sz += 1
            else:
                sz = 0

            tp = self.id_to_name[parent_name]
            self.id_to_name[parent_name] = ['array', sz, tp]

        # Every other kind (DW_TAG_subprogram among them) is ignored.

    def process_variable(self, data):
        """Process a local variable.

        Records (name, declaration line, declaration file, type) in
        local_vars. Entries lacking any of the four attributes are skipped.
        """
        required = ('DW_AT_name', 'DW_AT_decl_line', 'DW_AT_decl_file',
                    'DW_AT_type')
        if all(key in data for key in required):
            self.local_vars.append(
                (data['DW_AT_name'], int(data['DW_AT_decl_line'], self.base),
                 data['DW_AT_decl_file'].split()[1], data['DW_AT_type']))

    def finalize(self):
        """Finalize the output.

        Resolves the references of the current compile unit, merges the
        result into the all_* containers, prunes unreferenced unnamed types
        and expands enumeration-typed members.

        Returns:
            self.all_vtypes.
        """
        if self.vtypes:
            self.vtypes = self.resolve_refs()
            self.all_vtypes.update(self.vtypes)
        if self.vars:
            self.vars = dict(
                (name, self.resolve(entry)) for name, entry in self.vars.items())
            self.all_vars.update(self.vars)
        if self.local_vars:
            self.local_vars = [
                (name, lineno, decl_file, self.resolve(tp))
                for (name, lineno, decl_file, tp) in self.local_vars]
            self.all_local_vars += self.local_vars

        # Drop unnamed types nothing refers to. Removing one can leave
        # another unreferenced, so repeat until a pass removes nothing.
        changed = True
        while changed:
            changed = False
            referenced = set()
            for entry in self.all_vtypes.values():
                for member in entry[1].values():
                    referenced.add(self.get_deepest(member))
            for entry in self.all_vars.values():
                referenced.add(self.get_deepest(entry[1]))
            for name in list(self.all_vtypes):
                if name.startswith('__unnamed_') and name not in referenced:
                    del self.all_vtypes[name]
                    changed = True

        # Replace members whose innermost type is a known enumeration with
        # an 'Enumeration' description holding a value -> name table.
        for type_name in self.all_vtypes:
            members = self.all_vtypes[type_name][1]
            for member_name in list(members):
                memb = members[member_name]
                d = self.get_deepest(memb)
                if d in self.enums:
                    sz = self.enums[d][0]
                    vals = dict((v, k) for k, v in self.enums[d][1].items())
                    members[member_name] = self.deep_replace(
                        memb, [d],
                        ['Enumeration',
                         dict(target=self.sz2tp[sz], choices=vals)])

        return self.all_vtypes

    def print_output(self):
        """Finalize and render the collected types and global variables.

        Returns:
            A string containing a ``linux_types`` dict literal (members
            ordered by offset) followed by a ``linux_gvars`` dict literal
            (variables ordered by location).
        """
        self.finalize()
        parts = ["linux_types = {\n"]

        for t in self.all_vtypes:
            members = self.all_vtypes[t][1]
            parts.append(" '%s': [ %#x, {\n" % (t, self.all_vtypes[t][0]))
            for m, entry in sorted(members.items(),
                                   key=lambda item: item[1][0]):
                parts.append(" '%s': [%#x, %s],\n" % (m, entry[0], entry[1]))
            parts.append("}],\n")
        parts.append("}\n\n")

        parts.append("linux_gvars = {\n")
        for v, entry in sorted(self.all_vars.items(),
                               key=lambda item: item[1][0]):
            parts.append(" '%s': [%#010x, %s],\n" % (v, entry[0], entry[1]))
        parts.append("}\n")

        return "".join(parts)
384
385
if __name__ == '__main__':
    # Command-line driver: parse the dump named by the first argument and
    # print the generated definitions to standard output.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <dwarf-dump-file>" % sys.argv[0])

    parser = DWARFParser()

    # Feed line by line so the whole dump never has to sit in memory, and
    # close the file even if parsing raises.
    with open(sys.argv[1]) as dump:
        for line in dump:
            parser.feed_line(line)

    print(parser.print_output())
393