1from __future__ import annotations
2
3import ctypes as _ctypes
4import struct
5import sys
6import types
7from pathlib import Path
8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast
9
10from dissect.cstruct.exceptions import ResolveError
11from dissect.cstruct.expression import Expression
12from dissect.cstruct.parser import CStyleParser, TokenParser
13from dissect.cstruct.types import (
14 LEB128,
15 Array,
16 BaseArray,
17 BaseType,
18 Char,
19 Enum,
20 Field,
21 Flag,
22 Int,
23 Packed,
24 Pointer,
25 Structure,
26 Union,
27 Void,
28 Wchar,
29)
30
31if TYPE_CHECKING:
32 from collections.abc import Iterable
33
34 from typing_extensions import TypeAlias
35
36
# Type variable constrained to BaseType subclasses; used to tie the element type passed to
# ``cstruct._make_array`` to the ``Array[T]`` type it returns.
T = TypeVar("T", bound=BaseType)
38
39
class cstruct:
    """Main class of cstruct. All types are registered in here.

    Constants are stored in ``consts`` and type definitions in ``typedefs``. A typedef value is either
    a type class or a string naming another typedef (an alias), see :meth:`resolve`.

    Args:
        endian: The endianness to use when parsing.
        pointer: Name of the type to use for pointers. When omitted, ``uint64`` is used if
            ``sys.maxsize > 2**32`` and ``uint32`` otherwise.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            # The class name must match the typedef key; it was previously registered as "int48"
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        # Resolve the pointer type only after the typedefs exist, since it is looked up by name
        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

    def __getattr__(self, attr: str) -> Any:
        """Look up ``attr`` as a constant first and as a type name second.

        Args:
            attr: The attribute name that normal attribute lookup failed to find.

        Returns:
            The constant value, or the resolved type object.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known type name.
        """
        # ``__getattr__`` only runs after normal lookup failed, so being asked for one of our own
        # lookup tables means ``__init__`` has not populated it. Reading ``self.consts`` or
        # ``self.typedefs`` below would then re-enter this method endlessly (e.g. when ``copy`` or
        # ``pickle`` probe a freshly created instance), so fail with a regular AttributeError instead.
        if attr in ("consts", "typedefs"):
            raise AttributeError(f"Invalid attribute: {attr}")

        try:
            return self.consts[attr]
        except KeyError:
            pass

        try:
            return self.resolve(self.typedefs[attr])
        except KeyError:
            pass

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a new ``__anonymous_N__`` name and advance the internal counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: Whether to overwrite an existing type with the same name without raising.

        Raises:
            ValueError: If the type already exists and resolves to a different type, and ``replace`` is not set.
        """
        # Re-adding a name that resolves to the very same type is allowed
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. Currently, there's
        only one supported parser - DEF_CSTYLE. Parsers can add types and
        modify this cstruct instance. Arguments can be passed to parsers
        using kwargs.

        The CSTYLE parser was recently replaced with token based parser,
        instead of a strictly regex based one. The old parser is still available
        by using DEF_LEGACY.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. Defaults to DEF_CSTYLE.
                Any value other than DEF_CSTYLE or DEF_LEGACY results in nothing being parsed.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        with Path(path).open() as fh:
            self.load(fh.read(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. A non-string value is returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow at most 10 alias hops so that a cycle of aliases cannot loop forever
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Name of the new class.
            bases: Base classes of the new class.
            size: Size of the type, or ``None`` for a dynamically sized type.
            alignment: Alignment of the type. Falls back to ``size`` when not given (or zero).
            attrs: Additional class attributes. The ``cs``, ``size``, ``dynamic`` and ``alignment``
                attributes are always set by this method and take precedence over entries in ``attrs``.

        Returns:
            The newly created class.
        """
        # Work on a copy so the dictionary passed in by the caller is never modified
        namespace = dict(attrs or {})
        namespace.update(
            {
                "cs": self,
                "size": size,
                "dynamic": size is None,
                "alignment": alignment or size,
            }
        )
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type with ``type_`` as element type.

        Args:
            type_: The element type.
            num_entries: Number of entries. ``None`` creates a null-terminated array. An
                :class:`Expression` (or a dynamic element type) creates an array without a fixed size.

        Returns:
            The array type, derived from ``type_.ArrayType``.

        Raises:
            ValueError: If a fixed size array is requested for an element type without a size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast(type[Array], self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an :class:`Int` based type of ``size`` bytes with the given signedness."""
        return cast(type[Int], self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a :class:`Packed` based type.

        Args:
            name: Name of the new type.
            packchar: The :mod:`struct` format character of the type. The size of the type is
                ``struct.calcsize(packchar)``.
            base: The Python type (e.g. ``int`` or ``float``) that is used as first base class.
            alignment: Alignment of the type.
        """
        return cast(
            type[Packed],
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an :class:`Enum` named ``name`` with ``type_`` as underlying type."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a :class:`Flag` named ``name`` with ``type_`` as underlying type."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a :class:`Pointer` type to ``target``.

        The size and alignment are taken from the pointer type of this cstruct instance.
        """
        return self._make_type(
            f"{target.__name__}*",
            (Pointer,),
            self.pointer.size,
            alignment=self.pointer.alignment,
            attrs={"type": target},
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type.

        Args:
            name: Name of the structure.
            fields: The fields of the structure.
            align: Value for the ``__align__`` attribute of the structure.
            anonymous: Value for the ``__anonymous__`` attribute of the structure.
            base: The base class to derive from.
        """
        # The type is created without a size (``None``); the size is not known at this point
        return self._make_type(
            name,
            (base,),
            None,
            attrs={
                "fields": fields,
                "__align__": align,
                "__anonymous__": anonymous,
            },
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type, which is a structure type with :class:`Union` as base class."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    if TYPE_CHECKING:
        # Declarations for static type checkers only, mirroring the default typedefs of ``__init__``.
        # Type names containing a space cannot be declared as attributes and are listed as comments.
        # ruff: noqa: PYI042
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32
570
571
def ctypes(structure: type[Structure]) -> type[_ctypes.Structure]:
    """Build a ``ctypes.Structure`` subclass that mirrors a cstruct structure.

    Every entry of ``structure.__fields__`` is converted with :func:`ctypes_type` and paired
    with the field's ``_name``. The resulting class carries the name of ``structure``.
    """
    # Convert the type before reading the name, one field at a time
    converted = ((ctypes_type(member.type), member._name) for member in structure.__fields__)
    members = [(member_name, ctype) for ctype, member_name in converted]

    namespace = {"_fields_": members}
    return type(structure.__name__, (_ctypes.Structure,), namespace)
580
581
def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type class into the matching ctypes type.

    Args:
        type_: The cstruct type class to translate.

    Returns:
        A ctypes type: a fixed width scalar for packed types with a known pack character,
        ``c_char``/``c_wchar`` for character types, an array or pointer of the translated
        element type for arrays and pointers, or a generated ``ctypes.Structure`` for structures.

    Raises:
        NotImplementedError: If there is no translation for the given type.
    """
    # struct pack character -> ctypes scalar; pack characters missing here (such as "e") are not translated
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        subtype = ctypes_type(type_.type)
        return subtype * type_.num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # ``type_`` is a class itself, so report its own name rather than the name of its metaclass
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")