1from __future__ import annotations
2
3import ctypes as _ctypes
4import struct
5import sys
6import types
7from pathlib import Path
8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast
9
10from dissect.cstruct.exceptions import ResolveError
11from dissect.cstruct.expression import Expression
12from dissect.cstruct.parser import CStyleParser, TokenParser
13from dissect.cstruct.types import (
14 LEB128,
15 Array,
16 BaseArray,
17 BaseType,
18 Char,
19 Enum,
20 Field,
21 Flag,
22 Int,
23 Packed,
24 Pointer,
25 Structure,
26 Union,
27 Void,
28 Wchar,
29)
30
31if TYPE_CHECKING:
32 from collections.abc import Iterable
33 from typing import TypeAlias
34
35
# Type variable bound to BaseType; ``cstruct._make_array`` uses it to tie the returned array type
# to the element type it was given.
T = TypeVar("T", bound=BaseType)
37
38
class cstruct:
    """Main class of cstruct. All types are registered in here.

    Args:
        load: Optional definition to parse right after construction (handed to :meth:`load`).
        endian: The endianness to use when parsing.
        pointer: The pointer type to use for pointers. When omitted, ``uint64`` is used if
            ``sys.maxsize`` exceeds ``2**32`` and ``uint32`` otherwise.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, load: str = "", *, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        self.includes = []
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            # The class name must match the registered key ("uint48", not "int48").
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

        if load:
            self.load(load)

    def __getattr__(self, attr: str) -> Any:
        """Resolve ``attr`` as a constant first, then as a type name.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known type name.
        """
        # __getattr__ only runs when normal lookup failed. Read the registries straight from the
        # instance dict: going through ``self.consts`` on an instance that has no ``consts`` yet
        # (e.g. one created without running ``__init__``) would re-enter this method forever.
        state = self.__dict__

        try:
            return state["consts"][attr]
        except KeyError:
            pass

        try:
            return self.resolve(state["typedefs"][attr])
        except KeyError:
            pass

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a fresh ``__anonymous_<n>__`` name and advance the internal counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: When true, silently overwrite an existing entry with the same name.

        Raises:
            ValueError: If ``name`` is already registered, resolves to a different type than ``type_``
                and ``replace`` is false.
        """
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. The default, DEF_CSTYLE, uses the token
        based parser. The older, strictly regex based parser is still available by using DEF_LEGACY.
        Parsers can add types and modify this cstruct instance. Arguments can be passed to parsers
        using kwargs.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. Defaults to DEF_CSTYLE.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        # NOTE: a deftype other than the two known constants is ignored and nothing is parsed.
        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        with Path(path).open() as fh:
            self.load(fh.read(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. Anything that is not a str is returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved, or the alias chain is longer than 10 hops.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow alias -> alias -> ... -> type; the hop limit guards against alias cycles.
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Name of the new class.
            bases: Base classes of the new class.
            size: Size of the type, or ``None`` for a dynamically sized type.
            alignment: Alignment of the type. Falls back to ``size`` when not given (or falsy).
            attrs: Extra class attributes. The passed dict is not modified.

        Returns:
            The newly created class.
        """
        # Work on a copy so the caller's dict does not receive the binding attributes below.
        namespace = dict(attrs or {})
        namespace.update(
            {
                "cs": self,
                "size": size,
                "dynamic": size is None,
                "alignment": alignment or size,
            }
        )
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type of ``type_``.

        Args:
            type_: The element type.
            num_entries: Number of entries, an expression evaluated later, or ``None`` for a
                null-terminated array.

        Raises:
            ValueError: If a fixed number of entries is requested for a type without a size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            # The total size cannot be known up front.
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast("type[Array]", self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an ``Int`` based type of ``size`` bytes with the given signedness."""
        return cast("type[Int]", self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a ``Packed`` type for a ``struct`` format character; the size comes from ``struct.calcsize``."""
        return cast(
            "type[Packed]",
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an ``Enum`` type bound to this cstruct instance."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a ``Flag`` type bound to this cstruct instance."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a pointer type to ``target``, sized and aligned like ``self.pointer``."""
        return self._make_type(
            f"{target.__name__}*",
            (Pointer,),
            self.pointer.size,
            alignment=self.pointer.alignment,
            attrs={"type": target},
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type (or a subclass of ``base``) with the given fields."""
        return self._make_type(
            name,
            (base,),
            None,
            attrs={
                "fields": fields,
                "__align__": align,
                "__anonymous__": anonymous,
            },
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type; same as :meth:`_make_struct` with ``Union`` as base."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    if TYPE_CHECKING:
        # Static-only declarations mirroring the names registered in ``typedefs``.
        # Names containing spaces cannot be declared and are kept as comments.
        # ruff: noqa: PYI042
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32
573
574
def ctypes(structure: type[Structure]) -> type[_ctypes.Structure]:
    """Build a ``ctypes.Structure`` subclass that mirrors a cstruct structure.

    Each entry of ``structure.__fields__`` is converted with :func:`ctypes_type`; the resulting
    class carries the same name as ``structure``.
    """

    def _as_member(member):
        # Convert the field type before reading the field name.
        converted = ctypes_type(member.type)
        return (member._name, converted)

    members = [_as_member(member) for member in structure.__fields__]
    namespace = {"_fields_": members}
    return type(structure.__name__, (_ctypes.Structure,), namespace)
583
584
def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type into the equivalent ctypes type.

    Handles packed types whose pack character is in the mapping below, ``Char``, ``Wchar``,
    arrays, pointers and structures (the latter three recursively).

    Args:
        type_: The cstruct type class to translate.

    Returns:
        The matching ctypes type.

    Raises:
        NotImplementedError: If there is no ctypes equivalent for ``type_``.
    """
    # struct pack character -> ctypes scalar
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        subtype = ctypes_type(type_.type)
        # ctypes array types are created by multiplying the element type by the length.
        return subtype * type_.num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # ``type_`` is a class, so ``type_.__class__`` would be its metaclass; report the type itself.
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")