1from __future__ import annotations
2
3import ctypes as _ctypes
4import struct
5import sys
6import types
7from pathlib import Path
8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast
9
10from dissect.cstruct.exceptions import ResolveError
11from dissect.cstruct.expression import Expression
12from dissect.cstruct.parser import CStyleParser, TokenParser
13from dissect.cstruct.types import (
14 LEB128,
15 Array,
16 BaseArray,
17 BaseType,
18 Char,
19 Enum,
20 Field,
21 Flag,
22 Int,
23 Packed,
24 Pointer,
25 Structure,
26 Union,
27 Void,
28 Wchar,
29)
30
31if TYPE_CHECKING:
32 from collections.abc import Iterable
33
34 from typing_extensions import TypeAlias
35
36
37T = TypeVar("T", bound=BaseType)
38
39
class cstruct:
    """Main class of cstruct. All types are registered in here.

    Args:
        load: Optional definition to parse with :meth:`load` once the built-in types are registered.
        endian: The endianness to use when parsing.
        pointer: The pointer type to use for pointers. When omitted, ``uint64`` is used if
            ``sys.maxsize > 2**32`` and ``uint32`` otherwise.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, load: str = "", *, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        self.includes = []
        # Maps a type name to either a bound type class or, for aliases, the name of another entry.
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

        if load:
            self.load(load)

    def __getattr__(self, attr: str) -> Any:
        """Expose constants and types as attributes.

        Constants in ``consts`` take precedence over entries in ``typedefs``.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known type name.
            ResolveError: If ``attr`` is a type alias that can't be resolved.
        """
        # Read the registries straight from the instance dict. Going through ``self.consts`` would
        # re-enter ``__getattr__`` without end on an instance whose ``__init__`` has not run yet
        # (which is what e.g. ``copy.copy`` probes), ending in a RecursionError instead of AttributeError.
        state = self.__dict__

        consts = state.get("consts", {})
        if attr in consts:
            return consts[attr]

        typedefs = state.get("typedefs", {})
        if attr in typedefs:
            return self.resolve(typedefs[attr])

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a fresh ``__anonymous_<n>__`` name and advance the per-instance counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: Overwrite an existing entry for ``name`` without checking it.

        Raises:
            ValueError: If ``replace`` is false and ``name`` already resolves to a different type.
            ResolveError: If ``replace`` is false, ``name`` already exists and either side can't be resolved.
        """
        # Re-registering a name with a type that resolves to the same object is allowed.
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.

        Raises:
            ValueError: If ``name`` is already registered (see :meth:`add_type`).
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. ``DEF_CSTYLE`` (the default) uses the
        token based parser, ``DEF_LEGACY`` uses the older, strictly regex based parser. Parsers can
        add types and modify this cstruct instance. Arguments can be passed to parsers using kwargs.

        Any other ``deftype`` value is ignored and nothing is parsed.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. A falsy value selects ``DEF_CSTYLE``.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance, so calls can be chained.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str | Path, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        with Path(path).open() as fh:
            self.load(fh.read(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.

        Raises:
            ResolveError: If ``name`` can't be resolved to a type.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. Anything that is not a ``str`` is returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow at most 10 alias hops, so that a cycle of aliases ends in an error instead of a hang.
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Name of the new class.
            bases: Base classes of the new class.
            size: Size of the type. ``None`` marks the type as dynamic.
            alignment: Alignment of the type. Falls back to ``size`` when not given (or ``0``).
            attrs: Extra class attributes. ``cs``, ``size``, ``dynamic`` and ``alignment`` are always
                overwritten. The passed dict itself is left untouched.

        Returns:
            The newly created class.
        """
        # Merge into a fresh dict so the caller's ``attrs`` is never modified.
        namespace = {
            **(attrs or {}),
            "cs": self,
            "size": size,
            "dynamic": size is None,
            "alignment": alignment or size,
        }
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type of ``type_``.

        Args:
            type_: The element type.
            num_entries: Number of entries. ``None`` creates a null terminated array, an
                :class:`Expression` creates an array whose size is not known up front.

        Returns:
            The array type, built on ``type_.ArrayType`` and using the alignment of ``type_``.

        Raises:
            ValueError: If a fixed entry count is given for a type that has no size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast("type[Array]", self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an :class:`Int` based type of ``size`` bytes with the given signedness."""
        return cast("type[Int]", self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a :class:`Packed` based type; its size is ``struct.calcsize(packchar)``."""
        return cast(
            "type[Packed]",
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an :class:`Enum` named ``name`` with underlying type ``type_``."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a :class:`Flag` named ``name`` with underlying type ``type_``."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a pointer type to ``target``, sized and aligned like ``self.pointer``."""
        return cast(
            "type[Pointer]",
            self._make_type(
                f"{target.__name__}*",
                (Pointer,),
                self.pointer.size,
                alignment=self.pointer.alignment,
                attrs={"type": target},
            ),
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type with the given fields.

        The type is created without a size; ``fields``, ``__align__`` and ``__anonymous__`` are passed
        on as class attributes.
        """
        return cast(
            "type[Structure]",
            self._make_type(
                name,
                (base,),
                None,
                attrs={
                    "fields": fields,
                    "__align__": align,
                    "__anonymous__": anonymous,
                },
            ),
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type; identical to :meth:`_make_struct` but with ``Union`` as base class."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    # Static mirror of the built-in entries of ``typedefs`` for type checkers only.
    # Names containing a space can't be written as attributes and are kept as comments.
    if TYPE_CHECKING:
        # ruff: noqa: PYI042
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32
574
575
def ctypes(structure: type[Structure]) -> type[_ctypes.Structure] | type[_ctypes.Union]:
    """Create ctypes structures from cstruct structures.

    Args:
        structure: The cstruct structure or union type to convert.

    Returns:
        A new ctypes class named after ``structure``. Its ``_fields_`` hold one ``(name, ctypes type)``
        pair per entry of ``structure.__fields__``. Unions are turned into :class:`ctypes.Union`
        subclasses, everything else into :class:`ctypes.Structure` subclasses.

    Raises:
        NotImplementedError: If a field type can't be expressed as a ctypes type.
    """
    fields = [(field._name, ctypes_type(field.type)) for field in structure.__fields__]

    # A union built on ``ctypes.Structure`` would lay its members out one after another instead of
    # overlapping them, so pick the ctypes base that matches the cstruct kind.
    base = _ctypes.Union if issubclass(structure, Union) else _ctypes.Structure

    return type(structure.__name__, (base,), {"_fields_": fields})
584
585
def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type into the matching ctypes type.

    Args:
        type_: The cstruct type to translate.

    Returns:
        A ctypes type: a fixed width scalar for packed types with a known pack character,
        ``c_char``/``c_wchar`` for character types, a ctypes array for fixed size arrays,
        a ctypes pointer for pointers, or the result of :func:`ctypes` for structures.

    Raises:
        NotImplementedError: If the type has no ctypes equivalent here, including arrays
            whose number of entries is not a plain integer.
    """
    # Keyed on the ``struct`` pack character of packed types.
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        subtype = ctypes_type(type_.type)
        num_entries = type_.num_entries
        # ctypes arrays need a concrete length. Null terminated arrays (``None``) and expression
        # sized arrays would otherwise fail with an opaque TypeError from the multiplication.
        if not isinstance(num_entries, int):
            raise NotImplementedError(f"Type not implemented: {type_.__name__}")
        return subtype * num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # ``type_`` is itself a class, so ``type_.__class__`` would name its metaclass, not the type.
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")