Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dissect/cstruct/cstruct.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

219 statements  

1from __future__ import annotations 

2 

3import ctypes as _ctypes 

4import struct 

5import sys 

6import types 

7from pathlib import Path 

8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast 

9 

10from dissect.cstruct.exceptions import ResolveError 

11from dissect.cstruct.expression import Expression 

12from dissect.cstruct.parser import CStyleParser, TokenParser 

13from dissect.cstruct.types import ( 

14 LEB128, 

15 Array, 

16 BaseArray, 

17 BaseType, 

18 Char, 

19 Enum, 

20 Field, 

21 Flag, 

22 Int, 

23 Packed, 

24 Pointer, 

25 Structure, 

26 Union, 

27 Void, 

28 Wchar, 

29) 

30 

31if TYPE_CHECKING: 

32 from collections.abc import Iterable 

33 from typing import TypeAlias 

34 

35 

36T = TypeVar("T", bound=BaseType) 

37 

38 

class cstruct:
    """Main class of cstruct. All types are registered in here.

    Args:
        load: Optional definition string; when non-empty it is parsed with :meth:`load` at the end of construction.
        endian: The endianness to use when parsing.
        pointer: The name of the type to use for pointers. When omitted, ``uint64`` is used if
            ``sys.maxsize > 2**32`` and ``uint32`` otherwise.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, load: str = "", *, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        self.includes = []
        # Maps a type name to either a bound type class or the name of another typedef (an alias).
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            # The class name must match the typedef key; it previously was created as "int48".
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

        if load:
            self.load(load)

    def __getattr__(self, attr: str) -> Any:
        """Expose constants and types as attributes.

        Constants in ``consts`` take precedence over entries in ``typedefs``; typedef entries are resolved to
        the actual type object before being returned.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known type name.
        """
        # __getattr__ only runs after normal lookup failed. Read the tables straight from the instance
        # __dict__: going through ``self.consts`` on an instance that has no such attribute yet (for example
        # one created with ``__new__``) would re-enter this method until RecursionError.
        state = self.__dict__

        try:
            return state["consts"][attr]
        except KeyError:
            pass

        try:
            return self.resolve(state["typedefs"][attr])
        except KeyError:
            pass

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a fresh ``__anonymous_<n>__`` name and advance the per-instance counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: When true, overwrite an existing entry without checking for conflicts.

        Raises:
            ValueError: If the name already exists and resolves to a different type (and ``replace`` is false).
        """
        # Re-registering a name is allowed as long as both the old and new entry resolve to the same type.
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    # Alternate spelling of add_type.
    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. Currently, there's
        only one supported parser - DEF_CSTYLE. Parsers can add types and
        modify this cstruct instance. Arguments can be passed to parsers
        using kwargs.

        The CSTYLE parser was recently replaced with token based parser,
        instead of a strictly regex based one. The old parser is still available
        by using DEF_LEGACY.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. Defaults to DEF_CSTYLE.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance, so calls can be chained.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        # NOTE(review): any other deftype value falls through and parses nothing - confirm this is intended.
        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        with Path(path).open() as fh:
            self.load(fh.read(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. Anything that is not a str is returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved, or the alias chain is longer than 10 hops.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow alias -> alias -> ... -> type; the bounded loop also stops alias cycles.
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Class name of the new type.
            bases: Base classes of the new type.
            size: Size of the type; ``None`` marks the type as dynamic.
            alignment: Alignment of the type; falls back to ``size`` when not given (or falsy).
            attrs: Extra class attributes. The dict is not modified.

        Returns:
            The newly created class, with ``cs``, ``size``, ``dynamic`` and ``alignment`` set.
        """
        # Build a fresh namespace instead of updating ``attrs`` in place, so the caller's dict stays untouched.
        # The bookkeeping keys come last so they override same-named caller-supplied entries, as before.
        namespace = {
            **(attrs or {}),
            "cs": self,
            "size": size,
            "dynamic": size is None,
            "alignment": alignment or size,
        }
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type of ``type_``.

        Args:
            type_: The element type.
            num_entries: Number of entries. ``None`` creates a null-terminated array; an ``Expression`` (or a
                dynamic element type) creates an array without a fixed size.

        Raises:
            ValueError: If a fixed size has to be computed but the element type has no size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        # Each element type names its own array base class through ``ArrayType``.
        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast("type[Array]", self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an ``Int`` based type of ``size`` bytes with the given signedness."""
        return cast("type[Int]", self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a ``Packed`` type for a ``struct`` format character; its size is ``struct.calcsize(packchar)``."""
        return cast(
            "type[Packed]",
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an ``Enum`` type bound to this cstruct instance."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a ``Flag`` type bound to this cstruct instance."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a pointer type to ``target``, sized and aligned like this instance's pointer type."""
        return self._make_type(
            f"{target.__name__}*",
            (Pointer,),
            self.pointer.size,
            alignment=self.pointer.alignment,
            attrs={"type": target},
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type (or a subclass such as a union, via ``base``) from a list of fields."""
        return self._make_type(
            name,
            (base,),
            None,
            attrs={
                "fields": fields,
                "__align__": align,
                "__anonymous__": anonymous,
            },
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type; same as :meth:`_make_struct` with ``Union`` as base."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    # Static declarations of the default typedefs for type checkers only; nothing here exists at runtime,
    # where these names are served by __getattr__.
    if TYPE_CHECKING:
        # ruff: noqa: PYI042
        # Keep handles on the builtins, since ``int`` and ``float`` are redefined as aliases below.
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # Names containing spaces cannot be declared as attributes; they are listed as comments for reference.
        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32

573 

574 

def ctypes(structure: type[Structure]) -> type[_ctypes.Structure]:
    """Build a ``ctypes.Structure`` subclass that mirrors a cstruct structure.

    Each entry of ``structure.__fields__`` becomes one ``(name, ctypes type)`` pair in ``_fields_``, in the
    same order; the field types are translated with :func:`ctypes_type`. The new class takes the name of
    the given structure.
    """
    members = [(member._name, ctypes_type(member.type)) for member in structure.__fields__]
    namespace = {"_fields_": members}
    return type(structure.__name__, (_ctypes.Structure,), namespace)

583 

584 

def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type into the matching ctypes type.

    Packed types are mapped by their ``struct`` format character, ``Char``/``Wchar`` map to
    ``c_char``/``c_wchar``, and arrays, pointers and structures are translated recursively.

    Args:
        type_: The cstruct type to translate.

    Returns:
        The ctypes type (or ctypes array/pointer/structure class) for ``type_``.

    Raises:
        NotImplementedError: If the type has no ctypes translation here, including arrays whose number of
            entries is not a plain integer.
    """
    # Keyed by struct format character. Packed types whose packchar is missing here (e.g. "e") fall
    # through to the checks below and end in NotImplementedError.
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        # ctypes arrays need a concrete length; without this check a non-integer count surfaced as an
        # unrelated TypeError from the multiplication below.
        if not isinstance(type_.num_entries, int):
            raise NotImplementedError(f"Dynamic array not implemented: {type_.__name__}")
        subtype = ctypes_type(type_.type)
        return subtype * type_.num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # ``type_`` is itself a class, so ``type_.__class__`` would name its metaclass rather than the
    # unsupported type.
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")