Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dissect/cstruct/cstruct.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

216 statements  

1from __future__ import annotations 

2 

3import ctypes as _ctypes 

4import struct 

5import sys 

6import types 

7from pathlib import Path 

8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast 

9 

10from dissect.cstruct.exceptions import ResolveError 

11from dissect.cstruct.expression import Expression 

12from dissect.cstruct.parser import CStyleParser, TokenParser 

13from dissect.cstruct.types import ( 

14 LEB128, 

15 Array, 

16 BaseArray, 

17 BaseType, 

18 Char, 

19 Enum, 

20 Field, 

21 Flag, 

22 Int, 

23 Packed, 

24 Pointer, 

25 Structure, 

26 Union, 

27 Void, 

28 Wchar, 

29) 

30 

31if TYPE_CHECKING: 

32 from collections.abc import Iterable 

33 

34 from typing_extensions import TypeAlias 

35 

36 

37T = TypeVar("T", bound=BaseType) 

38 

39 

class cstruct:
    """Main class of cstruct. All types are registered in here.

    Every type created through this class is bound to the instance that created it (see :meth:`_make_type`),
    and can be looked up by name through :meth:`resolve` or as an attribute of the instance.

    Args:
        endian: The endianness to use when parsing.
        pointer: The pointer type to use for pointers. When omitted, ``uint64`` or ``uint32`` is
            chosen based on ``sys.maxsize`` of the running interpreter.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        # Values are either a type class bound to this instance, or a ``str`` naming another entry (an alias)
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        # Fall back on a pointer size derived from the running interpreter when none was requested
        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

    def __getattr__(self, attr: str) -> Any:
        """Look up ``attr`` as a constant first, then as a type name.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known type name.
            ResolveError: If ``attr`` is a known type name whose alias chain can't be resolved.
        """
        # __getattr__ only runs after normal lookup failed. If one of the registries itself is missing
        # (e.g. an instance created without running __init__), touching self.consts/self.typedefs below
        # would re-enter this method forever, so fail immediately instead.
        if attr in ("consts", "typedefs"):
            raise AttributeError(f"Invalid attribute: {attr}")

        try:
            return self.consts[attr]
        except KeyError:
            pass

        try:
            return self.resolve(self.typedefs[attr])
        except KeyError:
            pass

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a fresh ``__anonymous_N__`` name and advance the per-instance counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: Whether an existing type registered under ``name`` may be overwritten.

        Raises:
            ValueError: If the type already exists and resolves to a different type than ``type_``.
        """
        # Re-registering a name is only an error when it would end up pointing at a different type
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. Currently, there's
        only one supported parser - DEF_CSTYLE. Parsers can add types and
        modify this cstruct instance. Arguments can be passed to parsers
        using kwargs.

        The CSTYLE parser was recently replaced with token based parser,
        instead of a strictly regex based one. The old parser is still available
        by using DEF_LEGACY.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. Defaults to DEF_CSTYLE.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance, so calls can be chained.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        # Any other definition type is ignored: nothing is parsed and self is returned unchanged
        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str | Path, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        self.load(Path(path).read_text(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.

        Raises:
            ResolveError: If ``name`` can't be resolved to a type.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. Anything that is not a ``str`` is returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow at most 10 alias hops, so a cycle of aliases ends in an error instead of looping forever
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Name of the new class.
            bases: Base classes of the new class.
            size: Size of the type, or ``None`` for a dynamically sized type.
            alignment: Alignment of the type. Falls back to ``size`` when not given.
            attrs: Additional class attributes. ``cs``, ``size``, ``dynamic`` and ``alignment`` are always
                set by this method and take precedence over entries with the same name.

        Returns:
            The newly created class.
        """
        # Build a new namespace rather than updating ``attrs`` in place, so the caller's dict is left untouched
        namespace = {
            **(attrs or {}),
            "cs": self,
            "size": size,
            "dynamic": size is None,
            "alignment": alignment or size,
        }
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type of ``type_``.

        Args:
            type_: The element type.
            num_entries: Number of entries. ``None`` creates a null terminated array, an
                :class:`Expression` creates an array whose size is not known up front.

        Raises:
            ValueError: If a fixed size array is requested for a type without a size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        # The element type decides which array class is used as the base
        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast(type[Array], self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an :class:`Int` based type of ``size`` bytes with the given signedness."""
        return cast(type[Int], self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a :class:`Packed` based type; its size is ``struct.calcsize(packchar)``."""
        return cast(
            type[Packed],
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an :class:`Enum` named ``name`` with underlying type ``type_``."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a :class:`Flag` named ``name`` with underlying type ``type_``."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a pointer type to ``target``, sized and aligned like this instance's pointer type."""
        return cast(
            type[Pointer],
            self._make_type(
                f"{target.__name__}*",
                (Pointer,),
                self.pointer.size,
                alignment=self.pointer.alignment,
                attrs={"type": target},
            ),
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type.

        Args:
            name: Name of the structure.
            fields: Fields of the structure.
            align: Stored on the new class as ``__align__``.
            anonymous: Stored on the new class as ``__anonymous__``.
            base: Base class to derive the new class from.
        """
        # The size is passed as None here, so the class is created with ``dynamic`` set
        return cast(
            type[Structure],
            self._make_type(
                name,
                (base,),
                None,
                attrs={
                    "fields": fields,
                    "__align__": align,
                    "__anonymous__": anonymous,
                },
            ),
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type; same as :meth:`_make_struct` with :class:`Union` as base."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    if TYPE_CHECKING:
        # Static typing stubs for the default types, never executed at runtime.
        # Names that are not valid Python identifiers can't be declared and are left as comments.
        # ruff: noqa: PYI042
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32

570 

571 

def ctypes(structure: type[Structure]) -> type[_ctypes.Structure]:
    """Create ctypes structures from cstruct structures.

    Each entry of ``structure.__fields__`` becomes one ``(name, ctypes type)`` pair in the
    ``_fields_`` of a new ``ctypes.Structure`` subclass that carries the same name as ``structure``.
    """
    members = [(member._name, ctypes_type(member.type)) for member in structure.__fields__]
    namespace = {"_fields_": members}
    return type(structure.__name__, (_ctypes.Structure,), namespace)

580 

581 

def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type into the equivalent ctypes type.

    Packed types are mapped through their ``packchar``. Arrays, pointers and structures are
    translated recursively.

    Args:
        type_: The cstruct type to translate.

    Returns:
        The matching ctypes type.

    Raises:
        NotImplementedError: If there is no ctypes equivalent for ``type_``, including arrays
            whose number of entries is not a plain integer.
    """
    # struct format character -> fixed width ctypes type ("e"/float16 is absent from this table)
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        # ctypes arrays need a concrete integer length; report anything else through the
        # NotImplementedError this function uses, instead of a TypeError from the multiplication
        if not isinstance(type_.num_entries, int):
            raise NotImplementedError(f"Type not implemented: {type_.__name__}")
        subtype = ctypes_type(type_.type)
        return subtype * type_.num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # type_ is itself a class, so report its own name rather than the name of its metaclass
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")