Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dissect/cstruct/cstruct.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

219 statements  

1from __future__ import annotations 

2 

3import ctypes as _ctypes 

4import struct 

5import sys 

6import types 

7from pathlib import Path 

8from typing import TYPE_CHECKING, Any, BinaryIO, TypeVar, cast 

9 

10from dissect.cstruct.exceptions import ResolveError 

11from dissect.cstruct.expression import Expression 

12from dissect.cstruct.parser import CStyleParser, TokenParser 

13from dissect.cstruct.types import ( 

14 LEB128, 

15 Array, 

16 BaseArray, 

17 BaseType, 

18 Char, 

19 Enum, 

20 Field, 

21 Flag, 

22 Int, 

23 Packed, 

24 Pointer, 

25 Structure, 

26 Union, 

27 Void, 

28 Wchar, 

29) 

30 

31if TYPE_CHECKING: 

32 from collections.abc import Iterable 

33 

34 from typing_extensions import TypeAlias 

35 

36 

37T = TypeVar("T", bound=BaseType) 

38 

39 

class cstruct:
    """Main class of cstruct. All types are registered in here.

    Args:
        load: Optional definition to parse right after construction (passed to :meth:`load`).
        endian: The endianness to use when parsing.
        pointer: The pointer type to use for pointers. When omitted, ``uint64`` is used if
            ``sys.maxsize > 2**32`` and ``uint32`` otherwise.
    """

    DEF_CSTYLE = 1
    DEF_LEGACY = 2

    def __init__(self, load: str = "", *, endian: str = "<", pointer: str | None = None):
        self.endian = endian

        self.consts = {}
        self.lookups = {}
        self.includes = []
        # Maps a type name either to a bound type class or to the name of another typedef (an alias).
        # fmt: off
        self.typedefs = {
            # Internal types
            "int8": self._make_packed_type("int8", "b", int),
            "uint8": self._make_packed_type("uint8", "B", int),
            "int16": self._make_packed_type("int16", "h", int),
            "uint16": self._make_packed_type("uint16", "H", int),
            "int32": self._make_packed_type("int32", "i", int),
            "uint32": self._make_packed_type("uint32", "I", int),
            "int64": self._make_packed_type("int64", "q", int),
            "uint64": self._make_packed_type("uint64", "Q", int),
            "float16": self._make_packed_type("float16", "e", float),
            "float": self._make_packed_type("float", "f", float),
            "double": self._make_packed_type("double", "d", float),
            "char": self._make_type("char", (Char,), 1),
            "wchar": self._make_type("wchar", (Wchar,), 2),

            "int24": self._make_int_type("int24", 3, True, alignment=4),
            "uint24": self._make_int_type("uint24", 3, False, alignment=4),
            "int48": self._make_int_type("int48", 6, True, alignment=8),
            # The class name must match the typedef key; it previously read "int48" (copy/paste typo).
            "uint48": self._make_int_type("uint48", 6, False, alignment=8),
            "int128": self._make_int_type("int128", 16, True, alignment=16),
            "uint128": self._make_int_type("uint128", 16, False, alignment=16),

            "uleb128": self._make_type("uleb128", (LEB128,), None, attrs={"signed": False}),
            "ileb128": self._make_type("ileb128", (LEB128,), None, attrs={"signed": True}),

            "void": self._make_type("void", (Void,), 0),

            # Common C types not covered by internal types
            "signed char": "int8",
            "unsigned char": "char",
            "short": "int16",
            "signed short": "int16",
            "unsigned short": "uint16",
            "int": "int32",
            "signed int": "int32",
            "unsigned int": "uint32",
            "long": "int32",
            "signed long": "int32",
            "unsigned long": "uint32",
            "long long": "int64",
            "signed long long": "int64",
            "unsigned long long": "uint64",

            # Windows types
            "BYTE": "uint8",
            "CHAR": "char",
            "SHORT": "int16",
            "WORD": "uint16",
            "DWORD": "uint32",
            "LONG": "int32",
            "LONG32": "int32",
            "LONG64": "int64",
            "LONGLONG": "int64",
            "QWORD": "uint64",
            "OWORD": "uint128",
            "WCHAR": "wchar",

            "UCHAR": "uint8",
            "USHORT": "uint16",
            "ULONG": "uint32",
            "ULONG64": "uint64",
            "ULONGLONG": "uint64",

            "INT": "int32",
            "INT8": "int8",
            "INT16": "int16",
            "INT32": "int32",
            "INT64": "int64",
            "INT128": "int128",

            "UINT": "uint32",
            "UINT8": "uint8",
            "UINT16": "uint16",
            "UINT32": "uint32",
            "UINT64": "uint64",
            "UINT128": "uint128",

            "__int8": "int8",
            "__int16": "int16",
            "__int32": "int32",
            "__int64": "int64",
            "__int128": "int128",

            "unsigned __int8": "uint8",
            "unsigned __int16": "uint16",
            "unsigned __int32": "uint32",
            "unsigned __int64": "uint64",
            "unsigned __int128": "uint128",

            "wchar_t": "wchar",

            # GNU C types
            "int8_t": "int8",
            "int16_t": "int16",
            "int32_t": "int32",
            "int64_t": "int64",
            "int128_t": "int128",

            "uint8_t": "uint8",
            "uint16_t": "uint16",
            "uint32_t": "uint32",
            "uint64_t": "uint64",
            "uint128_t": "uint128",

            # IDA types
            "_BYTE": "uint8",
            "_WORD": "uint16",
            "_DWORD": "uint32",
            "_QWORD": "uint64",
            "_OWORD": "uint128",

            # Other convenience types
            "u1": "uint8",
            "u2": "uint16",
            "u4": "uint32",
            "u8": "uint64",
            "u16": "uint128",
            "__u8": "uint8",
            "__u16": "uint16",
            "__u32": "uint32",
            "__u64": "uint64",
            "uchar": "uint8",
            "ushort": "uint16",
            "uint": "uint32",
            "ulong": "uint32",
        }
        # fmt: on

        pointer = pointer or ("uint64" if sys.maxsize > 2**32 else "uint32")
        self.pointer: type[BaseType] = self.resolve(pointer)
        self._anonymous_count = 0

        if load:
            self.load(load)

    def __getattr__(self, attr: str) -> Any:
        """Expose constants and typedefs as attributes of this instance.

        Constants take precedence over typedefs. Typedefs are resolved to their type class.

        Raises:
            AttributeError: If ``attr`` is neither a known constant nor a known typedef.
        """
        # __getattr__ only runs after normal lookup has failed. Read the registries straight from the
        # instance __dict__: going through ``self.consts`` on an instance where __init__ has not run
        # (e.g. one created via __new__) would re-enter __getattr__ and recurse until RecursionError.
        state = self.__dict__

        try:
            return state["consts"][attr]
        except KeyError:
            pass

        try:
            return self.resolve(state["typedefs"][attr])
        except KeyError:
            pass

        raise AttributeError(f"Invalid attribute: {attr}")

    def _next_anonymous(self) -> str:
        """Return a new ``__anonymous_<n>__`` name and advance the per-instance counter."""
        name = f"__anonymous_{self._anonymous_count}__"
        self._anonymous_count += 1
        return name

    def add_type(self, name: str, type_: type[BaseType] | str, replace: bool = False) -> None:
        """Add a type or type reference.

        Only use this method when creating type aliases or adding already bound types.

        Args:
            name: Name of the type to be added.
            type_: The type to be added. Can be a str reference to another type or a compatible type class.
            replace: When true, skip the duplicate check and overwrite any existing entry for ``name``.

        Raises:
            ValueError: If ``replace`` is false and ``name`` already exists and resolves to a different type.
        """
        if not replace and (name in self.typedefs and self.resolve(self.typedefs[name]) != self.resolve(type_)):
            raise ValueError(f"Duplicate type: {name}")

        self.typedefs[name] = type_

    addtype = add_type

    def add_custom_type(
        self, name: str, type_: type[BaseType], size: int | None = None, alignment: int | None = None, **kwargs
    ) -> None:
        """Add a custom type.

        Use this method to add custom types to this cstruct instance. This is largely a convenience method for
        the internal :func:`_make_type` method, which binds a class to this cstruct instance.

        Args:
            name: Name of the type to be added.
            type_: The type to be added.
            size: The size of the type.
            alignment: The alignment of the type.
            **kwargs: Additional attributes to add to the type.
        """
        self.add_type(name, self._make_type(name, (type_,), size, alignment=alignment, attrs=kwargs))

    def load(self, definition: str, deftype: int | None = None, **kwargs) -> cstruct:
        """Parse structures from the given definitions using the given definition type.

        Definitions can be parsed using different parsers. Currently, there's
        only one supported parser - DEF_CSTYLE. Parsers can add types and
        modify this cstruct instance. Arguments can be passed to parsers
        using kwargs.

        The CSTYLE parser was recently replaced with token based parser,
        instead of a strictly regex based one. The old parser is still available
        by using DEF_LEGACY.

        Args:
            definition: The definition to parse.
            deftype: The definition type to parse the definitions with. Defaults to DEF_CSTYLE.
            **kwargs: Keyword arguments for parsers.

        Returns:
            This cstruct instance.
        """
        deftype = deftype or cstruct.DEF_CSTYLE

        # NOTE: a deftype other than DEF_CSTYLE / DEF_LEGACY matches neither branch, so nothing is parsed.
        if deftype == cstruct.DEF_CSTYLE:
            TokenParser(self, **kwargs).parse(definition)
        elif deftype == cstruct.DEF_LEGACY:
            CStyleParser(self, **kwargs).parse(definition)

        return self

    def loadfile(self, path: str, deftype: int | None = None, **kwargs) -> None:
        """Load structure definitions from a file.

        The given path will be read and parsed using the :meth:`~cstruct.load` function.

        Args:
            path: The path to load definitions from.
            deftype: The definition type to parse the definitions with.
            **kwargs: Keyword arguments for parsers.
        """
        with Path(path).open() as fh:
            self.load(fh.read(), deftype, **kwargs)

    def read(self, name: str, stream: BinaryIO) -> Any:
        """Parse data using a given type.

        Args:
            name: Type name to read.
            stream: File-like object or byte string to parse.

        Returns:
            The parsed data.
        """
        return self.resolve(name).read(stream)

    def resolve(self, name: type[BaseType] | str) -> type[BaseType]:
        """Resolve a type name to get the actual type object.

        Types can be referenced using different names. When we want
        the actual type object, we need to resolve these references.

        Args:
            name: Type name to resolve. Non-string values are returned unchanged.

        Returns:
            The resolved type object.

        Raises:
            ResolveError: If the type can't be resolved.
        """
        type_name = name
        if not isinstance(type_name, str):
            return type_name

        # Follow alias -> alias chains, bounded to 10 hops so a cyclic alias cannot loop forever.
        for _ in range(10):
            if type_name not in self.typedefs:
                raise ResolveError(f"Unknown type {name}")

            type_name = self.typedefs[type_name]

            if not isinstance(type_name, str):
                return type_name

        raise ResolveError(f"Recursion limit exceeded while resolving type {name}")

    def _make_type(
        self,
        name: str,
        bases: Iterable[object],
        size: int | None,
        *,
        alignment: int | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> type[BaseType]:
        """Create a new type class bound to this cstruct instance.

        All types are created using this method. This method automatically binds the type to this cstruct instance.

        Args:
            name: Name of the class to create.
            bases: Base classes of the new class.
            size: Size of the type, or ``None`` for a dynamically sized type.
            alignment: Alignment of the type. Falls back to ``size`` when not given (or falsy).
            attrs: Extra class attributes. The ``cs``, ``size``, ``dynamic`` and ``alignment`` entries set here
                take precedence over same-named keys in ``attrs``.

        Returns:
            The newly created type class.
        """
        # Build a fresh namespace rather than updating ``attrs`` in place, so the caller's dict is not mutated.
        namespace = {
            **(attrs or {}),
            "cs": self,
            "size": size,
            "dynamic": size is None,
            "alignment": alignment or size,
        }
        return types.new_class(name, bases, {}, lambda ns: ns.update(namespace))

    def _make_array(self, type_: T, num_entries: int | Expression | None) -> type[Array[T]]:
        """Create an array type of ``type_``.

        Args:
            type_: The element type.
            num_entries: Number of entries. ``None`` creates a null-terminated array; an :class:`Expression`
                creates an array without a static size.

        Raises:
            ValueError: If a fixed entry count is given but the element type has no size.
        """
        null_terminated = False
        if num_entries is None:
            null_terminated = True
            size = None
        elif isinstance(num_entries, Expression) or type_.dynamic:
            size = None
        else:
            if type_.size is None:
                raise ValueError(f"Cannot create array of dynamic type: {type_.__name__}")
            size = num_entries * type_.size

        name = f"{type_.__name__}[]" if null_terminated else f"{type_.__name__}[{num_entries}]"

        # The element type decides which array class is used as the base.
        bases = (type_.ArrayType,)

        attrs = {
            "type": type_,
            "num_entries": num_entries,
            "null_terminated": null_terminated,
        }

        return cast("type[Array]", self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))

    def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
        """Create an :class:`Int` based type of ``size`` bytes with the given signedness."""
        return cast("type[Int]", self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))

    def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
        """Create a :class:`Packed` type for a ``struct`` format character; size comes from ``struct.calcsize``."""
        return cast(
            "type[Packed]",
            self._make_type(
                name,
                (base, Packed),
                struct.calcsize(packchar),
                alignment=alignment,
                attrs={"packchar": packchar},
            ),
        )

    def _make_enum(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Enum]:
        """Create an :class:`Enum` bound to this instance."""
        return Enum(self, name, type_, values)

    def _make_flag(self, name: str, type_: type[BaseType], values: dict[str, int]) -> type[Flag]:
        """Create a :class:`Flag` bound to this instance."""
        return Flag(self, name, type_, values)

    def _make_pointer(self, target: type[BaseType]) -> type[Pointer]:
        """Create a pointer type to ``target``, sized and aligned like this instance's pointer type."""
        return self._make_type(
            f"{target.__name__}*",
            (Pointer,),
            self.pointer.size,
            alignment=self.pointer.alignment,
            attrs={"type": target},
        )

    def _make_struct(
        self,
        name: str,
        fields: list[Field],
        *,
        align: bool = False,
        anonymous: bool = False,
        base: type[Structure] = Structure,
    ) -> type[Structure]:
        """Create a structure type (or any ``base`` subclass) from a list of fields.

        The type is created with size ``None``; ``fields``, ``__align__`` and ``__anonymous__`` are passed
        as class attributes.
        """
        return self._make_type(
            name,
            (base,),
            None,
            attrs={
                "fields": fields,
                "__align__": align,
                "__anonymous__": anonymous,
            },
        )

    def _make_union(
        self, name: str, fields: list[Field], *, align: bool = False, anonymous: bool = False
    ) -> type[Structure]:
        """Create a union type; same as :meth:`_make_struct` with :class:`Union` as the base."""
        return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union)

    # Static declarations that mirror the typedefs registered in __init__, for the benefit of type checkers.
    # Names containing spaces cannot be declared and are kept as comments only.
    if TYPE_CHECKING:
        # ruff: noqa: PYI042
        _int = int
        _float = float

        class int8(_int, Packed[_int]): ...

        class uint8(_int, Packed[_int]): ...

        class int16(_int, Packed[_int]): ...

        class uint16(_int, Packed[_int]): ...

        class int32(_int, Packed[_int]): ...

        class uint32(_int, Packed[_int]): ...

        class int64(_int, Packed[_int]): ...

        class uint64(_int, Packed[_int]): ...

        class float16(_float, Packed[_float]): ...

        class float(_float, Packed[_float]): ...

        class double(_float, Packed[_float]): ...

        class char(Char): ...

        class wchar(Wchar): ...

        class int24(Int): ...

        class uint24(Int): ...

        class int48(Int): ...

        class uint48(Int): ...

        class int128(Int): ...

        class uint128(Int): ...

        class uleb128(LEB128): ...

        class ileb128(LEB128): ...

        class void(Void): ...

        # signed char: TypeAlias = int8
        # unsigned char: TypeAlias = char
        short: TypeAlias = int16
        # signed short: TypeAlias = int16
        # unsigned short: TypeAlias = uint16
        int: TypeAlias = int32
        # signed int: TypeAlias = int32
        # unsigned int: TypeAlias = uint32
        long: TypeAlias = int32
        # signed long: TypeAlias = int32
        # unsigned long: TypeAlias = uint32
        # long long: TypeAlias = int64
        # signed long long: TypeAlias = int64
        # unsigned long long: TypeAlias = uint64

        BYTE: TypeAlias = uint8
        CHAR: TypeAlias = char
        SHORT: TypeAlias = int16
        WORD: TypeAlias = uint16
        DWORD: TypeAlias = uint32
        LONG: TypeAlias = int32
        LONG32: TypeAlias = int32
        LONG64: TypeAlias = int64
        LONGLONG: TypeAlias = int64
        QWORD: TypeAlias = uint64
        OWORD: TypeAlias = uint128
        WCHAR: TypeAlias = wchar

        UCHAR: TypeAlias = uint8
        USHORT: TypeAlias = uint16
        ULONG: TypeAlias = uint32
        ULONG64: TypeAlias = uint64
        ULONGLONG: TypeAlias = uint64

        INT: TypeAlias = int32
        INT8: TypeAlias = int8
        INT16: TypeAlias = int16
        INT32: TypeAlias = int32
        INT64: TypeAlias = int64
        INT128: TypeAlias = int128

        UINT: TypeAlias = uint32
        UINT8: TypeAlias = uint8
        UINT16: TypeAlias = uint16
        UINT32: TypeAlias = uint32
        UINT64: TypeAlias = uint64
        UINT128: TypeAlias = uint128

        __int8: TypeAlias = int8
        __int16: TypeAlias = int16
        __int32: TypeAlias = int32
        __int64: TypeAlias = int64
        __int128: TypeAlias = int128

        # unsigned __int8: TypeAlias = uint8
        # unsigned __int16: TypeAlias = uint16
        # unsigned __int32: TypeAlias = uint32
        # unsigned __int64: TypeAlias = uint64
        # unsigned __int128: TypeAlias = uint128

        wchar_t: TypeAlias = wchar

        int8_t: TypeAlias = int8
        int16_t: TypeAlias = int16
        int32_t: TypeAlias = int32
        int64_t: TypeAlias = int64
        int128_t: TypeAlias = int128

        uint8_t: TypeAlias = uint8
        uint16_t: TypeAlias = uint16
        uint32_t: TypeAlias = uint32
        uint64_t: TypeAlias = uint64
        uint128_t: TypeAlias = uint128

        _BYTE: TypeAlias = uint8
        _WORD: TypeAlias = uint16
        _DWORD: TypeAlias = uint32
        _QWORD: TypeAlias = uint64
        _OWORD: TypeAlias = uint128

        u1: TypeAlias = uint8
        u2: TypeAlias = uint16
        u4: TypeAlias = uint32
        u8: TypeAlias = uint64
        u16: TypeAlias = uint128
        __u8: TypeAlias = uint8
        __u16: TypeAlias = uint16
        __u32: TypeAlias = uint32
        __u64: TypeAlias = uint64
        uchar: TypeAlias = uint8
        ushort: TypeAlias = uint16
        uint: TypeAlias = uint32
        ulong: TypeAlias = uint32

574 

575 

def ctypes(structure: type[Structure]) -> type[_ctypes.Structure]:
    """Build a ``ctypes.Structure`` subclass that mirrors a cstruct structure.

    Each entry of ``structure.__fields__`` becomes a ``(name, ctypes type)`` pair in ``_fields_``, in the
    same order. The generated class takes its name from ``structure``.
    """
    layout = []
    for member in structure.__fields__:
        converted = ctypes_type(member.type)
        layout.append((member._name, converted))

    namespace = {"_fields_": layout}
    return type(structure.__name__, (_ctypes.Structure,), namespace)

584 

585 

def ctypes_type(type_: type[BaseType]) -> Any:
    """Translate a cstruct type class into the corresponding ctypes type.

    Args:
        type_: The cstruct type class to convert.

    Returns:
        The ctypes type: a fixed-width scalar for packed types whose pack character is mapped below,
        ``c_char`` / ``c_wchar`` for character types, a ctypes array or pointer (element/target converted
        recursively) for arrays and pointers, or a generated ``ctypes.Structure`` subclass for structures.

    Raises:
        NotImplementedError: If the type has no conversion here, including arrays whose number of entries
            is not a plain integer.
    """
    mapping = {
        "b": _ctypes.c_int8,
        "B": _ctypes.c_uint8,
        "h": _ctypes.c_int16,
        "H": _ctypes.c_uint16,
        "i": _ctypes.c_int32,
        "I": _ctypes.c_uint32,
        "q": _ctypes.c_int64,
        "Q": _ctypes.c_uint64,
        "f": _ctypes.c_float,
        "d": _ctypes.c_double,
    }

    # Packed types with an unmapped pack character fall through to the checks below.
    if issubclass(type_, Packed) and type_.packchar in mapping:
        return mapping[type_.packchar]

    if issubclass(type_, Char):
        return _ctypes.c_char

    if issubclass(type_, Wchar):
        return _ctypes.c_wchar

    if issubclass(type_, BaseArray):
        subtype = ctypes_type(type_.type)
        num_entries = type_.num_entries
        # A ctypes array needs a concrete length. Without this check, ``subtype * num_entries`` raised an
        # unrelated TypeError for arrays sized by None or an expression.
        if not isinstance(num_entries, int):
            raise NotImplementedError(f"Dynamic array type not implemented: {type_.__name__}")
        return subtype * num_entries

    if issubclass(type_, Pointer):
        subtype = ctypes_type(type_.type)
        return _ctypes.POINTER(subtype)

    if issubclass(type_, Structure):
        return ctypes(type_)

    # ``type_`` is itself a class, so report its own name; ``type_.__class__`` would name its metaclass.
    raise NotImplementedError(f"Type not implemented: {type_.__name__}")