Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/pdfinterp.py: 85%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

739 statements  

1import logging 

2import re 

3from collections.abc import Mapping, Sequence 

4from io import BytesIO 

5from typing import Union, cast 

6 

7from pdfminer import settings 

8from pdfminer.casting import safe_cmyk, safe_float, safe_int, safe_matrix, safe_rgb 

9from pdfminer.cmapdb import CMap, CMapBase, CMapDB 

10from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace 

11from pdfminer.pdfdevice import PDFDevice, PDFTextSeq 

12from pdfminer.pdfexceptions import PDFException, PDFValueError 

13from pdfminer.pdffont import ( 

14 PDFCIDFont, 

15 PDFFont, 

16 PDFFontError, 

17 PDFTrueTypeFont, 

18 PDFType1Font, 

19 PDFType3Font, 

20) 

21from pdfminer.pdfpage import PDFPage 

22from pdfminer.pdftypes import ( 

23 LITERALS_ASCII85_DECODE, 

24 PDFObjRef, 

25 PDFStream, 

26 dict_value, 

27 list_value, 

28 resolve1, 

29 stream_value, 

30) 

31from pdfminer.psexceptions import PSEOF, PSTypeError 

32from pdfminer.psparser import ( 

33 KWD, 

34 LIT, 

35 PSKeyword, 

36 PSLiteral, 

37 PSStackParser, 

38 PSStackType, 

39 keyword_name, 

40 literal_name, 

41) 

42from pdfminer.utils import ( 

43 MATRIX_IDENTITY, 

44 Matrix, 

45 PathSegment, 

46 Point, 

47 Rect, 

48 choplist, 

49 mult_matrix, 

50) 

51 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

53 

54 

class PDFResourceError(PDFException):
    """PDFException subclass for resource-related errors."""

    pass

57 

58 

class PDFInterpreterError(PDFException):
    """PDFException subclass raised by the page interpreter.

    For example, it is raised for an undefined color space when strict mode
    is enabled.
    """

    pass

61 

62 

# Name objects created once with LIT() so that code in this module can
# compare against them by identity (``is`` / ``is not``).
LITERAL_PDF = LIT("PDF")
LITERAL_TEXT = LIT("Text")
LITERAL_FONT = LIT("Font")
LITERAL_FORM = LIT("Form")
LITERAL_IMAGE = LIT("Image")

68 

69 

class PDFTextState:
    """Mutable record of the text parameters tracked while interpreting a page.

    ``matrix`` and ``linematrix`` are (re)initialised by :meth:`reset`; all
    other attributes get their defaults in ``__init__``.
    """

    matrix: Matrix
    linematrix: Point

    def __init__(self) -> None:
        self.font: PDFFont | None = None
        self.fontsize: float = 0
        self.charspace: float = 0
        self.wordspace: float = 0
        self.scaling: float = 100
        self.leading: float = 0
        self.render: int = 0
        self.rise: float = 0
        # Sets self.matrix and self.linematrix.
        self.reset()

    def __repr__(self) -> str:
        """Return a debug string listing every attribute in a fixed order."""
        names = (
            "font",
            "fontsize",
            "charspace",
            "wordspace",
            "scaling",
            "leading",
            "render",
            "rise",
            "matrix",
            "linematrix",
        )
        listing = ", ".join(f"{name}={getattr(self, name)!r}" for name in names)
        return f"<PDFTextState: {listing}>"

    def copy(self) -> "PDFTextState":
        """Return a new PDFTextState carrying the same attribute values."""
        clone = PDFTextState()
        for name in (
            "font",
            "fontsize",
            "charspace",
            "wordspace",
            "scaling",
            "leading",
            "render",
            "rise",
            "matrix",
            "linematrix",
        ):
            setattr(clone, name, getattr(self, name))
        return clone

    def reset(self) -> None:
        """Reset the text matrix to identity and the line matrix to the origin."""
        self.linematrix = (0, 0)
        self.matrix = MATRIX_IDENTITY

118 

119 

# Standard color types (used standalone or as base for uncolored patterns).
# A single float is a gray level; 3-tuples and 4-tuples are RGB and CMYK.
StandardColor = Union[
    float,  # Greyscale
    tuple[float, float, float],  # R, G, B
    tuple[float, float, float, float],  # C, M, Y, K
]

# Complete color type including patterns.
# This is the type of PDFGraphicState.scolor / PDFGraphicState.ncolor.
Color = Union[
    StandardColor,  # Standard colors (gray, RGB, CMYK)
    str,  # Pattern name (colored pattern, PaintType=1)
    tuple[
        StandardColor, str
    ],  # (base_color, pattern_name) (uncolored pattern, PaintType=2)
]

135 

136 

class PDFGraphicState:
    """Mutable record of the graphics parameters tracked while interpreting."""

    def __init__(self) -> None:
        self.linewidth: float = 0
        self.linecap: object | None = None
        self.linejoin: object | None = None
        self.miterlimit: object | None = None
        self.dash: tuple[object, object] | None = None
        self.intent: object | None = None
        self.flatness: object | None = None

        # Stroking color and its color space.
        self.scolor: Color = 0
        self.scs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

        # Non-stroking color and its color space.
        self.ncolor: Color = 0
        self.ncs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

    def copy(self) -> "PDFGraphicState":
        """Return a new PDFGraphicState carrying the same attribute values."""
        clone = PDFGraphicState()
        for name in (
            "linewidth",
            "linecap",
            "linejoin",
            "miterlimit",
            "dash",
            "intent",
            "flatness",
            "scolor",
            "scs",
            "ncolor",
            "ncs",
        ):
            setattr(clone, name, getattr(self, name))
        return clone

    def __repr__(self) -> str:
        """Return a debug string; the color spaces themselves are not listed."""
        labelled = (
            ("linewidth", self.linewidth),
            ("linecap", self.linecap),
            ("linejoin", self.linejoin),
            ("miterlimit", self.miterlimit),
            ("dash", self.dash),
            ("intent", self.intent),
            ("flatness", self.flatness),
            ("stroking color", self.scolor),
            ("non stroking color", self.ncolor),
        )
        listing = ", ".join(f"{label}={value!r}" for label, value in labelled)
        return f"<PDFGraphicState: {listing}>"

183 

184 

class PDFResourceManager:
    """Repository of shared resources.

    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
    """

    def __init__(self, caching: bool = True) -> None:
        self._cached_fonts: dict[object, PDFFont] = {}
        self.caching = caching

    def get_procset(self, procs: Sequence[object]) -> None:
        """Walk the procedure-set names; none of them triggers any action."""
        for proc in procs:
            # /PDF and /Text are recognised, but like every other entry they
            # need no handling.
            if proc is not LITERAL_PDF and proc is not LITERAL_TEXT:
                continue

    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        """Look up a CMap by name.

        When the CMap is not found, the lookup error propagates if ``strict``
        is true; otherwise an empty ``CMap`` is returned.
        """
        try:
            found = CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
            if strict:
                raise
            return CMap()
        return found

    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        """Return the font for ``spec``, reusing a cached one when possible.

        A truthy ``objid`` is used as the cache key; fonts are stored only
        while ``self.caching`` is enabled. In strict mode malformed specs
        raise ``PDFFontError``; otherwise Type1 is used as the fallback.
        """
        if objid and objid in self._cached_fonts:
            return self._cached_fonts[objid]

        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        if settings.STRICT and spec["Type"] is not LITERAL_FONT:
            raise PDFFontError("Type is not /Font")

        # Work out which kind of font object to build.
        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        elif settings.STRICT:
            raise PDFFontError("Font Subtype is not specified.")
        else:
            subtype = "Type1"

        font: PDFFont
        if subtype in ("Type1", "MMType1"):
            font = PDFType1Font(self, spec)
        elif subtype == "TrueType":
            font = PDFTrueTypeFont(self, spec)
        elif subtype == "Type3":
            font = PDFType3Font(self, spec)
        elif subtype in ("CIDFontType0", "CIDFontType2"):
            font = PDFCIDFont(self, spec)
        elif subtype == "Type0":
            # Composite font: build from the first descendant, carrying over
            # the parent's Encoding / ToUnicode entries when present.
            descendants = list_value(spec["DescendantFonts"])
            assert descendants
            subspec = dict_value(descendants[0]).copy()
            for key in ("Encoding", "ToUnicode"):
                if key in spec:
                    subspec[key] = resolve1(spec[key])
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError(f"Invalid Font spec: {spec!r}")
            font = PDFType1Font(self, spec)  # this is so wrong!

        if objid and self.caching:
            self._cached_fonts[objid] = font
        return font

254 

255 

class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
    """Stack parser that reads a sequence of content streams as one input.

    Streams are opened lazily, one after another, by :meth:`fillfp`.
    Inline images (``BI ... ID ... EI``) are converted into ``PDFStream``
    objects by :meth:`do_keyword`.
    """

    def __init__(self, streams: Sequence[object]) -> None:
        self.streams = streams
        self.istream = 0
        # PSStackParser.__init__(fp=None) is safe only because we've overloaded
        # all the methods that would attempt to access self.fp without first
        # calling self.fillfp().
        PSStackParser.__init__(self, None)  # type: ignore[arg-type]

    def fillfp(self) -> bool:
        """Open the next stream when none is open.

        Returns True when a new stream was opened and False when one was
        already open. Raises ``PSEOF`` once every stream has been consumed.
        """
        if not self.fp:
            if self.istream < len(self.streams):
                strm = stream_value(self.streams[self.istream])
                self.istream += 1
            else:
                raise PSEOF("Unexpected EOF, file truncated?")
            self.fp = BytesIO(strm.get_data())
            return True
        return False

    def seek(self, pos: int) -> None:
        """Seek within the current stream, opening one first if needed."""
        self.fillfp()
        PSStackParser.seek(self, pos)

    def fillbuf(self) -> bool:
        """Refill the read buffer once it has been fully consumed.

        Returns False when the buffer still has unread bytes; otherwise
        returns whatever the last :meth:`fillfp` call returned.
        """
        if self.charpos < len(self.buf):
            return False
        new_stream = False
        while True:
            new_stream = self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
            # Current stream is exhausted: drop it so fillfp() opens the next.
            self.fp = None  # type: ignore[assignment]
        self.charpos = 0
        return new_stream

    def get_inline_data(self, pos: int, target: bytes = b"EI") -> tuple[int, bytes]:
        """Read raw bytes from ``pos`` up to ``target`` followed by whitespace.

        Returns ``(pos, data)`` with the terminator, the whitespace byte
        after it, and one trailing end-of-line sequence removed from ``data``.
        """
        self.seek(pos)
        # i counts how many bytes of target have been matched so far;
        # len(target) + 1 means "target plus trailing whitespace" was seen.
        i = 0
        data = b""
        while i <= len(target):
            self.fillbuf()
            if i:
                ci = self.buf[self.charpos]
                c = bytes((ci,))
                data += c
                self.charpos += 1
                if (len(target) <= i and c.isspace()) or (
                    i < len(target) and c == (bytes((target[i],)))
                ):
                    i += 1
                else:
                    i = 0
            else:
                # Not inside a match: jump to the next occurrence of the
                # first target byte, or consume the rest of the buffer.
                try:
                    j = self.buf.index(target[0], self.charpos)
                    data += self.buf[self.charpos : j + 1]
                    self.charpos = j + 1
                    i = 1
                except ValueError:
                    data += self.buf[self.charpos :]
                    self.charpos = len(self.buf)
        data = data[: -(len(target) + 1)]  # strip the last part
        data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data)
        return (pos, data)

    def flush(self) -> None:
        """Move everything on the parser stack to the results."""
        self.add_results(*self.popall())

    KEYWORD_BI = KWD(b"BI")
    KEYWORD_ID = KWD(b"ID")
    KEYWORD_EI = KWD(b"EI")

    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handle a keyword token, assembling inline images into streams.

        ``BI`` opens an "inline" context. ``ID`` closes it, builds the image
        dictionary from the collected key/value pairs, reads the image data
        and pushes a ``PDFStream``. Any other keyword is pushed unchanged.
        A ``PSTypeError`` raised while handling ``ID`` is re-raised only in
        strict mode.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, "inline")
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type("inline")
                if len(objs) % 2 != 0:
                    error_msg = f"Invalid dictionary construct: {objs!r}"
                    raise PSTypeError(error_msg)
                d = {literal_name(k): resolve1(v) for (k, v) in choplist(2, objs)}
                eos = b"EI"
                filters = d.get("F")
                if filters is not None:
                    if isinstance(filters, PSLiteral):
                        filters = [filters]
                    # Only inspect the first filter when there is one; an
                    # empty array or a non-sequence value is ignored rather
                    # than raising IndexError/TypeError on malformed input.
                    if (
                        isinstance(filters, (list, tuple))
                        and filters
                        and filters[0] in LITERALS_ASCII85_DECODE
                    ):
                        eos = b"~>"
                (pos, data) = self.get_inline_data(pos + len(b"ID "), target=eos)
                if eos != b"EI":  # it may be necessary for decoding
                    data += eos
                obj = PDFStream(d, data)
                self.push((pos, obj))
                if eos == b"EI":  # otherwise it is still in the stream
                    self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if settings.STRICT:
                    raise
        else:
            self.push((pos, token))

361 

362 

# Types that may appear on the PDF argument stack.
# Used as the operand type of the interpreter's ``do_*`` operator methods.
PDFStackT = PSStackType[PDFStream]

365 

366 

367class PDFPageInterpreter: 

368 """Processor for the content of a PDF page 

369 

370 Reference: PDF Reference, Appendix A, Operator Summary 

371 """ 

372 

def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
    """Store the resource manager and output device; start with no streams."""
    # Stream IDs from parent interpreters in the call stack.
    self.parent_stream_ids: set[int] = set()
    # Stream IDs currently being executed, used to detect circular references.
    self.stream_ids: set[int] = set()
    self.device = device
    self.rsrcmgr = rsrcmgr

380 

def dup(self) -> "PDFPageInterpreter":
    """Return a fresh interpreter of the same class, sharing manager and device."""
    factory = self.__class__
    return factory(self.rsrcmgr, self.device)

383 

def subinterp(self) -> "PDFPageInterpreter":
    """Create a sub-interpreter for processing nested content streams.

    The result comes from ``dup()`` and additionally inherits this
    interpreter's ``parent_stream_ids`` and ``stream_ids`` as its own
    ``parent_stream_ids``, so circular references can be detected across
    nested XObject invocations.
    """
    child = self.dup()
    for inherited in (self.parent_stream_ids, self.stream_ids):
        child.parent_stream_ids.update(inherited)
    return child

396 

def init_resources(self, resources: dict[object, object]) -> None:
    """Prepare the fonts and XObjects listed in the Resource attribute.

    Resets ``fontmap``, ``xobjmap`` and ``csmap`` (the latter to a copy of
    the predefined color spaces), then fills them from the ``Font``,
    ``ColorSpace`` and ``XObject`` entries of ``resources``.
    """
    self.resources = resources
    self.fontmap: dict[object, PDFFont] = {}
    self.xobjmap = {}
    self.csmap: dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
    if not resources:
        return

    def get_colorspace(spec: object) -> PDFColorSpace | None:
        # A color space is either a bare name or an array whose first
        # element is the family name.
        is_array = isinstance(spec, list)
        name = literal_name(spec[0] if is_array else spec)
        has_params = is_array and len(spec) >= 2
        if has_params and name == "ICCBased":
            return PDFColorSpace(name, stream_value(spec[1])["N"])
        if has_params and name == "DeviceN":
            return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE.get(name)

    for category, entries in dict_value(resources).items():
        log.debug("Resource: %r: %r", category, entries)
        if category == "Font":
            for fontid, fontspec in dict_value(entries).items():
                # Only indirect references have an object id to cache under.
                objid = fontspec.objid if isinstance(fontspec, PDFObjRef) else None
                self.fontmap[fontid] = self.rsrcmgr.get_font(
                    objid, dict_value(fontspec)
                )
        elif category == "ColorSpace":
            for csid, csspec in dict_value(entries).items():
                colorspace = get_colorspace(resolve1(csspec))
                if colorspace is not None:
                    self.csmap[csid] = colorspace
        elif category == "ProcSet":
            self.rsrcmgr.get_procset(list_value(entries))
        elif category == "XObject":
            for xobjid, xobjstrm in dict_value(entries).items():
                self.xobjmap[xobjid] = xobjstrm

437 

def init_state(self, ctm: Matrix) -> None:
    """Initialize the text and graphic states for rendering a page."""
    # Install the transformation matrix and tell the device about it.
    self.ctm = ctm
    self.device.set_ctm(self.ctm)
    # Fresh text and graphics states, with an empty save stack.
    self.gstack: list[tuple[Matrix, PDFTextState, PDFGraphicState]] = []
    self.textstate = PDFTextState()
    self.graphicstate = PDFGraphicState()
    # No path under construction and no pending operator arguments.
    self.curpath: list[PathSegment] = []
    self.argstack: list[PDFStackT] = []

449 

def push(self, obj: PDFStackT) -> None:
    """Append ``obj`` to the argument stack."""
    stack = self.argstack
    stack.append(obj)

452 

def pop(self, n: int) -> list[PDFStackT]:
    """Remove and return the last ``n`` entries of the argument stack.

    Fewer than ``n`` entries are returned when the stack is shorter.
    """
    if n == 0:
        # A zero slice bound would select everything, so handle it up front.
        return []
    stack = self.argstack
    taken, self.argstack = stack[-n:], stack[:-n]
    return taken

459 

def get_current_state(self) -> tuple[Matrix, PDFTextState, PDFGraphicState]:
    """Return the CTM together with copies of the text and graphics states."""
    text_snapshot = self.textstate.copy()
    graphic_snapshot = self.graphicstate.copy()
    return (self.ctm, text_snapshot, graphic_snapshot)

462 

def set_current_state(
    self,
    state: tuple[Matrix, PDFTextState, PDFGraphicState],
) -> None:
    """Install a saved (ctm, textstate, graphicstate) triple and sync the device."""
    ctm, textstate, graphicstate = state
    self.ctm = ctm
    self.textstate = textstate
    self.graphicstate = graphicstate
    self.device.set_ctm(self.ctm)

469 

def do_q(self) -> None:
    """Save graphics state"""
    snapshot = self.get_current_state()
    self.gstack.append(snapshot)

473 

def do_Q(self) -> None:
    """Restore graphics state"""
    if not self.gstack:
        # Nothing was saved; an unbalanced restore is ignored.
        return
    self.set_current_state(self.gstack.pop())

478 

def do_cm(
    self,
    a1: PDFStackT,
    b1: PDFStackT,
    c1: PDFStackT,
    d1: PDFStackT,
    e1: PDFStackT,
    f1: PDFStackT,
) -> None:
    """Concatenate matrix to current transformation matrix

    Operands that cannot be converted to a matrix are reported with a
    warning and the CTM is left unchanged.
    """
    operands = (a1, b1, c1, d1, e1, f1)
    matrix = safe_matrix(*operands)
    if matrix is None:
        log.warning(
            "Cannot concatenate matrix to current transformation matrix "
            f"because not all values in {operands!r} "
            "can be parsed as floats"
        )
        return
    self.ctm = mult_matrix(matrix, self.ctm)
    self.device.set_ctm(self.ctm)

500 

def do_w(self, linewidth: PDFStackT) -> None:
    """Set line width

    The stored width is the operand multiplied by the length of the
    vector formed by the first two CTM entries.
    """
    width = safe_float(linewidth)
    if width is None:
        log.warning(
            f"Cannot set line width because {linewidth!r} is an invalid float value"
        )
        return
    a, b = self.ctm[0], self.ctm[1]
    scale = (a**2 + b**2) ** 0.5
    self.graphicstate.linewidth = width * scale

511 

def do_J(self, linecap: PDFStackT) -> None:
    """Set line cap style"""
    gstate = self.graphicstate
    gstate.linecap = linecap

515 

def do_j(self, linejoin: PDFStackT) -> None:
    """Set line join style"""
    gstate = self.graphicstate
    gstate.linejoin = linejoin

519 

def do_M(self, miterlimit: PDFStackT) -> None:
    """Set miter limit"""
    gstate = self.graphicstate
    gstate.miterlimit = miterlimit

523 

def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
    """Set line dash pattern"""
    pattern = (dash, phase)
    self.graphicstate.dash = pattern

527 

def do_ri(self, intent: PDFStackT) -> None:
    """Set color rendering intent"""
    gstate = self.graphicstate
    gstate.intent = intent

531 

def do_i(self, flatness: PDFStackT) -> None:
    """Set flatness tolerance"""
    gstate = self.graphicstate
    gstate.flatness = flatness

535 

def do_gs(self, name: PDFStackT) -> None:
    """Set parameters from graphics state parameter dictionary

    Not implemented: the operand is accepted and ignored.
    """
    # TODO

539 

def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
    """Begin new subpath

    Appends an ``("m", x, y)`` segment to the current path, or logs a
    warning and appends nothing when a coordinate is not a valid float.
    """
    coords = [safe_float(value) for value in (x, y)]
    if any(coord is None for coord in coords):
        raw = ("m", x, y)
        log.warning(
            "Cannot start new subpath because not all values "
            f"in {raw!r} can be parsed as floats"
        )
        return
    self.curpath.append(("m", *coords))

554 

def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
    """Append straight line segment to path

    Appends an ``("l", x, y)`` segment, or logs a warning and appends
    nothing when a coordinate is not a valid float.
    """
    coords = [safe_float(value) for value in (x, y)]
    if any(coord is None for coord in coords):
        raw = ("l", x, y)
        log.warning(
            "Cannot append straight line segment to path "
            f"because not all values in {raw!r} can be parsed as floats"
        )
        return
    self.curpath.append(("l", *coords))

568 

def do_c(
    self,
    x1: PDFStackT,
    y1: PDFStackT,
    x2: PDFStackT,
    y2: PDFStackT,
    x3: PDFStackT,
    y3: PDFStackT,
) -> None:
    """Append curved segment to path (three control points)

    Appends a ``("c", x1, y1, x2, y2, x3, y3)`` segment, or logs a warning
    and appends nothing when any coordinate is not a valid float.
    """
    operands = (x1, y1, x2, y2, x3, y3)
    coords = [safe_float(value) for value in operands]
    if any(coord is None for coord in coords):
        raw = ("c", *operands)
        log.warning(
            "Cannot append curved segment to path "
            f"because not all values in {raw!r} can be parsed as floats"
        )
        return
    self.curpath.append(("c", *coords))

601 

def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Append curved segment to path (initial point replicated)

    Appends a ``("v", x2, y2, x3, y3)`` segment, or logs a warning and
    appends nothing when any coordinate is not a valid float.
    """
    operands = (x2, y2, x3, y3)
    coords = [safe_float(value) for value in operands]
    if any(coord is None for coord in coords):
        raw = ("v", *operands)
        log.warning(
            "Cannot append curved segment to path "
            f"because not all values in {raw!r} can be parsed as floats"
        )
        return
    self.curpath.append(("v", *coords))

617 

def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Append curved segment to path (final point replicated)

    Appends a ``("y", x1, y1, x3, y3)`` segment, or logs a warning and
    appends nothing when any coordinate is not a valid float.
    """
    operands = (x1, y1, x3, y3)
    coords = [safe_float(value) for value in operands]
    if any(coord is None for coord in coords):
        raw = ("y", *operands)
        log.warning(
            "Cannot append curved segment to path "
            f"because not all values in {raw!r} can be parsed as floats"
        )
        return
    self.curpath.append(("y", *coords))

633 

def do_h(self) -> None:
    """Close subpath"""
    close_segment = ("h",)
    self.curpath.append(close_segment)

637 

def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
    """Append rectangle to path

    The rectangle is added as a move, three lines and a close segment.
    When any operand is not a valid float a warning is logged and the
    path is left unchanged.
    """
    values = (x, y, w, h)
    parsed = [safe_float(value) for value in values]
    if any(number is None for number in parsed):
        log.warning(
            "Cannot append rectangle to path "
            f"because not all values in {values!r} can be parsed as floats"
        )
        return
    x_f, y_f, w_f, h_f = parsed
    self.curpath.extend(
        [
            ("m", x_f, y_f),
            ("l", x_f + w_f, y_f),
            ("l", x_f + w_f, y_f + h_f),
            ("l", x_f, y_f + h_f),
            ("h",),
        ]
    )

657 

def do_S(self) -> None:
    """Stroke path"""
    stroke, fill, evenodd = True, False, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    # Painting ends the path; start a new, empty one.
    self.curpath = []

662 

def do_s(self) -> None:
    """Close and stroke path"""
    for step in (self.do_h, self.do_S):
        step()

667 

def do_f(self) -> None:
    """Fill path using nonzero winding number rule"""
    stroke, fill, evenodd = False, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    # Painting ends the path; start a new, empty one.
    self.curpath = []

672 

def do_F(self) -> None:
    """Fill path using nonzero winding number rule (obsolete)

    ``F`` is the legacy spelling of ``f`` and has the same effect, so this
    delegates to ``do_f``. Without the delegation the path would never be
    painted and its segments would leak into the next path.
    """
    self.do_f()

675 

def do_f_a(self) -> None:
    """Fill path using even-odd rule"""
    stroke, fill, evenodd = False, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    # Painting ends the path; start a new, empty one.
    self.curpath = []

680 

def do_B(self) -> None:
    """Fill and stroke path using nonzero winding number rule"""
    stroke, fill, evenodd = True, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    # Painting ends the path; start a new, empty one.
    self.curpath = []

685 

def do_B_a(self) -> None:
    """Fill and stroke path using even-odd rule"""
    stroke, fill, evenodd = True, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    # Painting ends the path; start a new, empty one.
    self.curpath = []

690 

def do_b(self) -> None:
    """Close, fill, and stroke path using nonzero winding number rule"""
    for step in (self.do_h, self.do_B):
        step()

695 

def do_b_a(self) -> None:
    """Close, fill, and stroke path using even-odd rule"""
    for step in (self.do_h, self.do_B_a):
        step()

700 

def do_n(self) -> None:
    """End path without filling or stroking"""
    # Discard whatever was built; nothing is sent to the device.
    self.curpath = list()

704 

def do_W(self) -> None:
    """Set clipping path using nonzero winding number rule

    Not implemented: the operator is accepted and has no effect.
    """

707 

def do_W_a(self) -> None:
    """Set clipping path using even-odd rule

    Not implemented: the operator is accepted and has no effect.
    """

710 

def do_CS(self, name: PDFStackT) -> None:
    """Set color space for stroking operations

    Introduced in PDF 1.1

    An unknown color space name raises ``PDFInterpreterError`` in strict
    mode and is otherwise ignored.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if not settings.STRICT:
            return
        raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    self.graphicstate.scs = colorspace

721 

def do_cs(self, name: PDFStackT) -> None:
    """Set color space for nonstroking operations

    An unknown color space name raises ``PDFInterpreterError`` in strict
    mode and is otherwise ignored.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if not settings.STRICT:
            return
        raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    self.graphicstate.ncs = colorspace

729 

def do_G(self, gray: PDFStackT) -> None:
    """Set gray level for stroking operations

    Also switches the stroking color space to DeviceGray. An invalid
    operand is reported with a warning and changes nothing.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            f"Cannot set gray level because {gray!r} is an invalid float value"
        )
        return
    gstate = self.graphicstate
    gstate.scolor = level
    gstate.scs = self.csmap["DeviceGray"]

741 

def do_g(self, gray: PDFStackT) -> None:
    """Set gray level for nonstroking operations

    Also switches the non-stroking color space to DeviceGray. An invalid
    operand is reported with a warning and changes nothing.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            f"Cannot set gray level because {gray!r} is an invalid float value"
        )
        return
    gstate = self.graphicstate
    gstate.ncolor = level
    gstate.ncs = self.csmap["DeviceGray"]

753 

def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for stroking operations

    Also switches the stroking color space to DeviceRGB. Invalid operands
    are reported with a warning and change nothing.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB stroke color "
            f"because not all values in {(r, g, b)!r} can be parsed as floats"
        )
        return
    gstate = self.graphicstate
    gstate.scolor = color
    gstate.scs = self.csmap["DeviceRGB"]

766 

def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for nonstroking operations

    Also switches the non-stroking color space to DeviceRGB. Invalid
    operands are reported with a warning and change nothing.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB non-stroke color "
            f"because not all values in {(r, g, b)!r} can be parsed as floats"
        )
        return
    gstate = self.graphicstate
    gstate.ncolor = color
    gstate.ncs = self.csmap["DeviceRGB"]

779 

def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set CMYK color for stroking operations

    Also switches the stroking color space to DeviceCMYK. Invalid operands
    are reported with a warning and change nothing.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK stroke color "
            f"because not all values in {(c, m, y, k)!r} can be parsed as floats"
        )
        return
    gstate = self.graphicstate
    gstate.scolor = color
    gstate.scs = self.csmap["DeviceCMYK"]

792 

def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set CMYK color for nonstroking operations

    Also switches the non-stroking color space to DeviceCMYK. Invalid
    operands are reported with a warning and change nothing.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK non-stroke color "
            f"because not all values in {(c, m, y, k)!r} can be parsed as floats"
        )
        return
    gstate = self.graphicstate
    gstate.ncolor = color
    gstate.ncs = self.csmap["DeviceCMYK"]

805 

def _parse_color_components(
    self, components: list[PDFStackT], context: str
) -> StandardColor | None:
    """Parse color components into StandardColor (gray, RGB, or CMYK).

    Args:
        components: List of 1, 3, or 4 numeric color components
        context: Description for error messages (e.g., "stroke", "non-stroke")

    Returns:
        Parsed color (float for gray, tuple for RGB/CMYK) or None if invalid.
        Every failure is reported through a log warning.
    """
    count = len(components)

    if count == 1:
        gray = safe_float(components[0])
        if gray is None:
            log.warning(
                f"Cannot set {context} color: "
                f"{components[0]!r} is an invalid float value"
            )
        return gray

    if count == 3:
        rgb = safe_rgb(*components)
        if rgb is None:
            log.warning(
                f"Cannot set {context} color: "
                f"components {components!r} cannot be parsed as RGB"
            )
        return rgb

    if count == 4:
        cmyk = safe_cmyk(*components)
        if cmyk is None:
            log.warning(
                f"Cannot set {context} color: "
                f"components {components!r} cannot be parsed as CMYK"
            )
        return cmyk

    # Any other operand count has no supported interpretation.
    log.warning(
        f"Cannot set {context} color: "
        f"{count} components specified, "
        "but only 1 (grayscale), 3 (RGB), and 4 (CMYK) are supported"
    )
    return None

852 

def do_SCN(self) -> None:
    """Set color for stroking operations.

    Pops as many operands as the stroking color space has components.
    Outside the Pattern color space they are parsed as gray/RGB/CMYK.
    Inside it (ISO 32000-1:2008 4.5.5, ISO 32000-2:2020 8.7.3):
    - Colored patterns (PaintType=1): single operand (pattern name)
    - Uncolored patterns (PaintType=2): n+1 operands (colors + pattern name)
    """
    gstate = self.graphicstate
    expected = gstate.scs.ncomponents
    operands = self.pop(expected)

    if len(operands) != expected:
        log.warning(
            "Cannot set stroke color because "
            f"expected {expected} components but got {operands!r}"
        )
        return

    if gstate.scs.name != "Pattern":
        # Standard colors (gray, RGB, CMYK) - common case
        color = self._parse_color_components(operands, "stroke")
        if color is not None:
            gstate.scolor = color
        return

    if not operands:
        return

    # Pattern color space: the last operand is always the pattern name,
    # anything before it is the base color of an uncolored pattern.
    *base_operands, name_operand = operands

    # Per spec: pattern name must be a name object (PSLiteral)
    if not isinstance(name_operand, PSLiteral):
        log.warning(
            f"Pattern color space requires name object (PSLiteral), "
            f"got {type(name_operand).__name__}: {name_operand!r}. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name SCN'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name SCN'."
        )
        return

    pattern_name = literal_name(name_operand)

    if not base_operands:
        # Colored tiling pattern (PaintType=1): just pattern name
        gstate.scolor = pattern_name
        log.debug(f"Set stroke pattern (colored): {pattern_name}")
        return

    # Uncolored tiling pattern (PaintType=2): base color + pattern name
    base_color = self._parse_color_components(
        base_operands, "stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    # Stored as the pair (base_color, pattern_name)
    gstate.scolor = (base_color, pattern_name)
    log.debug(f"Set stroke pattern (uncolored): {base_color} + {pattern_name}")

915 

def do_scn(self) -> None:
    """Set color for nonstroking operations.

    Pops as many operands as the non-stroking color space has components.
    Outside the Pattern color space they are parsed as gray/RGB/CMYK.
    Inside it (ISO 32000-1:2008 4.5.5, ISO 32000-2:2020 8.7.3):
    - Colored patterns (PaintType=1): single operand (pattern name)
    - Uncolored patterns (PaintType=2): n+1 operands (colors + pattern name)
    """
    gstate = self.graphicstate
    expected = gstate.ncs.ncomponents
    operands = self.pop(expected)

    if len(operands) != expected:
        log.warning(
            "Cannot set non-stroke color because "
            f"expected {expected} components but got {operands!r}"
        )
        return

    if gstate.ncs.name != "Pattern":
        # Standard colors (gray, RGB, CMYK) - common case
        color = self._parse_color_components(operands, "non-stroke")
        if color is not None:
            gstate.ncolor = color
        return

    if not operands:
        return

    # Pattern color space: the last operand is always the pattern name,
    # anything before it is the base color of an uncolored pattern.
    *base_operands, name_operand = operands

    # Per spec: pattern name must be a name object (PSLiteral)
    if not isinstance(name_operand, PSLiteral):
        log.warning(
            f"Pattern color space requires name object (PSLiteral), "
            f"got {type(name_operand).__name__}: {name_operand!r}. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name scn'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name scn'."
        )
        return

    pattern_name = literal_name(name_operand)

    if not base_operands:
        # Colored tiling pattern (PaintType=1): just pattern name
        gstate.ncolor = pattern_name
        log.debug(f"Set non-stroke pattern (colored): {pattern_name}")
        return

    # Uncolored tiling pattern (PaintType=2): base color + pattern name
    base_color = self._parse_color_components(
        base_operands, "non-stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    # Stored as the pair (base_color, pattern_name)
    gstate.ncolor = (base_color, pattern_name)
    log.debug(f"Set non-stroke pattern (uncolored): {base_color} + {pattern_name}")

978 

def do_SC(self) -> None:
    """Set the color used for stroking operations.

    The ``SC`` operator is handled exactly like ``SCN``; all operand
    handling is delegated to ``do_SCN``.
    """
    # No operands are taken here: do_SCN pops what it needs itself.
    self.do_SCN()

982 

def do_sc(self) -> None:
    """Set the color used for non-stroking operations.

    The ``sc`` operator is handled exactly like ``scn``; all operand
    handling is delegated to ``do_scn``.
    """
    # No operands are taken here: do_scn pops what it needs itself.
    self.do_scn()

986 

def do_sh(self, name: object) -> None:
    """Paint the area defined by a shading pattern.

    The operand is consumed but otherwise ignored; this method has no body.

    :param name: the single operand of the ``sh`` operator (unused)
    """

989 

def do_BT(self) -> None:
    """Begin a text object.

    Resets the text state, which initializes the text matrix (Tm) and the
    text line matrix (Tlm) to the identity matrix. Text objects cannot be
    nested; a second BT cannot appear before an ET.
    """
    text_state = self.textstate
    text_state.reset()

998 

def do_ET(self) -> None:
    """End a text object.

    This method has no body; the operator is accepted and ignored.
    """

1001 

def do_BX(self) -> None:
    """Begin a compatibility section.

    This method has no body; the operator is accepted and ignored.
    """

1004 

def do_EX(self) -> None:
    """End a compatibility section.

    This method has no body; the operator is accepted and ignored.
    """

1007 

def do_MP(self, tag: PDFStackT) -> None:
    """Define a marked-content point.

    :param tag: the marked-content tag; it is forwarded to the device only
        when it is a ``PSLiteral``, otherwise a warning is logged.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot define marked-content point because {tag!r} is not a PSLiteral"
        )
        return
    self.device.do_tag(tag)

1016 

def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Define a marked-content point with an associated property list.

    :param tag: the marked-content tag; it is forwarded to the device only
        when it is a ``PSLiteral``, otherwise a warning is logged.
    :param props: the property list operand, passed through unchanged.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot define marked-content point with property list "
            f"because {tag!r} is not a PSLiteral"
        )
        return
    self.device.do_tag(tag, props)

1026 

def do_BMC(self, tag: PDFStackT) -> None:
    """Begin a marked-content sequence.

    :param tag: the marked-content tag; it is forwarded to the device only
        when it is a ``PSLiteral``, otherwise a warning is logged.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot begin marked-content sequence because "
            f"{tag!r} is not a PSLiteral"
        )
        return
    self.device.begin_tag(tag)

1036 

def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Begin a marked-content sequence with an associated property list.

    :param tag: the marked-content tag; it is forwarded to the device only
        when it is a ``PSLiteral``, otherwise a warning is logged.
    :param props: the property list operand, passed through unchanged.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot begin marked-content sequence with property list "
            f"because {tag!r} is not a PSLiteral"
        )
        return
    self.device.begin_tag(tag, props)

1046 

def do_EMC(self) -> None:
    """End the current marked-content sequence by notifying the device."""
    device = self.device
    device.end_tag()

1050 

def do_Tc(self, space: PDFStackT) -> None:
    """Set the character spacing.

    Character spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units. When it
        cannot be converted to a float, a warning is logged and the text
        state is left unchanged.
    """
    parsed = safe_float(space)
    if parsed is not None:
        self.textstate.charspace = parsed
        return
    log.warning(
        "Could not set character spacing because "
        f"{space!r} is an invalid float value"
    )

1066 

def do_Tw(self, space: PDFStackT) -> None:
    """Set the word spacing.

    Word spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units. When it
        cannot be converted to a float, a warning is logged and the text
        state is left unchanged.
    """
    parsed = safe_float(space)
    if parsed is not None:
        self.textstate.wordspace = parsed
        return
    log.warning(
        "Could not set word spacing because "
        f"{space!r} is an invalid float value"
    )

1082 

def do_Tz(self, scale: PDFStackT) -> None:
    """Set the horizontal scaling.

    :param scale: a number specifying the percentage of the normal width.
        When it cannot be converted to a float, a warning is logged and
        the text state is left unchanged.
    """
    parsed = safe_float(scale)
    if parsed is not None:
        self.textstate.scaling = parsed
        return
    log.warning(
        "Could not set horizontal scaling because "
        f"{scale!r} is an invalid float value"
    )

1097 

def do_TL(self, leading: PDFStackT) -> None:
    """Set the text leading.

    Text leading is used only by the T*, ', and " operators.

    :param leading: a number expressed in unscaled text space units. When
        it cannot be converted to a float, a warning is logged and the text
        state is left unchanged.
    """
    parsed = safe_float(leading)
    if parsed is not None:
        # The text state stores the leading with its sign flipped.
        self.textstate.leading = -parsed
        return
    log.warning(
        "Could not set text leading because "
        f"{leading!r} is an invalid float value"
    )

1113 

def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
    """Set the text font and font size.

    :param fontid: the name of a font resource in the Font subdictionary
        of the current resource dictionary
    :param fontsize: a number representing a scale factor
    :raises PDFInterpreterError: in strict mode, when ``fontid`` is not
        present in the font map
    """
    try:
        font = self.fontmap[literal_name(fontid)]
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined Font id: {fontid!r}") from err
        # Non-strict mode: substitute a font built from an empty spec.
        font = self.rsrcmgr.get_font(None, {})
    self.textstate.font = font

    size = safe_float(fontsize)
    if size is not None:
        self.textstate.fontsize = size
        return
    log.warning(
        f"Could not set text font because "
        f"{fontsize!r} is an invalid float value"
    )

1136 

def do_Tr(self, render: PDFStackT) -> None:
    """Set the text rendering mode.

    :param render: the rendering mode operand. When it cannot be converted
        to an int, a warning is logged and the text state is left unchanged.
    """
    mode = safe_int(render)
    if mode is not None:
        self.textstate.render = mode
        return
    log.warning(
        "Could not set text rendering mode because "
        f"{render!r} is an invalid int value"
    )

1148 

def do_Ts(self, rise: PDFStackT) -> None:
    """Set the text rise.

    :param rise: a number expressed in unscaled text space units. When it
        cannot be converted to a float, a warning is logged and the text
        state is left unchanged.
    """
    parsed = safe_float(rise)
    if parsed is not None:
        self.textstate.rise = parsed
        return
    log.warning(
        f"Could not set text rise because {rise!r} is an invalid float value"
    )

1162 

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line.

    The new position is offset from the start of the current line by
    (tx, ty). The text line matrix is reset to (0, 0) in every
    non-raising case, including when the operands are invalid.

    :param tx: horizontal offset
    :param ty: vertical offset
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted to a float
    """
    dx = safe_float(tx)
    dy = safe_float(ty)
    if dx is None or dy is None:
        if settings.STRICT:
            raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")
    else:
        # Translate the text matrix by (dx, dy) expressed in text space.
        a, b, c, d, e, f = self.textstate.matrix
        shifted_e = dx * a + dy * c + e
        shifted_f = dx * b + dy * d + f
        self.textstate.matrix = (a, b, c, d, shifted_e, shifted_f)

    self.textstate.linematrix = (0, 0)

1180 

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line and set the leading.

    The new position is offset from the start of the current line by
    (tx, ty). As a side effect, this operator sets the leading parameter
    in the text state to ``ty`` whenever ``ty`` is a valid number. The text
    line matrix is reset to (0, 0) in every non-raising case.

    :param tx: horizontal offset
    :param ty: vertical offset, also stored as the text leading
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted to a float
    """
    tx_ = safe_float(tx)
    ty_ = safe_float(ty)

    if tx_ is not None and ty_ is not None:
        # Translate the text matrix by (tx, ty) expressed in text space.
        (a, b, c, d, e, f) = self.textstate.matrix
        e_new = tx_ * a + ty_ * c + e
        f_new = tx_ * b + ty_ * d + f
        self.textstate.matrix = (a, b, c, d, e_new, f_new)

    elif settings.STRICT:
        # The message must be an f-string so the offending operands are
        # actually interpolated (same format as the Td operator).
        raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

    if ty_ is not None:
        # Stored without negation: do_TL stores -leading, and TD is
        # defined as "-ty TL tx ty Td", so the two negations cancel.
        self.textstate.leading = ty_

    self.textstate.linematrix = (0, 0)

1203 

def do_Tm(
    self,
    a: PDFStackT,
    b: PDFStackT,
    c: PDFStackT,
    d: PDFStackT,
    e: PDFStackT,
    f: PDFStackT,
) -> None:
    """Set the text matrix and reset the text line matrix.

    The six operands are the matrix components in order. When they cannot
    all be parsed, a warning is logged and the text state is left
    unchanged, including the text line matrix.
    """
    operands = (a, b, c, d, e, f)
    parsed = safe_matrix(*operands)
    if parsed is None:
        log.warning(
            f"Could not set text matrix because "
            f"not all values in {operands!r} can be parsed as floats"
        )
        return
    self.textstate.matrix = parsed
    self.textstate.linematrix = (0, 0)

1225 

def do_T_a(self) -> None:
    """Move to the start of the next text line (the ``T*`` operator).

    The translation part of the text matrix is shifted by the current
    leading along the matrix's second row, and the text line matrix is
    reset to (0, 0).
    """
    state = self.textstate
    a, b, c, d, e, f = state.matrix
    next_e = state.leading * c + e
    next_f = state.leading * d + f
    state.matrix = (a, b, c, d, next_e, next_f)
    state.linematrix = (0, 0)

1238 

def do_TJ(self, seq: PDFStackT) -> None:
    """Show text, allowing individual glyph positioning.

    :param seq: the text sequence operand, handed to the device as-is
    :raises PDFInterpreterError: in strict mode, when no font is set in
        the text state; in non-strict mode the call is silently ignored
    """
    if self.textstate.font is not None:
        gstate = self.graphicstate
        # The device receives a copy of the graphics state.
        self.device.render_string(
            self.textstate,
            cast(PDFTextSeq, seq),
            gstate.ncs,
            gstate.copy(),
        )
    elif settings.STRICT:
        raise PDFInterpreterError("No font specified!")

1251 

def do_Tj(self, s: PDFStackT) -> None:
    """Show a single text string.

    :param s: the string operand; it is wrapped in a one-element list and
        handed to ``do_TJ``
    """
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1255 

def do__q(self, s: PDFStackT) -> None:
    """Move to the next line and show text.

    Implements the ' (single quote) operator as a line advance
    (``do_T_a``) followed by showing ``s`` through ``do_TJ``.

    :param s: the string operand to show
    """
    self.do_T_a()
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1263 

def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
    """Set word and character spacing, move to next line, and show text.

    Implements the " (double quote) operator, which the PDF specification
    defines as equivalent to ``aw Tw ac Tc string '``.

    :param aw: word spacing, applied through ``do_Tw``
    :param ac: character spacing, applied through ``do_Tc``
    :param s: the string operand to show
    """
    self.do_Tw(aw)
    self.do_Tc(ac)
    # Advance to the next line before showing the text; without this the
    # operator would draw on the current line, contrary to its definition.
    self.do_T_a()
    self.do_TJ([s])

1272 

def do_BI(self) -> None:
    """Begin an inline image object.

    This method has no body; the operator is accepted and ignored.
    """

1275 

def do_ID(self) -> None:
    """Begin inline image data.

    This method has no body; the operator is accepted and ignored.
    """

1278 

def do_EI(self, obj: PDFStackT) -> None:
    """End an inline image object and render it.

    :param obj: the operand of ``EI``. It is rendered only when it is a
        ``PDFStream`` containing both the ``W`` and ``H`` keys; anything
        else is ignored.
    """
    if not isinstance(obj, PDFStream):
        return
    if "W" not in obj or "H" not in obj:
        return
    # The figure name is derived from the Python object identity.
    figure_name = str(id(obj))
    self.device.begin_figure(figure_name, (0, 0, 1, 1), MATRIX_IDENTITY)
    self.device.render_image(figure_name, obj)
    self.device.end_figure(figure_name)

1286 

def do_Do(self, xobjid_arg: PDFStackT) -> None:
    """Invoke a named XObject.

    Form XObjects with a ``BBox`` are rendered by a sub-interpreter inside
    a figure; Image XObjects with ``Width`` and ``Height`` are rendered as
    images inside a unit figure. Every other XObject is ignored.

    :param xobjid_arg: the name of the XObject to invoke
    :raises PDFInterpreterError: in strict mode, when the name is not in
        the XObject map; in non-strict mode the call is silently ignored
    """
    xobjid = literal_name(xobjid_arg)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError as err:
        if not settings.STRICT:
            return
        raise PDFInterpreterError(f"Undefined xobject id: {xobjid!r}") from err
    log.debug("Processing xobj: %r", xobj)
    kind = xobj.get("Subtype")

    if kind is LITERAL_FORM and "BBox" in xobj:
        sub_interpreter = self.subinterp()
        form_bbox = cast(Rect, list_value(xobj["BBox"]))
        form_matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        own_resources = xobj.get("Resources")
        if own_resources:
            resources = dict_value(own_resources)
        else:
            resources = self.resources.copy()
        self.device.begin_figure(xobjid, form_bbox, form_matrix)
        sub_interpreter.render_contents(
            resources,
            [xobj],
            ctm=mult_matrix(form_matrix, self.ctm),
        )
        self.device.end_figure(xobjid)
        return

    if kind is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
        self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
        self.device.render_image(xobjid, xobj)
        self.device.end_figure(xobjid)
        return

    # Any other XObject type is unsupported and deliberately ignored.

1321 

def process_page(self, page: PDFPage) -> None:
    """Render one page on the device.

    An initial transformation matrix is chosen from the page rotation
    (90, 180 or 270; anything else is treated as unrotated) and the media
    box, then the page contents are rendered between the device's
    ``begin_page`` and ``end_page`` calls.

    :param page: the page to process
    """
    log.debug("Processing page: %r", page)
    x0, y0, x1, y1 = page.mediabox
    rotation = page.rotate
    if rotation == 90:
        ctm = (0, -1, 1, 0, -y0, x1)
    elif rotation == 180:
        ctm = (-1, 0, 0, -1, x1, y1)
    elif rotation == 270:
        ctm = (0, 1, -1, 0, y1, -x0)
    else:
        # Unrotated: only shift the media box origin to (0, 0).
        ctm = (1, 0, 0, 1, -x0, -y0)
    self.device.begin_page(page, ctm)
    self.render_contents(page.resources, page.contents, ctm=ctm)
    self.device.end_page(page)

1336 

def render_contents(
    self,
    resources: dict[object, object],
    streams: Sequence[object],
    ctm: Matrix = MATRIX_IDENTITY,
) -> None:
    """Render a list of content streams.

    Resources and interpreter state are (re)initialized before the streams
    are executed. This method may be called recursively.

    :param resources: the resource dictionary to use for the streams
    :param streams: the content streams to execute
    :param ctm: the initial current transformation matrix
    """
    log.debug(
        "render_contents: resources=%r, streams=%r, ctm=%r", resources, streams, ctm
    )
    self.init_resources(resources)
    self.init_state(ctm)
    stream_list = list_value(streams)
    self.execute(stream_list)

1356 

def execute(self, streams: Sequence[object]) -> None:
    """Parse the given content streams and dispatch their operators.

    Operands are pushed on the interpreter stack; each keyword ``op`` is
    dispatched to the method ``do_<op>`` with its operands popped from the
    stack.

    :param streams: the content streams to execute; each is resolved with
        ``stream_value``
    :raises PDFInterpreterError: in strict mode, when an operator has no
        matching ``do_*`` method
    """
    # Detect and prevent circular references in content streams
    # (including Form XObjects).
    # We track stream IDs being executed in the current interpreter and
    # all parent interpreters. If a stream is already being processed
    # in the call stack, we skip
    # it to prevent infinite recursion (CWE-835 vulnerability).
    valid_streams: list[PDFStream] = []
    self.stream_ids.clear()
    for obj in streams:
        stream = stream_value(obj)
        if stream.objid is None:
            # Inline streams without object IDs can't be tracked for circular refs
            log.warning(
                "Execute called on non-indirect object (inline image?) %r", stream
            )
            continue
        if stream.objid in self.parent_stream_ids:
            log.warning(
                "Refusing to execute circular reference to content stream %d",
                stream.objid,
            )
        else:
            valid_streams.append(stream)
            self.stream_ids.add(stream.objid)
    try:
        parser = PDFContentParser(valid_streams)
    except PSEOF:
        # empty page
        return
    # Read objects until the parser signals end of input.
    while True:
        try:
            (_, obj) = parser.nextobject()
        except PSEOF:
            break
        if isinstance(obj, PSKeyword):
            name = keyword_name(obj)
            # Operators containing *, " or ' are mapped onto valid Python
            # identifiers: * -> _a, " -> _w, ' -> _q.
            method = "do_{}".format(
                name.replace("*", "_a")
                .replace('"', "_w")
                .replace(
                    "'",
                    "_q",
                )
            )
            if hasattr(self, method):
                func = getattr(self, method)
                # Number of operands = positional parameters minus ``self``.
                nargs = func.__code__.co_argcount - 1
                if nargs:
                    args = self.pop(nargs)
                    log.debug("exec: %s %r", name, args)
                    # The operator is skipped when fewer operands than
                    # expected were obtained from the stack.
                    if len(args) == nargs:
                        func(*args)
                else:
                    log.debug("exec: %s", name)
                    func()
            elif settings.STRICT:
                error_msg = f"Unknown operator: {name!r}"
                raise PDFInterpreterError(error_msg)
        else:
            # Anything that is not a keyword is an operand.
            self.push(obj)