Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/pdfinterp.py: 70%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

739 statements  

1import logging 

2import re 

3from collections.abc import Mapping, Sequence 

4from io import BytesIO 

5from typing import Union, cast 

6 

7from pdfminer import settings 

8from pdfminer.casting import safe_cmyk, safe_float, safe_int, safe_matrix, safe_rgb 

9from pdfminer.cmapdb import CMap, CMapBase, CMapDB 

10from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace 

11from pdfminer.pdfdevice import PDFDevice, PDFTextSeq 

12from pdfminer.pdfexceptions import PDFException, PDFValueError 

13from pdfminer.pdffont import ( 

14 PDFCIDFont, 

15 PDFFont, 

16 PDFFontError, 

17 PDFTrueTypeFont, 

18 PDFType1Font, 

19 PDFType3Font, 

20) 

21from pdfminer.pdfpage import PDFPage 

22from pdfminer.pdftypes import ( 

23 LITERALS_ASCII85_DECODE, 

24 PDFObjRef, 

25 PDFStream, 

26 dict_value, 

27 list_value, 

28 resolve1, 

29 stream_value, 

30) 

31from pdfminer.psexceptions import PSEOF, PSTypeError 

32from pdfminer.psparser import ( 

33 KWD, 

34 LIT, 

35 PSKeyword, 

36 PSLiteral, 

37 PSStackParser, 

38 PSStackType, 

39 keyword_name, 

40 literal_name, 

41) 

42from pdfminer.utils import ( 

43 MATRIX_IDENTITY, 

44 Matrix, 

45 PathSegment, 

46 Point, 

47 Rect, 

48 choplist, 

49 mult_matrix, 

50) 

51 

# Module-level logger; all debug and warning output in this module goes
# through it.
log = logging.getLogger(__name__)

53 

54 

class PDFResourceError(PDFException):
    """PDFException subclass for resource-related failures."""

57 

58 

class PDFInterpreterError(PDFException):
    """PDFException subclass raised by the page interpreter in strict mode."""

61 

62 

# Name literals built once with LIT(); code in this module compares against
# them by identity (``is`` / ``is not``).
LITERAL_PDF = LIT("PDF")
LITERAL_TEXT = LIT("Text")
LITERAL_FONT = LIT("Font")
LITERAL_FORM = LIT("Form")
LITERAL_IMAGE = LIT("Image")

68 

69 

class PDFTextState:
    """Mutable holder for the text parameters tracked by the interpreter.

    ``matrix`` and ``linematrix`` are assigned by :meth:`reset`; every other
    attribute gets its default in ``__init__``.
    """

    matrix: Matrix
    linematrix: Point

    # Attribute names, in the order used by __repr__ and copy().
    _FIELDS = (
        "font",
        "fontsize",
        "charspace",
        "wordspace",
        "scaling",
        "leading",
        "render",
        "rise",
        "matrix",
        "linematrix",
    )

    def __init__(self) -> None:
        self.font: PDFFont | None = None
        self.fontsize: float = 0
        self.charspace: float = 0
        self.wordspace: float = 0
        self.scaling: float = 100
        self.leading: float = 0
        self.render: int = 0
        self.rise: float = 0
        # reset() is what sets self.matrix and self.linematrix.
        self.reset()

    def __repr__(self) -> str:
        """Return ``<PDFTextState: name=value, ...>`` listing every field."""
        listing = ", ".join(
            f"{name}={getattr(self, name)!r}" for name in self._FIELDS
        )
        return f"<PDFTextState: {listing}>"

    def copy(self) -> "PDFTextState":
        """Return a new PDFTextState carrying the same attribute values."""
        clone = PDFTextState()
        for name in self._FIELDS:
            setattr(clone, name, getattr(self, name))
        return clone

    def reset(self) -> None:
        """Set the text matrix to identity and the line matrix to (0, 0)."""
        self.matrix = MATRIX_IDENTITY
        self.linematrix = (0, 0)

118 

119 

# A standard colour is one of:
#   * a single float                     -> greyscale
#   * a 3-tuple of floats                -> R, G, B
#   * a 4-tuple of floats                -> C, M, Y, K
# It is used on its own or as the base colour of an uncolored pattern.
StandardColor = Union[
    float,
    tuple[float, float, float],
    tuple[float, float, float, float],
]

# The complete colour type additionally allows patterns:
#   * str                                -> pattern name (colored, PaintType=1)
#   * (base_color, pattern_name)         -> uncolored pattern (PaintType=2)
Color = Union[StandardColor, str, tuple[StandardColor, str]]

135 

136 

class PDFGraphicState:
    """Mutable holder for line-drawing parameters and the current colours.

    Stroking and non-stroking colours both start as ``0`` in the predefined
    ``DeviceGray`` colour space.
    """

    # Attributes transferred by copy().
    _COPIED_FIELDS = (
        "linewidth",
        "linecap",
        "linejoin",
        "miterlimit",
        "dash",
        "intent",
        "flatness",
        "scolor",
        "scs",
        "ncolor",
        "ncs",
    )

    def __init__(self) -> None:
        self.linewidth: float = 0
        self.linecap: object | None = None
        self.linejoin: object | None = None
        self.miterlimit: object | None = None
        self.dash: tuple[object, object] | None = None
        self.intent: object | None = None
        self.flatness: object | None = None

        # Stroking colour and its colour space.
        self.scolor: Color = 0
        self.scs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

        # Non-stroking colour and its colour space.
        self.ncolor: Color = 0
        self.ncs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

    def copy(self) -> "PDFGraphicState":
        """Return a new PDFGraphicState carrying the same attribute values."""
        clone = PDFGraphicState()
        for name in self._COPIED_FIELDS:
            setattr(clone, name, getattr(self, name))
        return clone

    def __repr__(self) -> str:
        """Return a one-line summary; the colour spaces are not included."""
        labelled = (
            ("linewidth", self.linewidth),
            ("linecap", self.linecap),
            ("linejoin", self.linejoin),
            ("miterlimit", self.miterlimit),
            ("dash", self.dash),
            ("intent", self.intent),
            ("flatness", self.flatness),
            ("stroking color", self.scolor),
            ("non stroking color", self.ncolor),
        )
        listing = ", ".join(f"{label}={value!r}" for label, value in labelled)
        return f"<PDFGraphicState: {listing}>"

183 

184 

class PDFResourceManager:
    """Repository of shared resources.

    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
    """

    def __init__(self, caching: bool = True) -> None:
        """Create a manager; ``caching`` controls whether fonts are stored."""
        self.caching = caching
        self._cached_fonts: dict[object, PDFFont] = {}

    def get_procset(self, procs: Sequence[object]) -> None:
        """Iterate over the procedure-set names without acting on any."""
        for _proc in procs:
            # Neither /PDF, /Text nor any other entry requires handling.
            continue

    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        """Look up ``cmapname`` in CMapDB.

        When the CMap is not found, the error is re-raised if ``strict`` is
        true; otherwise an empty ``CMap`` is returned.
        """
        try:
            found = CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
            if not strict:
                return CMap()
            raise
        return found

    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        """Return the font for ``spec``, reusing a cached one when possible.

        A font is looked up in, and stored to, the cache only when ``objid``
        is truthy; storing additionally requires ``self.caching``. With
        ``settings.STRICT`` enabled, a wrong /Type, a missing /Subtype or an
        unknown subtype raises ``PDFFontError``; otherwise the font falls back
        to ``PDFType1Font``.
        """
        if objid and objid in self._cached_fonts:
            return self._cached_fonts[objid]

        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        if settings.STRICT and spec["Type"] is not LITERAL_FONT:
            raise PDFFontError("Type is not /Font")

        # Work out which kind of font object to create.
        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        elif settings.STRICT:
            raise PDFFontError("Font Subtype is not specified.")
        else:
            subtype = "Type1"

        directly_built = {
            "Type1": PDFType1Font,
            "MMType1": PDFType1Font,
            "TrueType": PDFTrueTypeFont,
            "Type3": PDFType3Font,
            "CIDFontType0": PDFCIDFont,
            "CIDFontType2": PDFCIDFont,
        }
        font_class = directly_built.get(subtype)
        if font_class is not None:
            font = font_class(self, spec)
        elif subtype == "Type0":
            # Composite font: build its first descendant, carrying over the
            # parent's Encoding / ToUnicode entries when present.
            dfonts = list_value(spec["DescendantFonts"])
            assert dfonts
            subspec = dict_value(dfonts[0]).copy()
            for key in ("Encoding", "ToUnicode"):
                if key in spec:
                    subspec[key] = resolve1(spec[key])
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError(f"Invalid Font spec: {spec!r}")
            font = PDFType1Font(self, spec)  # this is so wrong!

        if objid and self.caching:
            self._cached_fonts[objid] = font
        return font

254 

255 

class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
    """Stack parser that reads a list of content streams as one input.

    The streams in ``streams`` are opened one after another on demand (see
    :meth:`fillfp`). Inline images (``BI ... ID ... EI``) are turned into a
    ``PDFStream`` object pushed onto the parser stack.
    """

    def __init__(self, streams: Sequence[object]) -> None:
        self.streams = streams
        # Index of the next entry of ``streams`` to open.
        self.istream = 0
        # PSStackParser.__init__(fp=None) is safe only because we've overloaded
        # all the methods that would attempt to access self.fp without first
        # calling self.fillfp().
        PSStackParser.__init__(self, None)  # type: ignore[arg-type]

    def fillfp(self) -> bool:
        """Open the next stream if no stream is currently open.

        Returns True when a new stream was opened, False when one was already
        open. Raises PSEOF when there are no streams left.
        """
        if not self.fp:
            if self.istream < len(self.streams):
                strm = stream_value(self.streams[self.istream])
                self.istream += 1
            else:
                raise PSEOF("Unexpected EOF, file truncated?")
            self.fp = BytesIO(strm.get_data())
            return True
        return False

    def seek(self, pos: int) -> None:
        """Make sure a stream is open, then seek within it."""
        self.fillfp()
        PSStackParser.seek(self, pos)

    def fillbuf(self) -> bool:
        """Refill the read buffer once it has been fully consumed.

        Exhausted streams are dropped and the next one is opened until some
        data is read. Returns whether the final read opened a new stream;
        returns False without reading when buffered data is still pending.
        """
        if self.charpos < len(self.buf):
            return False
        new_stream = False
        while 1:
            new_stream = self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
            # Current stream is exhausted; force fillfp() to open the next.
            self.fp = None  # type: ignore[assignment]
        self.charpos = 0
        return new_stream

    def get_inline_data(self, pos: int, target: bytes = b"EI") -> tuple[int, bytes]:
        """Read raw bytes from ``pos`` up to ``target`` followed by whitespace.

        Returns ``(pos, data)`` where ``data`` excludes the terminator, the
        whitespace byte after it, and one trailing CR, LF or CRLF.
        """
        self.seek(pos)
        # i counts how many bytes of ``target`` (plus the trailing whitespace
        # byte) have been matched so far; 0 means "searching for target[0]".
        i = 0
        data = b""
        while i <= len(target):
            self.fillbuf()
            if i:
                ci = self.buf[self.charpos]
                c = bytes((ci,))
                data += c
                self.charpos += 1
                if (len(target) <= i and c.isspace()) or (
                    i < len(target) and c == (bytes((target[i],)))
                ):
                    i += 1
                else:
                    i = 0
            else:
                try:
                    j = self.buf.index(target[0], self.charpos)
                    data += self.buf[self.charpos : j + 1]
                    self.charpos = j + 1
                    i = 1
                except ValueError:
                    # First target byte not in this buffer: keep everything.
                    data += self.buf[self.charpos :]
                    self.charpos = len(self.buf)
        data = data[: -(len(target) + 1)]  # strip the last part
        data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data)
        return (pos, data)

    def flush(self) -> None:
        """Move everything on the parser stack to the results."""
        self.add_results(*self.popall())

    KEYWORD_BI = KWD(b"BI")
    KEYWORD_ID = KWD(b"ID")
    KEYWORD_EI = KWD(b"EI")

    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handle a keyword token, assembling inline images on ``ID``.

        ``BI`` opens an "inline" context. ``ID`` closes it, builds the image
        dictionary from the collected key/value pairs, reads the image data
        and pushes a ``PDFStream``. Any other keyword is pushed unchanged.
        A ``PSTypeError`` while handling ``ID`` is re-raised only when
        ``settings.STRICT`` is set.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, "inline")
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type("inline")
                if len(objs) % 2 != 0:
                    error_msg = f"Invalid dictionary construct: {objs!r}"
                    raise PSTypeError(error_msg)
                d = {literal_name(k): resolve1(v) for (k, v) in choplist(2, objs)}
                eos = b"EI"
                filters = d.get("F")
                if isinstance(filters, PSLiteral):
                    filters = [filters]
                # Only a non-empty filter array can select ASCII85. A missing,
                # empty or malformed /F keeps the default "EI" terminator
                # instead of raising IndexError/TypeError.
                if (
                    isinstance(filters, list)
                    and filters
                    and filters[0] in LITERALS_ASCII85_DECODE
                ):
                    eos = b"~>"
                (pos, data) = self.get_inline_data(pos + len(b"ID "), target=eos)
                if eos != b"EI":  # it may be necessary for decoding
                    data += eos
                obj = PDFStream(d, data)
                self.push((pos, obj))
                if eos == b"EI":  # otherwise it is still in the stream
                    self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if settings.STRICT:
                    raise
        else:
            self.push((pos, token))

361 

362 

# Types that may appear on the PDF argument stack: the generic PostScript
# stack type, parameterised with PDFStream.
PDFStackT = PSStackType[PDFStream]

365 

366 

367class PDFPageInterpreter: 

368 """Processor for the content of a PDF page 

369 

370 Reference: PDF Reference, Appendix A, Operator Summary 

371 """ 

372 

def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
    """Bind the interpreter to a resource manager and an output device."""
    self.rsrcmgr, self.device = rsrcmgr, device
    # IDs of streams this interpreter is executing right now; used to
    # detect circular references.
    self.stream_ids: set[int] = set()
    # IDs of streams being executed by parent interpreters in the call stack.
    self.parent_stream_ids: set[int] = set()

380 

def dup(self) -> "PDFPageInterpreter":
    """Return a new interpreter of the same class, sharing the resource
    manager and device but none of the stream-ID tracking."""
    interpreter_class = self.__class__
    return interpreter_class(self.rsrcmgr, self.device)

383 

def subinterp(self) -> "PDFPageInterpreter":
    """Create a child interpreter for a nested content stream.

    Unlike dup(), the child's ``parent_stream_ids`` receives both this
    interpreter's ``parent_stream_ids`` and its ``stream_ids``, so circular
    references can be detected across nested XObject invocations.
    """
    child = self.dup()
    for inherited in (self.parent_stream_ids, self.stream_ids):
        child.parent_stream_ids.update(inherited)
    return child

396 

def init_resources(self, resources: dict[object, object]) -> None:
    """Prepare the fonts and XObjects listed in the Resource attribute.

    Resets ``fontmap``, ``xobjmap`` and ``csmap`` (the latter to a copy of
    the predefined colour spaces) and then fills them from the "Font",
    "ColorSpace" and "XObject" entries. "ProcSet" is forwarded to the
    resource manager. Nothing is filled in when ``resources`` is falsy.
    """
    self.resources = resources
    self.fontmap: dict[object, PDFFont] = {}
    self.xobjmap = {}
    self.csmap: dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
    if not resources:
        return

    def get_colorspace(spec: object) -> PDFColorSpace | None:
        # A colour space is either a bare name or an array whose first
        # element is the name.
        is_array = isinstance(spec, list)
        name = literal_name(spec[0] if is_array else spec)
        if is_array and len(spec) >= 2:
            if name == "ICCBased":
                return PDFColorSpace(name, stream_value(spec[1])["N"])
            if name == "DeviceN":
                return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE.get(name)

    for key, value in dict_value(resources).items():
        log.debug("Resource: %r: %r", key, value)
        if key == "Font":
            for fontid, spec in dict_value(value).items():
                # Only indirect references carry an object id for caching.
                objid = spec.objid if isinstance(spec, PDFObjRef) else None
                self.fontmap[fontid] = self.rsrcmgr.get_font(
                    objid, dict_value(spec)
                )
        elif key == "ColorSpace":
            for csid, spec in dict_value(value).items():
                colorspace = get_colorspace(resolve1(spec))
                if colorspace is not None:
                    self.csmap[csid] = colorspace
        elif key == "ProcSet":
            self.rsrcmgr.get_procset(list_value(value))
        elif key == "XObject":
            for xobjid, xobjstrm in dict_value(value).items():
                self.xobjmap[xobjid] = xobjstrm

437 

def init_state(self, ctm: Matrix) -> None:
    """Initialize the text and graphic states for rendering a page.

    Clears the graphics-state stack, the current path and the argument
    stack, installs ``ctm`` and passes it to the device.
    """
    # Stack of saved (ctm, textstate, graphicstate) tuples.
    self.gstack: list[tuple[Matrix, PDFTextState, PDFGraphicState]] = []
    self.ctm = ctm
    self.device.set_ctm(self.ctm)
    self.textstate = PDFTextState()
    self.graphicstate = PDFGraphicState()
    self.curpath: list[PathSegment] = []
    # Stack of pending operator arguments.
    self.argstack: list[PDFStackT] = []

449 

def push(self, obj: PDFStackT) -> None:
    """Append ``obj`` to the argument stack."""
    stack = self.argstack
    stack.append(obj)

452 

def pop(self, n: int) -> list[PDFStackT]:
    """Remove and return the last ``n`` entries of the argument stack.

    Fewer than ``n`` entries are returned when the stack is shorter; for
    ``n == 0`` an empty list is returned and the stack is left untouched.
    """
    if n == 0:
        return []
    # Both slices are taken from the old stack before it is rebound.
    popped, self.argstack = self.argstack[-n:], self.argstack[:-n]
    return popped

459 

def get_current_state(self) -> tuple[Matrix, PDFTextState, PDFGraphicState]:
    """Return the CTM together with copies of the text and graphic states."""
    text_snapshot = self.textstate.copy()
    graphic_snapshot = self.graphicstate.copy()
    return (self.ctm, text_snapshot, graphic_snapshot)

462 

def set_current_state(
    self,
    state: tuple[Matrix, PDFTextState, PDFGraphicState],
) -> None:
    """Install ``state`` as (ctm, textstate, graphicstate) and pass the CTM
    to the device."""
    ctm, textstate, graphicstate = state
    self.ctm = ctm
    self.textstate = textstate
    self.graphicstate = graphicstate
    self.device.set_ctm(self.ctm)

469 

def do_q(self) -> None:
    """Save graphics state: push a snapshot of the current state."""
    snapshot = self.get_current_state()
    self.gstack.append(snapshot)

473 

def do_Q(self) -> None:
    """Restore graphics state; does nothing when no state has been saved."""
    if not self.gstack:
        return
    self.set_current_state(self.gstack.pop())

478 

def do_cm(
    self,
    a1: PDFStackT,
    b1: PDFStackT,
    c1: PDFStackT,
    d1: PDFStackT,
    e1: PDFStackT,
    f1: PDFStackT,
) -> None:
    """Concatenate matrix to current transformation matrix.

    When the six operands cannot be turned into a matrix, a warning is
    logged and the CTM is left unchanged.
    """
    matrix = safe_matrix(a1, b1, c1, d1, e1, f1)
    if matrix is None:
        log.warning(
            "Cannot concatenate matrix to current transformation matrix "
            "because not all values in %r can be parsed as floats",
            (a1, b1, c1, d1, e1, f1),
        )
        return

    self.ctm = mult_matrix(matrix, self.ctm)
    self.device.set_ctm(self.ctm)

500 

def do_w(self, linewidth: PDFStackT) -> None:
    """Set line width.

    The stored width is the operand multiplied by the length of the vector
    formed by the first two CTM entries. An operand that is not a valid
    float is reported with a warning and ignored.
    """
    width = safe_float(linewidth)
    if width is None:
        log.warning(
            "Cannot set line width because %r is an invalid float value",
            linewidth,
        )
        return

    a, b = self.ctm[0], self.ctm[1]
    scale = (a**2 + b**2) ** 0.5
    self.graphicstate.linewidth = width * scale

512 

def do_J(self, linecap: PDFStackT) -> None:
    """Set line cap style (the operand is stored as given)."""
    state = self.graphicstate
    state.linecap = linecap

516 

def do_j(self, linejoin: PDFStackT) -> None:
    """Set line join style (the operand is stored as given)."""
    state = self.graphicstate
    state.linejoin = linejoin

520 

def do_M(self, miterlimit: PDFStackT) -> None:
    """Set miter limit (the operand is stored as given)."""
    state = self.graphicstate
    state.miterlimit = miterlimit

524 

def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
    """Set line dash pattern, stored as the pair ``(dash, phase)``."""
    pattern = (dash, phase)
    self.graphicstate.dash = pattern

528 

def do_ri(self, intent: PDFStackT) -> None:
    """Set color rendering intent (the operand is stored as given)."""
    state = self.graphicstate
    state.intent = intent

532 

def do_i(self, flatness: PDFStackT) -> None:
    """Set flatness tolerance (the operand is stored as given)."""
    state = self.graphicstate
    state.flatness = flatness

536 

def do_gs(self, name: PDFStackT) -> None:
    """Set parameters from graphics state parameter dictionary.

    Not implemented yet: the operand is accepted and ignored.
    """
    # TODO
    return None

540 

def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
    """Begin new subpath at (x, y).

    If either coordinate is not a valid float, a warning is logged and the
    path is left unchanged.
    """
    x_f, y_f = safe_float(x), safe_float(y)
    if x_f is None or y_f is None:
        log.warning(
            "Cannot start new subpath because not all values "
            "in %r can be parsed as floats",
            ("m", x, y),
        )
        return

    self.curpath.append(("m", x_f, y_f))

556 

def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
    """Append straight line segment to path, ending at (x, y).

    If either coordinate is not a valid float, a warning is logged and the
    path is left unchanged.
    """
    x_f, y_f = safe_float(x), safe_float(y)
    if x_f is None or y_f is None:
        log.warning(
            "Cannot append straight line segment to path "
            "because not all values in %r can be parsed as floats",
            ("l", x, y),
        )
        return

    self.curpath.append(("l", x_f, y_f))

571 

def do_c(
    self,
    x1: PDFStackT,
    y1: PDFStackT,
    x2: PDFStackT,
    y2: PDFStackT,
    x3: PDFStackT,
    y3: PDFStackT,
) -> None:
    """Append curved segment to path (three control points).

    If any operand is not a valid float, a warning is logged and the path
    is left unchanged.
    """
    raw = (x1, y1, x2, y2, x3, y3)
    parsed = [safe_float(value) for value in raw]
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("c", *raw),
        )
        return

    self.curpath.append(("c", *parsed))

605 

def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Append curved segment to path (initial point replicated).

    If any operand is not a valid float, a warning is logged and the path
    is left unchanged.
    """
    raw = (x2, y2, x3, y3)
    parsed = [safe_float(value) for value in raw]
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("v", *raw),
        )
        return

    self.curpath.append(("v", *parsed))

622 

def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Append curved segment to path (final point replicated).

    If any operand is not a valid float, a warning is logged and the path
    is left unchanged.
    """
    raw = (x1, y1, x3, y3)
    parsed = [safe_float(value) for value in raw]
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("y", *raw),
        )
        return

    self.curpath.append(("y", *parsed))

639 

def do_h(self) -> None:
    """Close subpath by appending an ``("h",)`` segment to the path."""
    close_segment = ("h",)
    self.curpath.append(close_segment)

643 

def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
    """Append rectangle to path.

    The rectangle is added as a move, three lines and a close. If any
    operand is not a valid float, a warning is logged and the path is left
    unchanged.
    """
    x_f, y_f, w_f, h_f = (safe_float(value) for value in (x, y, w, h))
    if x_f is None or y_f is None or w_f is None or h_f is None:
        log.warning(
            "Cannot append rectangle to path "
            "because not all values in %r can be parsed as floats",
            (x, y, w, h),
        )
        return

    corners = (
        ("m", x_f, y_f),
        ("l", x_f + w_f, y_f),
        ("l", x_f + w_f, y_f + h_f),
        ("l", x_f, y_f + h_f),
        ("h",),
    )
    for segment in corners:
        self.curpath.append(segment)

664 

def do_S(self) -> None:
    """Stroke path, then start a new empty path."""
    stroke, fill, evenodd = True, False, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

669 

def do_s(self) -> None:
    """Close and stroke path (``h`` followed by ``S``)."""
    self.do_h()
    return self.do_S()

674 

def do_f(self) -> None:
    """Fill path using nonzero winding number rule, then start a new path."""
    stroke, fill, evenodd = False, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

679 

def do_F(self) -> None:
    """Fill path using nonzero winding number rule (obsolete).

    The PDF specification defines ``F`` as equivalent to ``f``, kept only
    for compatibility, so this delegates to :meth:`do_f`. Without the
    delegation the path was neither painted nor cleared and would leak
    into the next path-painting operator.
    """
    self.do_f()

682 

def do_f_a(self) -> None:
    """Fill path using even-odd rule, then start a new empty path."""
    stroke, fill, evenodd = False, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

687 

def do_B(self) -> None:
    """Fill and stroke path using nonzero winding number rule, then start
    a new empty path."""
    stroke, fill, evenodd = True, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

692 

def do_B_a(self) -> None:
    """Fill and stroke path using even-odd rule, then start a new empty
    path."""
    stroke, fill, evenodd = True, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

697 

def do_b(self) -> None:
    """Close, fill, and stroke path using nonzero winding number rule
    (``h`` followed by ``B``)."""
    self.do_h()
    return self.do_B()

702 

def do_b_a(self) -> None:
    """Close, fill, and stroke path using even-odd rule (``h`` followed by
    ``B*``)."""
    self.do_h()
    return self.do_B_a()

707 

def do_n(self) -> None:
    """End path without filling or stroking: replace it by a new empty
    list without notifying the device."""
    fresh_path = []
    self.curpath = fresh_path

711 

def do_W(self) -> None:
    """Set clipping path using nonzero winding number rule.

    Accepted but ignored: no state is changed.
    """
    return None

714 

def do_W_a(self) -> None:
    """Set clipping path using even-odd rule.

    Accepted but ignored: no state is changed.
    """
    return None

717 

def do_CS(self, name: PDFStackT) -> None:
    """Set color space for stroking operations.

    Introduced in PDF 1.1. An unknown colour space name raises
    PDFInterpreterError when ``settings.STRICT`` is set and is otherwise
    ignored.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if not settings.STRICT:
            return
        raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    self.graphicstate.scs = colorspace

728 

def do_cs(self, name: PDFStackT) -> None:
    """Set color space for nonstroking operations.

    An unknown colour space name raises PDFInterpreterError when
    ``settings.STRICT`` is set and is otherwise ignored.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if not settings.STRICT:
            return
        raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    self.graphicstate.ncs = colorspace

736 

def do_G(self, gray: PDFStackT) -> None:
    """Set gray level for stroking operations.

    Also switches the stroking colour space to DeviceGray. An invalid
    operand is reported with a warning and ignored.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            "Cannot set gray level because %r is an invalid float value",
            gray,
        )
        return

    self.graphicstate.scolor = level
    self.graphicstate.scs = self.csmap["DeviceGray"]

749 

def do_g(self, gray: PDFStackT) -> None:
    """Set gray level for nonstroking operations.

    Also switches the non-stroking colour space to DeviceGray. An invalid
    operand is reported with a warning and ignored.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            "Cannot set gray level because %r is an invalid float value",
            gray,
        )
        return

    self.graphicstate.ncolor = level
    self.graphicstate.ncs = self.csmap["DeviceGray"]

762 

def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for stroking operations.

    Also switches the stroking colour space to DeviceRGB. Invalid operands
    are reported with a warning and ignored.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB stroke color "
            "because not all values in %r can be parsed as floats",
            (r, g, b),
        )
        return

    self.graphicstate.scolor = color
    self.graphicstate.scs = self.csmap["DeviceRGB"]

776 

def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set RGB color for nonstroking operations.

    Also switches the non-stroking colour space to DeviceRGB. Invalid
    operands are reported with a warning and ignored.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB non-stroke color "
            "because not all values in %r can be parsed as floats",
            (r, g, b),
        )
        return

    self.graphicstate.ncolor = color
    self.graphicstate.ncs = self.csmap["DeviceRGB"]

790 

def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set CMYK color for stroking operations.

    Also switches the stroking colour space to DeviceCMYK. Invalid
    operands are reported with a warning and ignored.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK stroke color "
            "because not all values in %r can be parsed as floats",
            (c, m, y, k),
        )
        return

    self.graphicstate.scolor = color
    self.graphicstate.scs = self.csmap["DeviceCMYK"]

804 

def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set CMYK color for nonstroking operations.

    Also switches the non-stroking colour space to DeviceCMYK. Invalid
    operands are reported with a warning and ignored.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK non-stroke color "
            "because not all values in %r can be parsed as floats",
            (c, m, y, k),
        )
        return

    self.graphicstate.ncolor = color
    self.graphicstate.ncs = self.csmap["DeviceCMYK"]

818 

def _parse_color_components(
    self, components: list[PDFStackT], context: str
) -> StandardColor | None:
    """Convert raw operands into a gray, RGB or CMYK colour value.

    Args:
        components: The operands; 1, 3 or 4 of them are supported.
        context: Text inserted into warning messages
            (e.g., "stroke", "non-stroke").

    Returns:
        A float for one component, a tuple for three or four, or None
        (after logging a warning) when parsing fails or the number of
        components is unsupported.
    """
    count = len(components)

    if count == 1:
        (value,) = components
        gray = safe_float(value)
        if gray is None:
            log.warning(
                "Cannot set %s color: %r is an invalid float value",
                context,
                value,
            )
        return gray

    if count == 3:
        rgb = safe_rgb(*components)
        if rgb is None:
            log.warning(
                "Cannot set %s color: components %r cannot be parsed as RGB",
                context,
                components,
            )
        return rgb

    if count == 4:
        cmyk = safe_cmyk(*components)
        if cmyk is None:
            log.warning(
                "Cannot set %s color: components %r cannot be parsed as CMYK",
                context,
                components,
            )
        return cmyk

    log.warning(
        "Cannot set %s color: %d components specified, "
        "but only 1 (grayscale), 3 (RGB), and 4 (CMYK) are supported",
        context,
        count,
    )
    return None

869 

def do_SCN(self) -> None:
    """Set color for stroking operations.

    The number of operands popped is the component count of the current
    stroking colour space. Pattern colour spaces follow
    ISO 32000-1:2008 4.5.5 (PDF 1.7) and ISO 32000-2:2020 8.7.3 (PDF 2.0):
    - Colored patterns (PaintType=1): single operand (pattern name)
    - Uncolored patterns (PaintType=2): n+1 operands (colors + pattern name)

    Any invalid input is reported through the log and leaves the stroking
    colour unchanged.
    """
    colorspace = self.graphicstate.scs
    n = colorspace.ncomponents
    components = self.pop(n)

    if len(components) != n:
        log.warning(
            "Cannot set stroke color because expected %d components but got %r",
            n,
            components,
        )
        return

    if colorspace.name != "Pattern":
        # Standard colors (gray, RGB, CMYK) - common case
        color = self._parse_color_components(components, "stroke")
        if color is not None:
            self.graphicstate.scolor = color
        return

    if not components:
        return

    # Pattern color space (ISO 32000 8.7.3.2-3): the last operand is always
    # the pattern name and must be a name object (PSLiteral).
    *base_color_components, pattern_component = components
    if not isinstance(pattern_component, PSLiteral):
        log.warning(
            "Pattern color space requires name object (PSLiteral), "
            "got %s: %r. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name SCN'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name SCN'.",
            type(pattern_component).__name__,
            pattern_component,
        )
        return

    pattern_name = literal_name(pattern_component)

    if not base_color_components:
        # Colored tiling pattern (PaintType=1): just the pattern name.
        self.graphicstate.scolor = pattern_name
        log.debug("Set stroke pattern (colored): %s", pattern_name)
        return

    # Uncolored tiling pattern (PaintType=2): base colour + pattern name.
    base_color = self._parse_color_components(
        base_color_components, "stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    self.graphicstate.scolor = (base_color, pattern_name)
    log.debug("Set stroke pattern (uncolored): %s + %s", base_color, pattern_name)

935 

def do_scn(self) -> None:
    """Set color for nonstroking operations.

    The number of operands popped is the component count of the current
    non-stroking colour space. Pattern colour spaces follow
    ISO 32000-1:2008 4.5.5 (PDF 1.7) and ISO 32000-2:2020 8.7.3 (PDF 2.0):
    - Colored patterns (PaintType=1): single operand (pattern name)
    - Uncolored patterns (PaintType=2): n+1 operands (colors + pattern name)

    Any invalid input is reported through the log and leaves the
    non-stroking colour unchanged.
    """
    colorspace = self.graphicstate.ncs
    n = colorspace.ncomponents
    components = self.pop(n)

    if len(components) != n:
        log.warning(
            "Cannot set non-stroke color because expected %d components but got %r",
            n,
            components,
        )
        return

    if colorspace.name != "Pattern":
        # Standard colors (gray, RGB, CMYK) - common case
        color = self._parse_color_components(components, "non-stroke")
        if color is not None:
            self.graphicstate.ncolor = color
        return

    if not components:
        return

    # Pattern color space (ISO 32000 8.7.3.2-3): the last operand is always
    # the pattern name and must be a name object (PSLiteral).
    *base_color_components, pattern_component = components
    if not isinstance(pattern_component, PSLiteral):
        log.warning(
            "Pattern color space requires name object (PSLiteral), "
            "got %s: %r. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name scn'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name scn'.",
            type(pattern_component).__name__,
            pattern_component,
        )
        return

    pattern_name = literal_name(pattern_component)

    if not base_color_components:
        # Colored tiling pattern (PaintType=1): just the pattern name.
        self.graphicstate.ncolor = pattern_name
        log.debug("Set non-stroke pattern (colored): %s", pattern_name)
        return

    # Uncolored tiling pattern (PaintType=2): base colour + pattern name.
    base_color = self._parse_color_components(
        base_color_components, "non-stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    self.graphicstate.ncolor = (base_color, pattern_name)
    log.debug(
        "Set non-stroke pattern (uncolored): %s + %s",
        base_color,
        pattern_name,
    )

1003 

def do_SC(self) -> None:
    """Set the colour used for stroking operations.

    The operator has no logic of its own: it simply forwards to
    ``do_SCN``, which consumes the operands from the argument stack.
    """
    # SC is handled exactly like SCN.
    self.do_SCN()

1007 

def do_sc(self) -> None:
    """Set the colour used for non-stroking operations.

    The operator has no logic of its own: it simply forwards to
    ``do_scn``, which consumes the operands from the argument stack.
    """
    # sc is handled exactly like scn.
    self.do_scn()

1011 

def do_sh(self, name: object) -> None:
    """Paint the area defined by a shading pattern.

    This implementation is a no-op: the operand is accepted and ignored.

    :param name: the operand supplied to the ``sh`` operator (unused).
    """

1014 

def do_BT(self) -> None:
    """Begin a text object.

    Starting a text object initialises the text matrix (Tm) and the text
    line matrix (Tlm) to the identity matrix.  Text objects cannot be
    nested: a second BT must not appear before the matching ET.

    The actual state change is performed by ``self.textstate.reset()``.
    """
    text_state = self.textstate
    text_state.reset()

1023 

def do_ET(self) -> None:
    """End a text object.

    This implementation is a no-op; no state is changed.
    """

1026 

def do_BX(self) -> None:
    """Begin a compatibility section.

    This implementation is a no-op; no state is changed.
    """

1029 

def do_EX(self) -> None:
    """End a compatibility section.

    This implementation is a no-op; no state is changed.
    """

1032 

def do_MP(self, tag: PDFStackT) -> None:
    """Define a marked-content point.

    :param tag: operand naming the point.  It is forwarded to the
        device's ``do_tag`` only when it is a ``PSLiteral``; any other
        value is reported with a warning and the operator is ignored.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot define marked-content point because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.do_tag(tag)

1042 

def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Define a marked-content point with an associated property list.

    :param tag: operand naming the point.  It must be a ``PSLiteral``;
        any other value is reported with a warning and the operator is
        ignored.
    :param props: the property-list operand, passed through unchanged to
        the device's ``do_tag``.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot define marked-content point with property list "
            "because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.do_tag(tag, props)

1053 

def do_BMC(self, tag: PDFStackT) -> None:
    """Begin a marked-content sequence.

    :param tag: operand naming the sequence.  It is forwarded to the
        device's ``begin_tag`` only when it is a ``PSLiteral``; any other
        value is reported with a warning and the operator is ignored.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot begin marked-content sequence because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.begin_tag(tag)

1063 

def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Begin a marked-content sequence with an associated property list.

    :param tag: operand naming the sequence.  It must be a ``PSLiteral``;
        any other value is reported with a warning and the operator is
        ignored.
    :param props: the property-list operand, passed through unchanged to
        the device's ``begin_tag``.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot begin marked-content sequence with property list "
            "because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.begin_tag(tag, props)

1074 

def do_EMC(self) -> None:
    """End the current marked-content sequence.

    The event is reported to the output device via ``end_tag``.
    """
    device = self.device
    device.end_tag()

1078 

def do_Tc(self, space: PDFStackT) -> None:
    """Set the character spacing.

    Character spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units.  When
        it cannot be converted by ``safe_float`` a warning is logged and
        the current character spacing is left untouched.
    """
    charspace = safe_float(space)
    if charspace is not None:
        self.textstate.charspace = charspace
        return
    log.warning(
        "Could not set character spacing because %r is an invalid float value",
        space,
    )

1094 

def do_Tw(self, space: PDFStackT) -> None:
    """Set the word spacing.

    Word spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units.  When
        it cannot be converted by ``safe_float`` a warning is logged and
        the current word spacing is left untouched.
    """
    wordspace = safe_float(space)
    if wordspace is not None:
        self.textstate.wordspace = wordspace
        return
    log.warning(
        "Could not set word spacing because %r is an invalid float value",
        space,
    )

1110 

def do_Tz(self, scale: PDFStackT) -> None:
    """Set the horizontal scaling.

    :param scale: a number specifying the percentage of the normal width.
        When it cannot be converted by ``safe_float`` a warning is logged
        and the current scaling is left untouched.
    """
    scaling = safe_float(scale)
    if scaling is not None:
        self.textstate.scaling = scaling
        return
    log.warning(
        "Could not set horizontal scaling because %r is an invalid float value",
        scale,
    )

1125 

def do_TL(self, leading: PDFStackT) -> None:
    """Set the text leading.

    Text leading is used only by the T*, ', and " operators.

    :param leading: a number expressed in unscaled text space units.
        When it cannot be converted by ``safe_float`` a warning is logged
        and the current leading is left untouched.
    """
    parsed = safe_float(leading)
    if parsed is not None:
        # The text state stores the leading with its sign flipped.
        self.textstate.leading = -parsed
        return
    log.warning(
        "Could not set text leading because %r is an invalid float value",
        leading,
    )

1141 

def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
    """Select the text font and font size.

    :param fontid: the name of a font resource in the Font subdictionary
        of the current resource dictionary; it is looked up in
        ``self.fontmap``.
    :param fontsize: a number representing a scale factor.  An
        unparsable value is logged and the current size is kept.
    :raises PDFInterpreterError: in strict mode, when ``fontid`` is not
        present in the font map.
    """
    try:
        font = self.fontmap[literal_name(fontid)]
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined Font id: {fontid!r}") from err
        # Lenient mode: substitute whatever font the resource manager
        # returns for an empty specification.
        font = self.rsrcmgr.get_font(None, {})
    self.textstate.font = font

    size = safe_float(fontsize)
    if size is not None:
        self.textstate.fontsize = size
        return
    log.warning(
        "Could not set text font because %r is an invalid float value",
        fontsize,
    )

1164 

def do_Tr(self, render: PDFStackT) -> None:
    """Set the text rendering mode.

    :param render: the rendering-mode operand.  When it cannot be
        converted by ``safe_int`` a warning is logged and the current
        mode is left untouched.
    """
    mode = safe_int(render)
    if mode is not None:
        self.textstate.render = mode
        return
    log.warning(
        "Could not set text rendering mode because %r is an invalid int value",
        render,
    )

1176 

def do_Ts(self, rise: PDFStackT) -> None:
    """Set the text rise.

    :param rise: a number expressed in unscaled text space units.  When
        it cannot be converted by ``safe_float`` a warning is logged and
        the current rise is left untouched.
    """
    parsed = safe_float(rise)
    if parsed is not None:
        self.textstate.rise = parsed
        return
    log.warning(
        "Could not set text rise because %r is an invalid float value",
        rise,
    )

1191 

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line.

    The new position is offset from the start of the current line by
    (tx, ty).

    :param tx: horizontal offset operand.
    :param ty: vertical offset operand.
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted by ``safe_float``.  In lenient mode the text matrix is
        left unchanged instead.
    """
    dx = safe_float(tx)
    dy = safe_float(ty)
    if dx is None or dy is None:
        if settings.STRICT:
            raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")
    else:
        # Translate the text matrix by (dx, dy) expressed in text space.
        a, b, c, d, e, f = self.textstate.matrix
        self.textstate.matrix = (
            a,
            b,
            c,
            d,
            dx * a + dy * c + e,
            dx * b + dy * d + f,
        )

    # The line matrix is reset even when the operands were rejected.
    self.textstate.linematrix = (0, 0)

1209 

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line and set the leading.

    The new position is offset from the start of the current line by
    (tx, ty).  As a side effect, this operator sets the leading parameter
    in the text state.

    :param tx: horizontal offset operand.
    :param ty: vertical offset operand; when it parses as a float it is
        also stored as the text state's leading.
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted by ``safe_float``.  In lenient mode the text matrix is
        left unchanged instead.
    """
    tx_ = safe_float(tx)
    ty_ = safe_float(ty)

    if tx_ is not None and ty_ is not None:
        # Translate the text matrix by (tx, ty) expressed in text space.
        (a, b, c, d, e, f) = self.textstate.matrix
        e_new = tx_ * a + ty_ * c + e
        f_new = tx_ * b + ty_ * d + f
        self.textstate.matrix = (a, b, c, d, e_new, f_new)

    elif settings.STRICT:
        # The message must be an f-string so the offending operands are
        # actually interpolated (same format as the Td operator).
        raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

    # The leading is updated whenever ty itself is valid, independently
    # of whether tx could be parsed.
    if ty_ is not None:
        self.textstate.leading = ty_

    self.textstate.linematrix = (0, 0)

1232 

def do_Tm(
    self,
    a: PDFStackT,
    b: PDFStackT,
    c: PDFStackT,
    d: PDFStackT,
    e: PDFStackT,
    f: PDFStackT,
) -> None:
    """Set the text matrix and reset the text line matrix.

    The six operands are validated together by ``safe_matrix``.  If any
    of them is rejected, a warning is logged and neither the text matrix
    nor the line matrix is modified.
    """
    operands = (a, b, c, d, e, f)
    parsed = safe_matrix(*operands)
    if parsed is None:
        log.warning(
            "Could not set text matrix because "
            "not all values in %r can be parsed as floats",
            operands,
        )
        return

    self.textstate.matrix = parsed
    self.textstate.linematrix = (0, 0)

1255 

def do_T_a(self) -> None:
    """Move to the start of the next text line (the ``T*`` operator).

    The translation part of the text matrix is shifted by the stored
    leading along the matrix's second axis, and the line matrix is reset
    to ``(0, 0)``.
    """
    state = self.textstate
    a, b, c, d, e, f = state.matrix
    lead = state.leading
    state.matrix = (a, b, c, d, lead * c + e, lead * d + f)
    state.linematrix = (0, 0)

1268 

def do_TJ(self, seq: PDFStackT) -> None:
    """Show text, allowing individual glyph positioning.

    :param seq: the operand of the operator; it is handed to the device's
        ``render_string`` as a ``PDFTextSeq`` without further checks.
    :raises PDFInterpreterError: in strict mode, when no font has been
        selected.  In lenient mode the operator is silently ignored.
    """
    text_state = self.textstate
    if text_state.font is None:
        if settings.STRICT:
            raise PDFInterpreterError("No font specified!")
        return

    render = self.device.render_string
    text_seq = cast(PDFTextSeq, seq)
    graphic_state = self.graphicstate
    # The device receives a copy of the graphics state so later operators
    # cannot mutate what it was given.
    render(text_state, text_seq, graphic_state.ncs, graphic_state.copy())

1281 

def do_Tj(self, s: PDFStackT) -> None:
    """Show a single text string.

    Implemented as a ``TJ`` call with a one-element sequence.

    :param s: the string operand to show.
    """
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1285 

def do__q(self, s: PDFStackT) -> None:
    """Move to the next line and show a text string.

    This handles the ' (single quote) operator: a ``T*`` line move
    followed by showing ``s`` as a one-element ``TJ`` sequence.

    :param s: the string operand to show.
    """
    # Step 1: advance to the next text line.
    self.do_T_a()
    # Step 2: show the string on that line.
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1293 

def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
    """Set word and character spacing, move to next line, and show text.

    This handles the " (double quote) operator, which behaves like
    ``aw Tw``, ``ac Tc`` and then the ' (single quote) operator applied
    to ``s``.

    :param aw: word spacing operand, forwarded to ``do_Tw``.
    :param ac: character spacing operand, forwarded to ``do_Tc``.
    :param s: the string operand to show.
    """
    self.do_Tw(aw)
    self.do_Tc(ac)
    # Advance to the next text line before showing the string; without
    # this step the text would be drawn on the current line.
    self.do_T_a()
    self.do_TJ([s])

1302 

def do_BI(self) -> None:
    """Begin an inline image object.

    This implementation is a no-op; no state is changed.
    """

1305 

def do_ID(self) -> None:
    """Begin the data of an inline image.

    This implementation is a no-op; no state is changed.
    """

1308 

def do_EI(self, obj: PDFStackT) -> None:
    """End an inline image object and hand it to the device.

    :param obj: the operand of the operator.  It is rendered only when it
        is a ``PDFStream`` containing both the ``W`` and ``H`` keys;
        anything else is silently ignored.
    """
    if not isinstance(obj, PDFStream) or "W" not in obj or "H" not in obj:
        return

    # Inline images have no resource name, so the object's identity is
    # used as the figure/image name.
    figure_name = str(id(obj))
    device = self.device
    device.begin_figure(figure_name, (0, 0, 1, 1), MATRIX_IDENTITY)
    device.render_image(figure_name, obj)
    device.end_figure(figure_name)

1316 

def do_Do(self, xobjid_arg: PDFStackT) -> None:
    """Invoke a named XObject.

    Form XObjects that carry a ``BBox`` are rendered recursively through
    a sub-interpreter; image XObjects that carry ``Width`` and ``Height``
    are passed to the device's ``render_image``.  Every other XObject is
    ignored.

    :param xobjid_arg: the name operand identifying the XObject in
        ``self.xobjmap``.
    :raises PDFInterpreterError: in strict mode, when the name is not in
        the XObject map.  In lenient mode the operator is ignored.
    """
    xobjid = literal_name(xobjid_arg)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined xobject id: {xobjid!r}") from err
        return
    log.debug("Processing xobj: %r", xobj)

    subtype = xobj.get("Subtype")

    if subtype is LITERAL_FORM and "BBox" in xobj:
        interpreter = self.subinterp()
        bbox = cast(Rect, list_value(xobj["BBox"]))
        matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        xobjres = xobj.get("Resources")
        if xobjres:
            resources = dict_value(xobjres)
        else:
            resources = self.resources.copy()
        self.device.begin_figure(xobjid, bbox, matrix)
        interpreter.render_contents(
            resources,
            [xobj],
            ctm=mult_matrix(matrix, self.ctm),
        )
        self.device.end_figure(xobjid)
        return

    if subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
        self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
        self.device.render_image(xobjid, xobj)
        self.device.end_figure(xobjid)
        return

    # Any other XObject type is unsupported and deliberately skipped.

1351 

def process_page(self, page: PDFPage) -> None:
    """Render one page on the output device.

    An initial transformation matrix is derived from the page's media box
    and its ``rotate`` value (90, 180 and 270 are handled specially; any
    other value is treated as unrotated).  The device is notified before
    and after the page's content streams are rendered.

    :param page: the page to process.
    """
    log.debug("Processing page: %r", page)
    x0, y0, x1, y1 = page.mediabox
    rotation = page.rotate
    if rotation == 90:
        ctm = (0, -1, 1, 0, -y0, x1)
    elif rotation == 180:
        ctm = (-1, 0, 0, -1, x1, y1)
    elif rotation == 270:
        ctm = (0, 1, -1, 0, y1, -x0)
    else:
        # No (recognised) rotation: only move the media box origin to 0,0.
        ctm = (1, 0, 0, 1, -x0, -y0)

    device = self.device
    device.begin_page(page, ctm)
    self.render_contents(page.resources, page.contents, ctm=ctm)
    device.end_page(page)

1366 

def render_contents(
    self,
    resources: dict[object, object],
    streams: Sequence[object],
    ctm: Matrix = MATRIX_IDENTITY,
) -> None:
    """Render a sequence of content streams.

    The interpreter's resources and state are (re)initialised from the
    arguments before the streams are executed.  This method may be called
    recursively.

    :param resources: the resource dictionary to initialise from.
    :param streams: the content streams to execute.
    :param ctm: the transformation matrix used to initialise the state.
    """
    log.debug(
        "render_contents: resources=%r, streams=%r, ctm=%r",
        resources,
        streams,
        ctm,
    )
    # Set-up must happen in this order: resources first, then state.
    self.init_resources(resources)
    self.init_state(ctm)
    stream_list = list_value(streams)
    self.execute(stream_list)

1386 

def execute(self, streams: Sequence[object]) -> None:
    """Parse the given content streams and dispatch every operator.

    Operands are pushed onto the interpreter's stack; each keyword is
    mapped to a ``do_<name>`` method (``*``, ``"`` and ``'`` become
    ``_a``, ``_w`` and ``_q``).  The number of operands popped for an
    operator is taken from the handler's positional-argument count.

    :param streams: objects resolved with ``stream_value``.
    :raises PDFInterpreterError: in strict mode, for an operator without
        a matching ``do_<name>`` method.
    """
    # Detect and prevent circular references in content streams
    # (including Form XObjects).  Stream IDs executed by this interpreter
    # are recorded in ``self.stream_ids``; a stream whose ID is already in
    # ``self.parent_stream_ids`` is skipped to prevent infinite recursion
    # (CWE-835 vulnerability).
    self.stream_ids.clear()
    valid_streams: list[PDFStream] = []
    for candidate in streams:
        stream = stream_value(candidate)
        objid = stream.objid
        if objid is None:
            # Inline streams without object IDs can't be tracked for circular refs
            log.warning(
                "Execute called on non-indirect object (inline image?) %r", stream
            )
            continue
        if objid in self.parent_stream_ids:
            log.warning(
                "Refusing to execute circular reference to content stream %d",
                stream.objid,
            )
            continue
        valid_streams.append(stream)
        self.stream_ids.add(objid)

    try:
        parser = PDFContentParser(valid_streams)
    except PSEOF:
        # empty page
        return

    while True:
        try:
            _, token = parser.nextobject()
        except PSEOF:
            break

        if not isinstance(token, PSKeyword):
            # Not an operator: keep it as an operand for a later one.
            self.push(token)
            continue

        name = keyword_name(token)
        # Operator characters that are not valid in Python identifiers
        # are replaced by fixed suffixes.
        suffix = name.replace("*", "_a").replace('"', "_w").replace("'", "_q")
        method = f"do_{suffix}"

        if not hasattr(self, method):
            if settings.STRICT:
                error_msg = f"Unknown operator: {name!r}"
                raise PDFInterpreterError(error_msg)
            continue

        func = getattr(self, method)
        # Positional parameters minus ``self`` = operands to pop.
        nargs = func.__code__.co_argcount - 1
        if not nargs:
            log.debug("exec: %s", name)
            func()
            continue

        args = self.pop(nargs)
        log.debug("exec: %s %r", name, args)
        # An operator with too few operands on the stack is skipped.
        if len(args) == nargs:
            func(*args)