Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/pdfinterp.py: 85%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

739 statements  

1import logging 

2import re 

3from collections.abc import Mapping, Sequence 

4from io import BytesIO 

5from typing import Union, cast 

6 

7from pdfminer import settings 

8from pdfminer.casting import safe_cmyk, safe_float, safe_int, safe_matrix, safe_rgb 

9from pdfminer.cmapdb import CMap, CMapBase, CMapDB 

10from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace 

11from pdfminer.pdfdevice import PDFDevice, PDFTextSeq 

12from pdfminer.pdfexceptions import PDFException, PDFValueError 

13from pdfminer.pdffont import ( 

14 PDFCIDFont, 

15 PDFFont, 

16 PDFFontError, 

17 PDFTrueTypeFont, 

18 PDFType1Font, 

19 PDFType3Font, 

20) 

21from pdfminer.pdfpage import PDFPage 

22from pdfminer.pdftypes import ( 

23 LITERALS_ASCII85_DECODE, 

24 PDFObjRef, 

25 PDFStream, 

26 dict_value, 

27 int_value, 

28 list_value, 

29 resolve1, 

30 stream_value, 

31) 

32from pdfminer.psexceptions import PSEOF, PSTypeError 

33from pdfminer.psparser import ( 

34 KWD, 

35 LIT, 

36 PSKeyword, 

37 PSLiteral, 

38 PSStackParser, 

39 PSStackType, 

40 keyword_name, 

41 literal_name, 

42) 

43from pdfminer.utils import ( 

44 MATRIX_IDENTITY, 

45 Matrix, 

46 PathSegment, 

47 Point, 

48 Rect, 

49 choplist, 

50 mult_matrix, 

51) 

52 

53log = logging.getLogger(__name__) 

54 

55 

class PDFResourceError(PDFException):
    """PDFException subclass for resource problems (not raised in the code shown here)."""

58 

59 

class PDFInterpreterError(PDFException):
    """PDFException subclass raised by the interpreter, e.g. for an undefined colour space in strict mode."""

62 

63 

# Name objects built once at import time with LIT(). Code in this module
# compares against some of them by identity (``is``) rather than equality.
LITERAL_PDF = LIT("PDF")
LITERAL_TEXT = LIT("Text")
LITERAL_FONT = LIT("Font")
LITERAL_FORM = LIT("Form")
LITERAL_IMAGE = LIT("Image")

69 

70 

class PDFTextState:
    """Mutable bundle of the text parameters tracked while interpreting content.

    ``matrix`` and ``linematrix`` are assigned by :meth:`reset`; every other
    attribute receives its default value in ``__init__``.
    """

    matrix: Matrix
    linematrix: Point

    def __init__(self) -> None:
        """Create a state with no font, scaling 100 and all other numbers 0."""
        self.font: PDFFont | None = None
        self.fontsize: float = 0
        self.charspace: float = 0
        self.wordspace: float = 0
        self.scaling: float = 100
        self.leading: float = 0
        self.render: int = 0
        self.rise: float = 0
        # reset() supplies self.matrix and self.linematrix.
        self.reset()

    def __repr__(self) -> str:
        """Return ``<PDFTextState: name=value, ...>`` listing every attribute."""
        shown = (
            "font",
            "fontsize",
            "charspace",
            "wordspace",
            "scaling",
            "leading",
            "render",
            "rise",
            "matrix",
            "linematrix",
        )
        details = ", ".join(f"{name}={getattr(self, name)!r}" for name in shown)
        return f"<PDFTextState: {details}>"

    def copy(self) -> "PDFTextState":
        """Return a new PDFTextState holding the same attribute values (shallow)."""
        duplicate = PDFTextState()
        for name in (
            "font",
            "fontsize",
            "charspace",
            "wordspace",
            "scaling",
            "leading",
            "render",
            "rise",
            "matrix",
            "linematrix",
        ):
            setattr(duplicate, name, getattr(self, name))
        return duplicate

    def reset(self) -> None:
        """Set the text matrix to the identity and the line matrix to (0, 0)."""
        self.matrix = MATRIX_IDENTITY
        self.linematrix = (0, 0)

119 

120 

# Colour values that are usable on their own, or as the base colour of an
# uncolored pattern.
StandardColor = Union[
    float,  # single gray level
    tuple[float, float, float],  # red, green, blue
    tuple[float, float, float, float],  # cyan, magenta, yellow, black
]

# Every colour representation, pattern forms included.
Color = Union[
    StandardColor,  # plain gray / RGB / CMYK value
    str,  # pattern name (colored pattern, PaintType=1)
    # (base_color, pattern_name) pair (uncolored pattern, PaintType=2)
    tuple[StandardColor, str],
]

136 

137 

class PDFGraphicState:
    """Container for graphics-state parameters (line style and colours).

    A fresh instance has line width 0, both colours set to 0 in the predefined
    "DeviceGray" colour space, and every other parameter ``None``.
    """

    def __init__(self) -> None:
        """Populate every parameter with its initial value."""
        # Line-drawing parameters.
        self.linewidth: float = 0
        self.linecap: object | None = None
        self.linejoin: object | None = None
        self.miterlimit: object | None = None
        self.dash: tuple[object, object] | None = None
        self.intent: object | None = None
        self.flatness: object | None = None

        # Colour and colour space used when stroking.
        self.scolor: Color = 0
        self.scs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

        # Colour and colour space used for everything except stroking.
        self.ncolor: Color = 0
        self.ncs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

    def copy(self) -> "PDFGraphicState":
        """Return a new PDFGraphicState holding the same attribute values (shallow)."""
        duplicate = PDFGraphicState()
        for name in (
            "linewidth",
            "linecap",
            "linejoin",
            "miterlimit",
            "dash",
            "intent",
            "flatness",
            "scolor",
            "scs",
            "ncolor",
            "ncs",
        ):
            setattr(duplicate, name, getattr(self, name))
        return duplicate

    def __repr__(self) -> str:
        """Show the line parameters and both colours; colour spaces are not listed."""
        shown = (
            ("linewidth", self.linewidth),
            ("linecap", self.linecap),
            ("linejoin", self.linejoin),
            ("miterlimit", self.miterlimit),
            ("dash", self.dash),
            ("intent", self.intent),
            ("flatness", self.flatness),
            ("stroking color", self.scolor),
            ("non stroking color", self.ncolor),
        )
        details = ", ".join(f"{label}={value!r}" for label, value in shown)
        return f"<PDFGraphicState: {details}>"

184 

185 

class PDFResourceManager:
    """Repository of shared resources.

    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
    """

    def __init__(self, caching: bool = True) -> None:
        """Create a manager; ``caching`` controls whether fonts are remembered."""
        self.caching = caching
        # Fonts created so far, keyed by the object id passed to get_font().
        self._cached_fonts: dict[object, PDFFont] = {}

    def get_procset(self, procs: Sequence[object]) -> None:
        """Walk a procedure-set list; no entry currently triggers any action."""
        for proc in procs:
            if proc is LITERAL_PDF or proc is LITERAL_TEXT:
                pass
            else:
                pass

    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        """Look up a CMap by name.

        Returns an empty ``CMap`` when the name is unknown, unless ``strict``
        is true, in which case ``CMapDB.CMapNotFound`` propagates.
        """
        try:
            return CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
            if strict:
                raise
            return CMap()

    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        """Return the font described by ``spec``, reusing a cached one if possible.

        Args:
            objid: Cache key for the font; a falsy value disables both the
                cache lookup and the cache store for this call.
            spec: The font dictionary.

        Raises:
            PDFFontError: in strict mode when /Type is missing or not /Font,
                when /Subtype is missing, or when the subtype is unknown;
                in any mode when a Type0 font has an empty /DescendantFonts.
        """
        if objid and objid in self._cached_fonts:
            return self._cached_fonts[objid]

        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        # .get() so that a missing /Type is reported as PDFFontError rather
        # than leaking a bare KeyError out of the strict-mode check.
        if settings.STRICT and spec.get("Type") is not LITERAL_FONT:
            raise PDFFontError("Type is not /Font")

        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        else:
            if settings.STRICT:
                raise PDFFontError("Font Subtype is not specified.")
            subtype = "Type1"

        font: PDFFont
        if subtype in ("Type1", "MMType1"):
            font = PDFType1Font(self, spec)
        elif subtype == "TrueType":
            font = PDFTrueTypeFont(self, spec)
        elif subtype == "Type3":
            font = PDFType3Font(self, spec)
        elif subtype in ("CIDFontType0", "CIDFontType2"):
            font = PDFCIDFont(self, spec)
        elif subtype == "Type0":
            dfonts = list_value(spec["DescendantFonts"])
            # Explicit check instead of ``assert``: asserts vanish under -O,
            # which would turn this into an IndexError on dfonts[0].
            if not dfonts:
                raise PDFFontError("Type0 font has no DescendantFonts")
            subspec = dict_value(dfonts[0]).copy()
            # The descendant inherits these two entries from the Type0 parent.
            for k in ("Encoding", "ToUnicode"):
                if k in spec:
                    subspec[k] = resolve1(spec[k])
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError(f"Invalid Font spec: {spec!r}")
            # Unknown subtype in lenient mode: fall back to Type1 handling.
            font = PDFType1Font(self, spec)  # this is so wrong!

        if objid and self.caching:
            self._cached_fonts[objid] = font
        return font

255 

256 

class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
    """Stack parser that reads a sequence of content streams back to back.

    Streams are opened lazily, one after another, and inline images
    (``BI ... ID ... EI``) are converted into ``PDFStream`` objects.
    """

    KEYWORD_BI = KWD(b"BI")
    KEYWORD_ID = KWD(b"ID")
    KEYWORD_EI = KWD(b"EI")

    def __init__(self, streams: Sequence[object]) -> None:
        """Remember ``streams``; nothing is opened until data is requested."""
        self.streams = streams
        self.istream = 0
        # PSStackParser.__init__(fp=None) is safe only because we've overloaded
        # all the methods that would attempt to access self.fp without first
        # calling self.fillfp().
        PSStackParser.__init__(self, None)  # type: ignore[arg-type]

    def fillfp(self) -> bool:
        """Open the next stream if none is open.

        Returns:
            True when a new stream was opened, False when one was already open.

        Raises:
            PSEOF: when every stream has already been consumed.
        """
        if self.fp:
            return False
        if self.istream >= len(self.streams):
            raise PSEOF("Unexpected EOF, file truncated?")
        strm = stream_value(self.streams[self.istream])
        self.istream += 1
        self.fp = BytesIO(strm.get_data())
        return True

    def seek(self, pos: int) -> None:
        """Seek within the current stream, opening one first if necessary."""
        self.fillfp()
        PSStackParser.seek(self, pos)

    def fillbuf(self) -> bool:
        """Refill the read buffer once it is exhausted.

        Returns:
            The result of the last ``fillfp()`` call made while refilling
            (True when that call opened a new stream), or False when the
            buffer still had unread bytes.
        """
        if self.charpos < len(self.buf):
            return False
        new_stream = False
        while True:
            new_stream = self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
            # Current stream is exhausted; drop it so fillfp() opens the next.
            self.fp = None  # type: ignore[assignment]
        self.charpos = 0
        return new_stream

    def get_inline_data(self, pos: int, target: bytes = b"EI") -> tuple[int, bytes]:
        """Collect raw bytes from ``pos`` up to ``target`` followed by whitespace.

        ``i`` counts how many bytes of ``target`` have matched so far; once the
        whole target has matched, one whitespace byte must follow to finish.

        Returns:
            ``(pos, data)`` where ``data`` excludes the terminator, the
            whitespace byte after it and one trailing end-of-line sequence.
        """
        self.seek(pos)
        i = 0
        data = b""
        while i <= len(target):
            self.fillbuf()
            if i:
                # Part of the terminator has matched: test the next byte.
                ci = self.buf[self.charpos]
                c = bytes((ci,))
                data += c
                self.charpos += 1
                if (len(target) <= i and c.isspace()) or (
                    i < len(target) and c == (bytes((target[i],)))
                ):
                    i += 1
                else:
                    i = 0
            else:
                # No match in progress: jump to the next possible first byte.
                try:
                    j = self.buf.index(target[0], self.charpos)
                    data += self.buf[self.charpos : j + 1]
                    self.charpos = j + 1
                    i = 1
                except ValueError:
                    data += self.buf[self.charpos :]
                    self.charpos = len(self.buf)
        data = data[: -(len(target) + 1)]  # strip the last part
        data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data)
        return (pos, data)

    def flush(self) -> None:
        """Move everything on the stack into the results."""
        self.add_results(*self.popall())

    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handle a keyword token, assembling inline images along the way.

        ``BI`` opens an "inline" group. ``ID`` closes it, builds the image
        dictionary, reads the image bytes and pushes a ``PDFStream`` (plus an
        ``EI`` keyword when ``EI`` was the terminator). Any other keyword is
        pushed unchanged. A ``PSTypeError`` while handling ``ID`` is swallowed
        unless ``settings.STRICT`` is set.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, "inline")
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type("inline")
                if len(objs) % 2 != 0:
                    error_msg = f"Invalid dictionary construct: {objs!r}"
                    raise PSTypeError(error_msg)
                d = {literal_name(k): resolve1(v) for (k, v) in choplist(2, objs)}
                eos = b"EI"
                inline_filter = d.get("F")
                if isinstance(inline_filter, PSLiteral):
                    inline_filter = [inline_filter]
                # Only a non-empty list can name a first filter. An empty or
                # malformed /F value is treated as "no ASCII85 terminator"
                # instead of crashing on inline_filter[0].
                if (
                    isinstance(inline_filter, list)
                    and inline_filter
                    and inline_filter[0] in LITERALS_ASCII85_DECODE
                ):
                    eos = b"~>"
                (pos, data) = self.get_inline_data(pos + len(b"ID "), target=eos)
                if eos != b"EI":  # it may be necessary for decoding
                    data += eos
                obj = PDFStream(d, data)
                self.push((pos, obj))
                if eos == b"EI":  # otherwise it is still in the stream
                    self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if settings.STRICT:
                    raise
        else:
            self.push((pos, token))

362 

363 

# Element type of the PDF argument stack: PSStackType parameterised with
# PDFStream.
PDFStackT = PSStackType[PDFStream]

366 

367 

368class PDFPageInterpreter: 

369 """Processor for the content of a PDF page 

370 

371 Reference: PDF Reference, Appendix A, Operator Summary 

372 """ 

373 

def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
    """Bind the interpreter to a resource manager and an output device."""
    self.rsrcmgr, self.device = rsrcmgr, device
    # Stream IDs this interpreter is executing right now; used to detect
    # circular references.
    self.stream_ids: set[int] = set()
    # Stream IDs inherited from the interpreters higher up the call stack.
    self.parent_stream_ids: set[int] = set()

381 

def dup(self) -> "PDFPageInterpreter":
    """Build a new interpreter of the same class with the same manager and device."""
    factory = self.__class__
    return factory(self.rsrcmgr, self.device)

384 

def subinterp(self) -> "PDFPageInterpreter":
    """Return a dup() of this interpreter that knows the streams already in flight.

    Meant for nested content streams such as Form XObjects: unlike a plain
    dup(), the child's ``parent_stream_ids`` receives both this interpreter's
    ``parent_stream_ids`` and its ``stream_ids``, so circular references can
    be detected across nested invocations.
    """
    child = self.dup()
    child.parent_stream_ids.update(self.parent_stream_ids, self.stream_ids)
    return child

397 

def init_resources(self, resources: dict[object, object]) -> None:
    """Build the font, colour-space and XObject tables from a resource dictionary.

    The tables are always reset first; with a falsy ``resources`` they stay
    empty (the colour-space table still holds the predefined spaces).
    """
    self.resources = resources
    self.fontmap: dict[object, PDFFont] = {}
    self.xobjmap = {}
    self.csmap: dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
    if not resources:
        return

    def get_colorspace(spec: object) -> PDFColorSpace | None:
        # A colour space is either a bare name or an array whose first item
        # is the name; ICCBased and DeviceN take their component count from
        # the second array item.
        is_array = isinstance(spec, list)
        name = literal_name(spec[0] if is_array else spec)
        if is_array and len(spec) >= 2:
            if name == "ICCBased":
                return PDFColorSpace(name, int_value(stream_value(spec[1])["N"]))
            if name == "DeviceN":
                return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE.get(name)

    for category, entries in dict_value(resources).items():
        log.debug("Resource: %r: %r", category, entries)
        if category == "Font":
            for fontid, spec in dict_value(entries).items():
                # Only indirect references carry an object id to cache under.
                objid = spec.objid if isinstance(spec, PDFObjRef) else None
                spec = dict_value(spec)
                self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
        elif category == "ColorSpace":
            for csid, spec in dict_value(entries).items():
                colorspace = get_colorspace(resolve1(spec))
                if colorspace is not None:
                    self.csmap[csid] = colorspace
        elif category == "ProcSet":
            self.rsrcmgr.get_procset(list_value(entries))
        elif category == "XObject":
            # XObjects are stored as found, without resolving them here.
            self.xobjmap.update(dict_value(entries))

438 

def init_state(self, ctm: Matrix) -> None:
    """Start from a clean slate: empty stacks and path, fresh states, given CTM."""
    # Saved (ctm, textstate, graphicstate) triples.
    self.gstack: list[tuple[Matrix, PDFTextState, PDFGraphicState]] = []
    self.ctm = ctm
    self.device.set_ctm(self.ctm)
    self.textstate = PDFTextState()
    self.graphicstate = PDFGraphicState()
    self.curpath: list[PathSegment] = []
    # Operands waiting for the next operator.
    self.argstack: list[PDFStackT] = []

450 

def push(self, obj: PDFStackT) -> None:
    """Append ``obj`` to the end of the argument stack."""
    stack = self.argstack
    stack.append(obj)

453 

def pop(self, n: int) -> list[PDFStackT]:
    """Remove and return the last ``n`` items of the argument stack, in order.

    Returns an empty list for ``n == 0``; when fewer than ``n`` items are
    present, whatever is there is returned and the stack becomes empty.
    """
    if n == 0:
        return []
    taken, self.argstack = self.argstack[-n:], self.argstack[:-n]
    return taken

460 

def get_current_state(self) -> tuple[Matrix, PDFTextState, PDFGraphicState]:
    """Return the CTM together with copies of the text and graphic states."""
    ctm = self.ctm
    text_snapshot = self.textstate.copy()
    graphic_snapshot = self.graphicstate.copy()
    return (ctm, text_snapshot, graphic_snapshot)

463 

def set_current_state(
    self,
    state: tuple[Matrix, PDFTextState, PDFGraphicState],
) -> None:
    """Install a ``(ctm, textstate, graphicstate)`` triple and tell the device the CTM."""
    ctm, textstate, graphicstate = state
    self.ctm = ctm
    self.textstate = textstate
    self.graphicstate = graphicstate
    self.device.set_ctm(self.ctm)

470 

def do_q(self) -> None:
    """Operator ``q``: push a snapshot of the current state onto the state stack."""
    snapshot = self.get_current_state()
    self.gstack.append(snapshot)

474 

def do_Q(self) -> None:
    """Operator ``Q``: reinstate the most recently saved state, if there is one."""
    if not self.gstack:
        # Nothing was saved: an unbalanced Q is ignored.
        return
    self.set_current_state(self.gstack.pop())

479 

def do_cm(
    self,
    a1: PDFStackT,
    b1: PDFStackT,
    c1: PDFStackT,
    d1: PDFStackT,
    e1: PDFStackT,
    f1: PDFStackT,
) -> None:
    """Operator ``cm``: multiply the given matrix into the CTM and notify the device.

    If the six operands cannot be turned into a matrix, a warning is logged
    and the CTM is left untouched.
    """
    operands = (a1, b1, c1, d1, e1, f1)
    matrix = safe_matrix(*operands)
    if matrix is None:
        log.warning(
            "Cannot concatenate matrix to current transformation matrix "
            "because not all values in %r can be parsed as floats",
            operands,
        )
        return
    self.ctm = mult_matrix(matrix, self.ctm)
    self.device.set_ctm(self.ctm)

501 

def do_w(self, linewidth: PDFStackT) -> None:
    """Operator ``w``: store the line width, scaled by the CTM.

    The scale factor is the length of the vector formed by the first two CTM
    entries. An operand that is not a valid float only produces a warning.
    """
    width = safe_float(linewidth)
    if width is None:
        log.warning(
            "Cannot set line width because %r is an invalid float value",
            linewidth,
        )
        return
    scale = (self.ctm[0] ** 2 + self.ctm[1] ** 2) ** 0.5
    self.graphicstate.linewidth = width * scale

513 

def do_J(self, linecap: PDFStackT) -> None:
    """Operator ``J``: record the line cap style operand as given."""
    state = self.graphicstate
    state.linecap = linecap

517 

def do_j(self, linejoin: PDFStackT) -> None:
    """Operator ``j``: record the line join style operand as given."""
    state = self.graphicstate
    state.linejoin = linejoin

521 

def do_M(self, miterlimit: PDFStackT) -> None:
    """Operator ``M``: record the miter limit operand as given."""
    state = self.graphicstate
    state.miterlimit = miterlimit

525 

def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
    """Operator ``d``: record the dash pattern as a ``(dash, phase)`` pair."""
    pattern = (dash, phase)
    self.graphicstate.dash = pattern

529 

def do_ri(self, intent: PDFStackT) -> None:
    """Operator ``ri``: record the colour rendering intent operand as given."""
    state = self.graphicstate
    state.intent = intent

533 

def do_i(self, flatness: PDFStackT) -> None:
    """Operator ``i``: record the flatness tolerance operand as given."""
    state = self.graphicstate
    state.flatness = flatness

537 

def do_gs(self, name: PDFStackT) -> None:
    """Operator ``gs`` (graphics state parameter dictionary): not implemented.

    The operand is accepted and ignored.
    """
    # TODO
    return None

541 

def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
    """Operator ``m``: add a move-to segment ``("m", x, y)`` to the current path.

    When either coordinate is not a valid float a warning is logged and the
    path is left unchanged.
    """
    coords = (safe_float(x), safe_float(y))
    if any(value is None for value in coords):
        log.warning(
            "Cannot start new subpath because not all values "
            "in %r can be parsed as floats",
            ("m", x, y),
        )
        return
    self.curpath.append(("m", *coords))

557 

def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
    """Operator ``l``: add a line-to segment ``("l", x, y)`` to the current path.

    When either coordinate is not a valid float a warning is logged and the
    path is left unchanged.
    """
    coords = (safe_float(x), safe_float(y))
    if any(value is None for value in coords):
        log.warning(
            "Cannot append straight line segment to path "
            "because not all values in %r can be parsed as floats",
            ("l", x, y),
        )
        return
    self.curpath.append(("l", *coords))

572 

def do_c(
    self,
    x1: PDFStackT,
    y1: PDFStackT,
    x2: PDFStackT,
    y2: PDFStackT,
    x3: PDFStackT,
    y3: PDFStackT,
) -> None:
    """Operator ``c``: add a curve segment with three control points to the path.

    The segment is stored as ``("c", x1, y1, x2, y2, x3, y3)``. If any operand
    is not a valid float a warning is logged and nothing is appended.
    """
    raw = (x1, y1, x2, y2, x3, y3)
    parsed = tuple(safe_float(value) for value in raw)
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("c", *raw),
        )
        return
    self.curpath.append(("c", *parsed))

606 

def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Operator ``v``: add a curve segment whose initial point is replicated.

    The segment is stored as ``("v", x2, y2, x3, y3)``. If any operand is not
    a valid float a warning is logged and nothing is appended.
    """
    raw = (x2, y2, x3, y3)
    parsed = tuple(safe_float(value) for value in raw)
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("v", *raw),
        )
        return
    self.curpath.append(("v", *parsed))

623 

def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Operator ``y``: add a curve segment whose final point is replicated.

    The segment is stored as ``("y", x1, y1, x3, y3)``. If any operand is not
    a valid float a warning is logged and nothing is appended.
    """
    raw = (x1, y1, x3, y3)
    parsed = tuple(safe_float(value) for value in raw)
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append curved segment to path "
            "because not all values in %r can be parsed as floats",
            ("y", *raw),
        )
        return
    self.curpath.append(("y", *parsed))

640 

def do_h(self) -> None:
    """Operator ``h``: add a close-subpath marker ``("h",)`` to the current path."""
    close_marker = ("h",)
    self.curpath.append(close_marker)

644 

def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
    """Operator ``re``: add a closed rectangle to the current path.

    The rectangle is emitted as one move-to, three line-to segments and a
    close marker. If any operand is not a valid float a warning is logged
    and nothing is appended.
    """
    parsed = [safe_float(value) for value in (x, y, w, h)]
    if any(value is None for value in parsed):
        log.warning(
            "Cannot append rectangle to path "
            "because not all values in %r can be parsed as floats",
            (x, y, w, h),
        )
        return
    left, bottom, width, height = parsed
    right = left + width
    top = bottom + height
    self.curpath.extend(
        [
            ("m", left, bottom),
            ("l", right, bottom),
            ("l", right, top),
            ("l", left, top),
            ("h",),
        ]
    )

665 

def do_S(self) -> None:
    """Operator ``S``: hand the path to the device for stroking, then start a new path."""
    stroke, fill, evenodd = True, False, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

670 

def do_s(self) -> None:
    """Operator ``s``: close the subpath (``h``) and then stroke it (``S``)."""
    # Closing must come first so the stroke includes the closing marker.
    self.do_h()
    self.do_S()

675 

def do_f(self) -> None:
    """Operator ``f``: hand the path to the device for a nonzero-winding fill, then start a new path."""
    stroke, fill, evenodd = False, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

680 

def do_F(self) -> None:
    """Fill path using nonzero winding number rule (obsolete).

    The PDF operator ``F`` is defined as equivalent to ``f`` and is kept only
    for compatibility, so this delegates to ``do_f()``. Without the
    delegation the path would be neither painted nor cleared, and would leak
    into whichever painting operator comes next.
    """
    self.do_f()

683 

def do_f_a(self) -> None:
    """Even-odd fill: hand the path to the device with the even-odd flag set, then start a new path."""
    stroke, fill, evenodd = False, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

688 

def do_B(self) -> None:
    """Operator ``B``: fill (nonzero winding) and stroke the path, then start a new path."""
    stroke, fill, evenodd = True, True, False
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

693 

def do_B_a(self) -> None:
    """Even-odd variant of ``B``: fill and stroke the path, then start a new path."""
    stroke, fill, evenodd = True, True, True
    self.device.paint_path(self.graphicstate, stroke, fill, evenodd, self.curpath)
    self.curpath = []

698 

def do_b(self) -> None:
    """Operator ``b``: close the subpath (``h``), then fill and stroke it (``B``)."""
    # Closing must come first so painting includes the closing marker.
    self.do_h()
    self.do_B()

703 

def do_b_a(self) -> None:
    """Even-odd variant of ``b``: close the subpath, then fill and stroke it."""
    # Closing must come first so painting includes the closing marker.
    self.do_h()
    self.do_B_a()

708 

def do_n(self) -> None:
    """Operator ``n``: discard the current path without painting anything."""
    fresh_path = []
    self.curpath = fresh_path

712 

def do_W(self) -> None:
    """Operator ``W`` (clip, nonzero winding rule): accepted but has no effect here."""
    return None

715 

def do_W_a(self) -> None:
    """Even-odd clipping operator: accepted but has no effect here."""
    return None

718 

def do_CS(self, name: PDFStackT) -> None:
    """Operator ``CS``: select the stroking colour space by name.

    Introduced in PDF 1.1

    An unknown name leaves the colour space unchanged; in strict mode it
    raises PDFInterpreterError instead.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    else:
        self.graphicstate.scs = colorspace

729 

def do_cs(self, name: PDFStackT) -> None:
    """Operator ``cs``: select the non-stroking colour space by name.

    An unknown name leaves the colour space unchanged; in strict mode it
    raises PDFInterpreterError instead.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined ColorSpace: {name!r}") from err
    else:
        self.graphicstate.ncs = colorspace

737 

def do_G(self, gray: PDFStackT) -> None:
    """Operator ``G``: set the stroking colour to a gray level in DeviceGray.

    An operand that is not a valid float only produces a warning.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            "Cannot set gray level because %r is an invalid float value",
            gray,
        )
        return
    self.graphicstate.scolor = level
    self.graphicstate.scs = self.csmap["DeviceGray"]

750 

def do_g(self, gray: PDFStackT) -> None:
    """Operator ``g``: set the non-stroking colour to a gray level in DeviceGray.

    An operand that is not a valid float only produces a warning.
    """
    level = safe_float(gray)
    if level is None:
        log.warning(
            "Cannot set gray level because %r is an invalid float value",
            gray,
        )
        return
    self.graphicstate.ncolor = level
    self.graphicstate.ncs = self.csmap["DeviceGray"]

763 

def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Operator ``RG``: set the stroking colour to an RGB triple in DeviceRGB.

    Operands that cannot be parsed only produce a warning.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB stroke color "
            "because not all values in %r can be parsed as floats",
            (r, g, b),
        )
        return
    self.graphicstate.scolor = color
    self.graphicstate.scs = self.csmap["DeviceRGB"]

777 

def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Operator ``rg``: set the non-stroking colour to an RGB triple in DeviceRGB.

    Operands that cannot be parsed only produce a warning.
    """
    color = safe_rgb(r, g, b)
    if color is None:
        log.warning(
            "Cannot set RGB non-stroke color "
            "because not all values in %r can be parsed as floats",
            (r, g, b),
        )
        return
    self.graphicstate.ncolor = color
    self.graphicstate.ncs = self.csmap["DeviceRGB"]

791 

def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Operator ``K``: set the stroking colour to a CMYK value in DeviceCMYK.

    Operands that cannot be parsed only produce a warning.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK stroke color "
            "because not all values in %r can be parsed as floats",
            (c, m, y, k),
        )
        return
    self.graphicstate.scolor = color
    self.graphicstate.scs = self.csmap["DeviceCMYK"]

805 

def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Operator ``k``: set the non-stroking colour to a CMYK value in DeviceCMYK.

    Operands that cannot be parsed only produce a warning.
    """
    color = safe_cmyk(c, m, y, k)
    if color is None:
        log.warning(
            "Cannot set CMYK non-stroke color "
            "because not all values in %r can be parsed as floats",
            (c, m, y, k),
        )
        return
    self.graphicstate.ncolor = color
    self.graphicstate.ncs = self.csmap["DeviceCMYK"]

819 

def _parse_color_components(
    self, components: list[PDFStackT], context: str
) -> StandardColor | None:
    """Turn raw operands into a gray, RGB or CMYK colour value.

    Args:
        components: The operands; 1, 3 or 4 of them are understood.
        context: Text inserted into warning messages (e.g. "stroke").

    Returns:
        A float for one component, the result of ``safe_rgb`` / ``safe_cmyk``
        for three / four components, or None (after logging a warning) when
        the values cannot be parsed or the count is unsupported.
    """
    count = len(components)

    if count == 1:
        (raw_gray,) = components
        gray = safe_float(raw_gray)
        if gray is None:
            log.warning(
                "Cannot set %s color: %r is an invalid float value",
                context,
                raw_gray,
            )
        return gray

    if count == 3:
        rgb = safe_rgb(*components)
        if rgb is None:
            log.warning(
                "Cannot set %s color: components %r cannot be parsed as RGB",
                context,
                components,
            )
        return rgb

    if count == 4:
        cmyk = safe_cmyk(*components)
        if cmyk is None:
            log.warning(
                "Cannot set %s color: components %r cannot be parsed as CMYK",
                context,
                components,
            )
        return cmyk

    log.warning(
        "Cannot set %s color: %d components specified, "
        "but only 1 (grayscale), 3 (RGB), and 4 (CMYK) are supported",
        context,
        count,
    )
    return None

870 

def do_SCN(self) -> None:
    """Operator ``SCN``: set the stroking colour from operands on the stack.

    The number of operands popped is the component count of the current
    stroking colour space. Outside a Pattern colour space they are parsed as
    gray / RGB / CMYK. Inside one, following ISO 32000-1:2008 4.5.5 (PDF 1.7)
    and ISO 32000-2:2020 8.7.3 (PDF 2.0):
    - Colored patterns (PaintType=1): the only operand is the pattern name
    - Uncolored patterns (PaintType=2): colour operands, then the pattern name
    """
    colorspace = self.graphicstate.scs
    expected = colorspace.ncomponents
    operands = self.pop(expected)

    if len(operands) != expected:
        log.warning(
            "Cannot set stroke color because expected %d components but got %r",
            expected,
            operands,
        )
        return

    if colorspace.name != "Pattern":
        # The common case: an ordinary gray / RGB / CMYK colour.
        color = self._parse_color_components(operands, "stroke")
        if color is not None:
            self.graphicstate.scolor = color
        return

    if not operands:
        return

    # Pattern colour space (ISO 32000 8.7.3.2-3): the name always comes last.
    *base_operands, pattern_operand = operands

    # The pattern must be given as a name object (PSLiteral).
    if not isinstance(pattern_operand, PSLiteral):
        log.warning(
            "Pattern color space requires name object (PSLiteral), "
            "got %s: %r. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name SCN'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name SCN'.",
            type(pattern_operand).__name__,
            pattern_operand,
        )
        return

    pattern_name = literal_name(pattern_operand)

    if not base_operands:
        # Colored tiling pattern (PaintType=1): the name is the whole colour.
        self.graphicstate.scolor = pattern_name
        log.debug("Set stroke pattern (colored): %s", pattern_name)
        return

    # Uncolored tiling pattern (PaintType=2): base colour plus pattern name.
    base_color = self._parse_color_components(
        base_operands, "stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    self.graphicstate.scolor = (base_color, pattern_name)
    log.debug("Set stroke pattern (uncolored): %s + %s", base_color, pattern_name)

936 

def do_scn(self) -> None:
    """Operator ``scn``: set the non-stroking colour from operands on the stack.

    The number of operands popped is the component count of the current
    non-stroking colour space. Outside a Pattern colour space they are parsed
    as gray / RGB / CMYK. Inside one, following ISO 32000-1:2008 4.5.5
    (PDF 1.7) and ISO 32000-2:2020 8.7.3 (PDF 2.0):
    - Colored patterns (PaintType=1): the only operand is the pattern name
    - Uncolored patterns (PaintType=2): colour operands, then the pattern name
    """
    colorspace = self.graphicstate.ncs
    expected = colorspace.ncomponents
    operands = self.pop(expected)

    if len(operands) != expected:
        log.warning(
            "Cannot set non-stroke color because expected %d components but got %r",
            expected,
            operands,
        )
        return

    if colorspace.name != "Pattern":
        # The common case: an ordinary gray / RGB / CMYK colour.
        color = self._parse_color_components(operands, "non-stroke")
        if color is not None:
            self.graphicstate.ncolor = color
        return

    if not operands:
        return

    # Pattern colour space (ISO 32000 8.7.3.2-3): the name always comes last.
    *base_operands, pattern_operand = operands

    # The pattern must be given as a name object (PSLiteral).
    if not isinstance(pattern_operand, PSLiteral):
        log.warning(
            "Pattern color space requires name object (PSLiteral), "
            "got %s: %r. "
            "Per ISO 32000 8.7.3.2, colored patterns use syntax '/name scn'. "
            "Per ISO 32000 8.7.3.3, uncolored patterns use "
            "syntax 'c1...cn /name scn'.",
            type(pattern_operand).__name__,
            pattern_operand,
        )
        return

    pattern_name = literal_name(pattern_operand)

    if not base_operands:
        # Colored tiling pattern (PaintType=1): the name is the whole colour.
        self.graphicstate.ncolor = pattern_name
        log.debug("Set non-stroke pattern (colored): %s", pattern_name)
        return

    # Uncolored tiling pattern (PaintType=2): base colour plus pattern name.
    base_color = self._parse_color_components(
        base_operands, "non-stroke (uncolored pattern)"
    )
    if base_color is None:
        return

    self.graphicstate.ncolor = (base_color, pattern_name)
    log.debug(
        "Set non-stroke pattern (uncolored): %s + %s",
        base_color,
        pattern_name,
    )

1004 

def do_SC(self) -> None:
    """Handle the ``SC`` operator (stroking colour).

    This is a thin alias: all of the work is delegated to ``do_SCN``.
    """
    self.do_SCN()

1008 

def do_sc(self) -> None:
    """Handle the ``sc`` operator (non-stroking colour).

    This is a thin alias: all of the work is delegated to ``do_scn``.
    """
    self.do_scn()

1012 

def do_sh(self, name: object) -> None:
    """Handle the ``sh`` operator (paint a shading pattern).

    The operand is accepted and ignored; this method performs no action.

    :param name: the shading operand taken from the operand stack (unused).
    """

1015 

def do_BT(self) -> None:
    """Handle the ``BT`` operator (begin a text object).

    Starting a text object resets the text state via
    ``self.textstate.reset()``. The PDF specification describes this as
    setting the text matrix (Tm) and the text line matrix (Tlm) to the
    identity matrix; text objects may not be nested.
    """
    text_state = self.textstate
    text_state.reset()

1024 

def do_ET(self) -> None:
    """Handle the ``ET`` operator (end a text object).

    This method performs no action.
    """

1027 

def do_BX(self) -> None:
    """Handle the ``BX`` operator (begin a compatibility section).

    This method performs no action.
    """

1030 

def do_EX(self) -> None:
    """Handle the ``EX`` operator (end a compatibility section).

    This method performs no action.
    """

1033 

def do_MP(self, tag: PDFStackT) -> None:
    """Handle the ``MP`` operator (marked-content point).

    :param tag: the tag operand; it is forwarded to the device only when it
        is a ``PSLiteral``. Any other value is reported with a warning and
        otherwise ignored.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot define marked-content point because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.do_tag(tag)

1043 

def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Handle the ``DP`` operator (marked-content point with properties).

    :param tag: the tag operand; must be a ``PSLiteral`` to be forwarded.
    :param props: the property-list operand, passed to the device unchanged.

    A tag that is not a ``PSLiteral`` is reported with a warning and the
    device is not called.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot define marked-content point with property list "
            "because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.do_tag(tag, props)

1054 

def do_BMC(self, tag: PDFStackT) -> None:
    """Handle the ``BMC`` operator (begin a marked-content sequence).

    :param tag: the tag operand; it is forwarded to the device only when it
        is a ``PSLiteral``. Any other value is reported with a warning and
        the device is not called.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot begin marked-content sequence because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.begin_tag(tag)

1064 

def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Handle the ``BDC`` operator (begin marked content with properties).

    :param tag: the tag operand; must be a ``PSLiteral`` to be forwarded.
    :param props: the property-list operand, passed to the device unchanged.

    A tag that is not a ``PSLiteral`` is reported with a warning and the
    device is not called.
    """
    if not isinstance(tag, PSLiteral):
        log.warning(
            "Cannot begin marked-content sequence with property list "
            "because %r is not a PSLiteral",
            tag,
        )
        return
    self.device.begin_tag(tag, props)

1075 

def do_EMC(self) -> None:
    """Handle the ``EMC`` operator (end a marked-content sequence).

    The device is notified through ``end_tag()``.
    """
    device = self.device
    device.end_tag()

1079 

def do_Tc(self, space: PDFStackT) -> None:
    """Handle the ``Tc`` operator (character spacing).

    Character spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units. A value
        that ``safe_float`` cannot convert is reported with a warning and
        the current character spacing is left untouched.
    """
    value = safe_float(space)
    if value is not None:
        self.textstate.charspace = value
        return
    log.warning(
        "Could not set character spacing because %r is an invalid float value",
        space,
    )

1095 

def do_Tw(self, space: PDFStackT) -> None:
    """Handle the ``Tw`` operator (word spacing).

    Word spacing is used by the Tj, TJ, and ' operators.

    :param space: a number expressed in unscaled text space units. A value
        that ``safe_float`` cannot convert is reported with a warning and
        the current word spacing is left untouched.
    """
    value = safe_float(space)
    if value is not None:
        self.textstate.wordspace = value
        return
    log.warning(
        "Could not set word spacing because %r is an invalid float value",
        space,
    )

1111 

def do_Tz(self, scale: PDFStackT) -> None:
    """Handle the ``Tz`` operator (horizontal scaling).

    :param scale: a number specifying the percentage of the normal width.
        A value that ``safe_float`` cannot convert is reported with a
        warning and the current scaling is left untouched.
    """
    value = safe_float(scale)
    if value is not None:
        self.textstate.scaling = value
        return
    log.warning(
        "Could not set horizontal scaling because %r is an invalid float value",
        scale,
    )

1126 

def do_TL(self, leading: PDFStackT) -> None:
    """Handle the ``TL`` operator (text leading).

    Text leading is used only by the T*, ', and " operators.

    :param leading: a number expressed in unscaled text space units. A
        value that ``safe_float`` cannot convert is reported with a warning
        and the current leading is left untouched.
    """
    value = safe_float(leading)
    if value is not None:
        # The text state keeps the leading with its sign flipped.
        self.textstate.leading = -value
        return
    log.warning(
        "Could not set text leading because %r is an invalid float value",
        leading,
    )

1142 

def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
    """Handle the ``Tf`` operator (select font and font size).

    :param fontid: the name of a font resource in the Font subdictionary
        of the current resource dictionary.
    :param fontsize: a number representing a scale factor.
    :raises PDFInterpreterError: in strict mode, when ``fontid`` is not a
        key of ``self.fontmap``.

    Outside strict mode an unknown font id falls back to the font returned
    by ``self.rsrcmgr.get_font(None, {})``. A font size that
    ``safe_float`` cannot convert is reported with a warning and the
    current size is kept; the font itself is still updated.
    """
    try:
        font = self.fontmap[literal_name(fontid)]
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined Font id: {fontid!r}") from err
        font = self.rsrcmgr.get_font(None, {})
    self.textstate.font = font

    size = safe_float(fontsize)
    if size is not None:
        self.textstate.fontsize = size
        return
    log.warning(
        "Could not set text font because %r is an invalid float value",
        fontsize,
    )

1165 

def do_Tr(self, render: PDFStackT) -> None:
    """Handle the ``Tr`` operator (text rendering mode).

    :param render: the rendering mode operand. A value that ``safe_int``
        cannot convert is reported with a warning and the current mode is
        left untouched.
    """
    mode = safe_int(render)
    if mode is not None:
        self.textstate.render = mode
        return
    log.warning(
        "Could not set text rendering mode because %r is an invalid int value",
        render,
    )

1177 

def do_Ts(self, rise: PDFStackT) -> None:
    """Handle the ``Ts`` operator (text rise).

    :param rise: a number expressed in unscaled text space units. A value
        that ``safe_float`` cannot convert is reported with a warning and
        the current rise is left untouched.
    """
    value = safe_float(rise)
    if value is not None:
        self.textstate.rise = value
        return
    log.warning(
        "Could not set text rise because %r is an invalid float value",
        rise,
    )

1192 

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Handle the ``Td`` operator (move to the start of the next line).

    The text matrix is translated by ``(tx, ty)`` relative to the start of
    the current line.

    :param tx: horizontal offset operand.
    :param ty: vertical offset operand.
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted by ``safe_float``.

    Outside strict mode invalid operands leave the text matrix unchanged.
    The text line matrix is reset to ``(0, 0)`` whenever no error is raised.
    """
    dx = safe_float(tx)
    dy = safe_float(ty)
    if dx is None or dy is None:
        if settings.STRICT:
            raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")
    else:
        a, b, c, d, e, f = self.textstate.matrix
        self.textstate.matrix = (
            a,
            b,
            c,
            d,
            dx * a + dy * c + e,
            dx * b + dy * d + f,
        )

    self.textstate.linematrix = (0, 0)

1210 

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line and set the leading (``TD``).

    Offset from the start of the current line by (tx , ty). As a side
    effect, this operator sets the leading parameter in the text state.

    :param tx: horizontal offset operand.
    :param ty: vertical offset operand; when it converts to a float it is
        also stored as ``self.textstate.leading``.
    :raises PDFValueError: in strict mode, when either operand cannot be
        converted by ``safe_float``.

    Outside strict mode invalid operands leave the text matrix unchanged.
    The text line matrix is reset to ``(0, 0)`` whenever no error is raised.
    """
    tx_ = safe_float(tx)
    ty_ = safe_float(ty)

    if tx_ is not None and ty_ is not None:
        (a, b, c, d, e, f) = self.textstate.matrix
        e_new = tx_ * a + ty_ * c + e
        f_new = tx_ * b + ty_ * d + f
        self.textstate.matrix = (a, b, c, d, e_new, f_new)

    elif settings.STRICT:
        # Must be an f-string so the offending operands are interpolated,
        # mirroring the message produced by do_Td.
        raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

    if ty_ is not None:
        # Applied even when tx is invalid (non-strict mode only).
        self.textstate.leading = ty_

    self.textstate.linematrix = (0, 0)

1233 

def do_Tm(
    self,
    a: PDFStackT,
    b: PDFStackT,
    c: PDFStackT,
    d: PDFStackT,
    e: PDFStackT,
    f: PDFStackT,
) -> None:
    """Handle the ``Tm`` operator (set text matrix and text line matrix).

    The six operands are converted with ``safe_matrix``. When conversion
    fails a warning is logged and the text state is left unchanged;
    otherwise the text matrix is replaced and the text line matrix is
    reset to ``(0, 0)``.
    """
    values = (a, b, c, d, e, f)
    parsed = safe_matrix(*values)
    if parsed is None:
        log.warning(
            "Could not set text matrix because "
            "not all values in %r can be parsed as floats",
            values,
        )
        return
    self.textstate.matrix = parsed
    self.textstate.linematrix = (0, 0)

1256 

def do_T_a(self) -> None:
    """Handle the ``T*`` operator (move to the start of the next text line).

    The translation part of the text matrix is shifted by the stored
    leading along the matrix's second row, and the text line matrix is
    reset to ``(0, 0)``.
    """
    state = self.textstate
    a, b, c, d, e, f = state.matrix
    state.matrix = (
        a,
        b,
        c,
        d,
        state.leading * c + e,
        state.leading * d + f,
    )
    state.linematrix = (0, 0)

1269 

def do_TJ(self, seq: PDFStackT) -> None:
    """Handle the ``TJ`` operator (show text with individual positioning).

    :param seq: the text sequence operand; it is passed to the device
        without validation (only a static ``cast`` is applied).
    :raises PDFInterpreterError: in strict mode, when no font is selected.

    Outside strict mode the call is silently ignored when no font is set.
    """
    if self.textstate.font is None:
        if settings.STRICT:
            raise PDFInterpreterError("No font specified!")
        return

    text_seq = cast(PDFTextSeq, seq)
    gstate = self.graphicstate
    # The device receives a copy of the graphics state, not the live object.
    self.device.render_string(self.textstate, text_seq, gstate.ncs, gstate.copy())

1282 

def do_Tj(self, s: PDFStackT) -> None:
    """Handle the ``Tj`` operator (show a text string).

    :param s: the string operand; it is wrapped in a one-element list and
        handed to ``do_TJ``.
    """
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1286 

def do__q(self, s: PDFStackT) -> None:
    """Handle the ' (single quote) operator.

    Moves to the next line (``do_T_a``) and then shows the text
    (``do_TJ`` with a one-element sequence).

    :param s: the string operand to show.
    """
    self.do_T_a()
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1294 

def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
    """Set word and character spacing, move to next line, and show text

    The " (double quote) operator. The PDF specification defines it as
    equivalent to ``aw Tw ac Tc string '``, so the next-line move performed
    by the ' operator is part of its effect.

    :param aw: word spacing operand, applied through ``do_Tw``.
    :param ac: character spacing operand, applied through ``do_Tc``.
    :param s: the string operand to show.
    """
    self.do_Tw(aw)
    self.do_Tc(ac)
    # Advance to the next line before showing the text, exactly as the
    # ' operator does; without this the text is drawn on the current line.
    self.do_T_a()
    self.do_TJ([s])

1303 

def do_BI(self) -> None:
    """Handle the ``BI`` operator (begin an inline image object).

    This method performs no action.
    """

1306 

def do_ID(self) -> None:
    """Handle the ``ID`` operator (begin inline image data).

    This method performs no action.
    """

1309 

def do_EI(self, obj: PDFStackT) -> None:
    """Handle the ``EI`` operator (end an inline image object).

    :param obj: the operand taken from the stack. It is rendered only when
        it is a ``PDFStream`` containing both ``"W"`` and ``"H"`` entries;
        anything else is ignored.

    The image is wrapped in a unit-square figure with the identity matrix,
    named after ``id(obj)``.
    """
    if not (isinstance(obj, PDFStream) and "W" in obj and "H" in obj):
        return

    figure_id = str(id(obj))
    device = self.device
    device.begin_figure(figure_id, (0, 0, 1, 1), MATRIX_IDENTITY)
    device.render_image(figure_id, obj)
    device.end_figure(figure_id)

1317 

def do_Do(self, xobjid_arg: PDFStackT) -> None:
    """Handle the ``Do`` operator (invoke a named XObject).

    :param xobjid_arg: the XObject name operand, looked up in
        ``self.xobjmap``.
    :raises PDFInterpreterError: in strict mode, when the name is not a key
        of ``self.xobjmap``.

    Form XObjects that carry a ``BBox`` are rendered through a
    sub-interpreter inside a figure; image XObjects that carry ``Width``
    and ``Height`` are rendered inside a unit-square figure. Every other
    XObject is ignored, as is an unknown name outside strict mode.
    """
    xobjid = literal_name(xobjid_arg)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError as err:
        if settings.STRICT:
            raise PDFInterpreterError(f"Undefined xobject id: {xobjid!r}") from err
        return

    log.debug("Processing xobj: %r", xobj)
    kind = xobj.get("Subtype")

    if kind is LITERAL_FORM and "BBox" in xobj:
        sub = self.subinterp()
        bbox = cast(Rect, list_value(xobj["BBox"]))
        matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        own_resources = xobj.get("Resources")
        if own_resources:
            resources = dict_value(own_resources)
        else:
            resources = self.resources.copy()
        self.device.begin_figure(xobjid, bbox, matrix)
        sub.render_contents(
            resources,
            [xobj],
            ctm=mult_matrix(matrix, self.ctm),
        )
        self.device.end_figure(xobjid)
        return

    if kind is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
        self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
        self.device.render_image(xobjid, xobj)
        self.device.end_figure(xobjid)
        return

    # Any other XObject type is unsupported and deliberately ignored.

1352 

def process_page(self, page: PDFPage) -> None:
    """Render one page on the device.

    The initial transformation matrix is derived from the page's media box
    and its ``rotate`` value (90, 180 and 270 are handled explicitly; any
    other value is treated as unrotated). The page contents are then
    rendered between ``begin_page`` and ``end_page`` calls on the device.

    :param page: the page to process.
    """
    log.debug("Processing page: %r", page)
    x0, y0, x1, y1 = page.mediabox
    rotation = page.rotate

    if rotation == 90:
        ctm = (0, -1, 1, 0, -y0, x1)
    elif rotation == 180:
        ctm = (-1, 0, 0, -1, x1, y1)
    elif rotation == 270:
        ctm = (0, 1, -1, 0, y1, -x0)
    else:
        ctm = (1, 0, 0, 1, -x0, -y0)

    device = self.device
    device.begin_page(page, ctm)
    self.render_contents(page.resources, page.contents, ctm=ctm)
    device.end_page(page)

1367 

def render_contents(
    self,
    resources: dict[object, object],
    streams: Sequence[object],
    ctm: Matrix = MATRIX_IDENTITY,
) -> None:
    """Render a sequence of content streams.

    The interpreter's resources and state are (re)initialised from the
    arguments before the streams are executed. This method may be called
    recursively.

    :param resources: the resource dictionary to initialise from.
    :param streams: the content streams to execute.
    :param ctm: the initial transformation matrix.
    """
    log.debug(
        "render_contents: resources=%r, streams=%r, ctm=%r",
        resources,
        streams,
        ctm,
    )
    self.init_resources(resources)
    self.init_state(ctm)
    stream_list = list_value(streams)
    self.execute(stream_list)

1387 

def execute(self, streams: Sequence[object]) -> None:
    """Parse the given content streams and dispatch each operator.

    Operands are pushed on the interpreter stack; each keyword is mapped
    to a ``do_<name>`` method, which is called with as many operands as it
    declares parameters.

    :param streams: objects that ``stream_value`` can resolve to streams.
    :raises PDFInterpreterError: in strict mode, for an operator that has
        no matching ``do_<name>`` method.
    """
    # Detect and prevent circular references in content streams
    # (including Form XObjects).
    # We track stream IDs being executed in the current interpreter and
    # all parent interpreters. If a stream is already being processed
    # in the call stack, we skip
    # it to prevent infinite recursion (CWE-835 vulnerability).
    valid_streams: list[PDFStream] = []
    # Start from an empty set of IDs for this call.
    self.stream_ids.clear()
    for obj in streams:
        stream = stream_value(obj)
        if stream.objid is None:
            # Inline streams without object IDs can't be tracked for circular refs
            log.warning(
                "Execute called on non-indirect object (inline image?) %r", stream
            )
            # Such streams are not executed at all.
            continue
        # NOTE(review): parent_stream_ids is maintained outside this method;
        # presumably it holds the IDs of streams active in enclosing
        # interpreters - confirm against subinterp().
        if stream.objid in self.parent_stream_ids:
            log.warning(
                "Refusing to execute circular reference to content stream %d",
                stream.objid,
            )
        else:
            valid_streams.append(stream)
            self.stream_ids.add(stream.objid)
    try:
        parser = PDFContentParser(valid_streams)
    except PSEOF:
        # empty page
        return
    while True:
        try:
            (_, obj) = parser.nextobject()
        except PSEOF:
            # End of the content: stop interpreting.
            break
        if isinstance(obj, PSKeyword):
            name = keyword_name(obj)
            # Operator characters that are not valid in Python identifiers
            # are mapped to suffixes: * -> _a, " -> _w, ' -> _q.
            method = "do_{}".format(
                name.replace("*", "_a")
                .replace('"', "_w")
                .replace(
                    "'",
                    "_q",
                )
            )
            if hasattr(self, method):
                func = getattr(self, method)
                # Number of operands = declared positional parameters
                # minus one for ``self``.
                nargs = func.__code__.co_argcount - 1
                if nargs:
                    args = self.pop(nargs)
                    log.debug("exec: %s %r", name, args)
                    # The operator is skipped without any message when the
                    # number of popped operands differs from nargs.
                    if len(args) == nargs:
                        func(*args)
                else:
                    log.debug("exec: %s", name)
                    func()
            elif settings.STRICT:
                error_msg = f"Unknown operator: {name!r}"
                raise PDFInterpreterError(error_msg)
            # Outside strict mode unknown operators are ignored.
        else:
            # Anything that is not a keyword is an operand.
            self.push(obj)