Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/pdfinterp.py: 96%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

696 statements  

1import logging 

2import re 

3from io import BytesIO 

4from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast 

5 

6from pdfminer import settings 

7from pdfminer.casting import safe_cmyk, safe_float, safe_int, safe_matrix, safe_rgb 

8from pdfminer.cmapdb import CMap, CMapBase, CMapDB 

9from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace 

10from pdfminer.pdfdevice import PDFDevice, PDFTextSeq 

11from pdfminer.pdfexceptions import PDFException, PDFValueError 

12from pdfminer.pdffont import ( 

13 PDFCIDFont, 

14 PDFFont, 

15 PDFFontError, 

16 PDFTrueTypeFont, 

17 PDFType1Font, 

18 PDFType3Font, 

19) 

20from pdfminer.pdfpage import PDFPage 

21from pdfminer.pdftypes import ( 

22 LITERALS_ASCII85_DECODE, 

23 PDFObjRef, 

24 PDFStream, 

25 dict_value, 

26 list_value, 

27 resolve1, 

28 stream_value, 

29) 

30from pdfminer.psexceptions import PSEOF, PSTypeError 

31from pdfminer.psparser import ( 

32 KWD, 

33 LIT, 

34 PSKeyword, 

35 PSLiteral, 

36 PSStackParser, 

37 PSStackType, 

38 keyword_name, 

39 literal_name, 

40) 

41from pdfminer.utils import ( 

42 MATRIX_IDENTITY, 

43 Matrix, 

44 PathSegment, 

45 Point, 

46 Rect, 

47 choplist, 

48 mult_matrix, 

49) 

50 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

52 

53 

class PDFResourceError(PDFException):
    """Exception type for resource-related failures (not raised in this part of the file)."""

56 

57 

class PDFInterpreterError(PDFException):
    """Raised in strict mode when an operator names an undefined color space or font."""

60 

61 

# PostScript literal objects for well-known PDF names; the ones used in this
# module are compared by identity (``is``).
LITERAL_PDF = LIT("PDF")
LITERAL_TEXT = LIT("Text")
LITERAL_FONT = LIT("Font")
LITERAL_FORM = LIT("Form")
LITERAL_IMAGE = LIT("Image")

67 

68 

class PDFTextState:
    """Mutable bundle of the text-state parameters used while interpreting a page.

    Carries the current font and its size, the spacing/scaling/leading/render/rise
    parameters, and the text matrix plus line matrix that ``reset`` re-initialises.
    """

    matrix: Matrix
    linematrix: Point

    # Attributes copied one-by-one by ``copy``, in the original assignment order.
    _COPIED_ATTRS = (
        "font",
        "fontsize",
        "charspace",
        "wordspace",
        "scaling",
        "leading",
        "render",
        "rise",
        "matrix",
        "linematrix",
    )

    def __init__(self) -> None:
        self.font: Optional[PDFFont] = None
        self.fontsize: float = 0
        self.charspace: float = 0
        self.wordspace: float = 0
        self.scaling: float = 100
        self.leading: float = 0
        self.render: int = 0
        self.rise: float = 0
        # reset() is what assigns self.matrix and self.linematrix.
        self.reset()

    def __repr__(self) -> str:
        return (
            f"<PDFTextState: font={self.font!r}, fontsize={self.fontsize!r}, "
            f"charspace={self.charspace!r}, "
            f"wordspace={self.wordspace!r}, scaling={self.scaling!r}, "
            f"leading={self.leading!r}, render={self.render!r}, rise={self.rise!r}, "
            f"matrix={self.matrix!r}, linematrix={self.linematrix!r}>"
        )

    def copy(self) -> "PDFTextState":
        """Return a new PDFTextState whose attributes reference the same values."""
        duplicate = PDFTextState()
        for attr in self._COPIED_ATTRS:
            setattr(duplicate, attr, getattr(self, attr))
        return duplicate

    def reset(self) -> None:
        """Set the text matrix to the identity matrix and the line matrix to (0, 0)."""
        self.matrix = MATRIX_IDENTITY
        self.linematrix = (0, 0)

122 

123 

# A color value: one float (greyscale), three floats (R, G, B)
# or four floats (C, M, Y, K).
Color = Union[
    float,
    Tuple[float, float, float],
    Tuple[float, float, float, float],
]

129 

130 

class PDFGraphicState:
    """Mutable bundle of graphics-state parameters (line style and colors)."""

    # Attributes copied one-by-one by ``copy``, in the original assignment order.
    _COPIED_ATTRS = (
        "linewidth",
        "linecap",
        "linejoin",
        "miterlimit",
        "dash",
        "intent",
        "flatness",
        "scolor",
        "scs",
        "ncolor",
        "ncs",
    )

    def __init__(self) -> None:
        self.linewidth: float = 0
        self.linecap: Optional[object] = None
        self.linejoin: Optional[object] = None
        self.miterlimit: Optional[object] = None
        self.dash: Optional[Tuple[object, object]] = None
        self.intent: Optional[object] = None
        self.flatness: Optional[object] = None

        # Stroking color and its color space.
        self.scolor: Color = 0
        self.scs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

        # Non-stroking color and its color space.
        self.ncolor: Color = 0
        self.ncs: PDFColorSpace = PREDEFINED_COLORSPACE["DeviceGray"]

    def copy(self) -> "PDFGraphicState":
        """Return a new PDFGraphicState whose attributes reference the same values."""
        duplicate = PDFGraphicState()
        for attr in self._COPIED_ATTRS:
            setattr(duplicate, attr, getattr(self, attr))
        return duplicate

    def __repr__(self) -> str:
        # The color spaces (scs / ncs) are not part of the representation.
        return (
            f"<PDFGraphicState: linewidth={self.linewidth!r}, "
            f"linecap={self.linecap!r}, linejoin={self.linejoin!r}, "
            f" miterlimit={self.miterlimit!r}, dash={self.dash!r}, "
            f"intent={self.intent!r}, flatness={self.flatness!r}, "
            f" stroking color={self.scolor!r}, non stroking color={self.ncolor!r}>"
        )

181 

182 

class PDFResourceManager:
    """Repository of shared resources.

    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
    """

    def __init__(self, caching: bool = True) -> None:
        """Create a manager; with ``caching`` off, built fonts are never stored."""
        self.caching = caching
        # Fonts already built, keyed by the object id they were requested under.
        self._cached_fonts: Dict[object, PDFFont] = {}

    def get_procset(self, procs: Sequence[object]) -> None:
        """Accept a ProcSet list; no entry currently has any effect."""
        for _proc in procs:
            pass

    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        """Look up a CMap by name.

        :param cmapname: name passed to ``CMapDB.get_cmap``.
        :param strict: when true, re-raise ``CMapDB.CMapNotFound``;
            otherwise fall back to an empty ``CMap``.
        """
        try:
            return CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
            if strict:
                raise
            return CMap()

    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        """Return the font for ``spec``, reusing a cached one when possible.

        :param objid: cache key (falsy values are never cached or looked up).
        :param spec: the font dictionary.
        :raises PDFFontError: in strict mode, when ``/Type`` is missing or is
            not ``/Font``, when ``/Subtype`` is missing, or when the subtype
            is not recognised.
        """
        if objid and objid in self._cached_fonts:
            return self._cached_fonts[objid]

        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        # Use .get() so that a missing /Type is reported as a font error
        # instead of leaking a bare KeyError out of the strict check.
        if settings.STRICT and spec.get("Type") is not LITERAL_FONT:
            raise PDFFontError("Type is not /Font")

        # Create a Font object.
        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        else:
            if settings.STRICT:
                raise PDFFontError("Font Subtype is not specified.")
            subtype = "Type1"

        font: PDFFont
        if subtype in ("Type1", "MMType1"):
            font = PDFType1Font(self, spec)
        elif subtype == "TrueType":
            font = PDFTrueTypeFont(self, spec)
        elif subtype == "Type3":
            font = PDFType3Font(self, spec)
        elif subtype in ("CIDFontType0", "CIDFontType2"):
            font = PDFCIDFont(self, spec)
        elif subtype == "Type0":
            # Build the first descendant font, carrying over the parent's
            # Encoding / ToUnicode entries when present.
            dfonts = list_value(spec["DescendantFonts"])
            assert dfonts
            subspec = dict_value(dfonts[0]).copy()
            for k in ("Encoding", "ToUnicode"):
                if k in spec:
                    subspec[k] = resolve1(spec[k])
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError("Invalid Font spec: %r" % spec)
            # Non-strict fallback: treat an unknown subtype as Type1.
            font = PDFType1Font(self, spec)  # this is so wrong!

        if objid and self.caching:
            self._cached_fonts[objid] = font
        return font

253 

254 

class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
    """Stack parser that reads a sequence of content streams one after another.

    Inline images (``BI`` ... ``ID`` ... ``EI``) are turned into ``PDFStream``
    objects by ``do_keyword``.
    """

    def __init__(self, streams: Sequence[object]) -> None:
        self.streams = streams
        # Index of the next entry of ``streams`` to open.
        self.istream = 0
        # PSStackParser.__init__(fp=None) is safe only because we've overloaded
        # all the methods that would attempt to access self.fp without first
        # calling self.fillfp().
        PSStackParser.__init__(self, None)  # type: ignore[arg-type]

    def fillfp(self) -> None:
        """Open the next stream when none is open; raise PSEOF when none is left."""
        if self.fp:
            return
        if self.istream >= len(self.streams):
            raise PSEOF("Unexpected EOF, file truncated?")
        strm = stream_value(self.streams[self.istream])
        self.istream += 1
        self.fp = BytesIO(strm.get_data())

    def seek(self, pos: int) -> None:
        """Seek within the current stream, opening one first if necessary."""
        self.fillfp()
        PSStackParser.seek(self, pos)

    def fillbuf(self) -> None:
        """Refill the buffer, moving on to the next stream when one is exhausted."""
        if self.charpos < len(self.buf):
            return
        while 1:
            self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
            # Current stream is exhausted; drop it so fillfp() opens the next.
            self.fp = None  # type: ignore[assignment]
        self.charpos = 0

    def get_inline_data(self, pos: int, target: bytes = b"EI") -> Tuple[int, bytes]:
        """Collect raw bytes from ``pos`` up to ``target`` followed by whitespace.

        :param pos: position to seek to before reading.
        :param target: terminator byte sequence.
        :return: ``(pos, data)`` where ``data`` has the terminator, the
            whitespace byte after it and one trailing end-of-line removed.
        """
        self.seek(pos)
        # i is the number of terminator bytes matched so far; the loop ends
        # once the whole target plus one whitespace byte has been seen.
        i = 0
        data = b""
        while i <= len(target):
            self.fillbuf()
            if i:
                ci = self.buf[self.charpos]
                c = bytes((ci,))
                data += c
                self.charpos += 1
                if (
                    len(target) <= i
                    and c.isspace()
                    or i < len(target)
                    and c == (bytes((target[i],)))
                ):
                    i += 1
                else:
                    i = 0
            else:
                # Not inside a candidate match: jump to the next occurrence
                # of the terminator's first byte, or consume the whole buffer.
                try:
                    j = self.buf.index(target[0], self.charpos)
                    data += self.buf[self.charpos : j + 1]
                    self.charpos = j + 1
                    i = 1
                except ValueError:
                    data += self.buf[self.charpos :]
                    self.charpos = len(self.buf)
        data = data[: -(len(target) + 1)]  # strip the last part
        data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data)
        return (pos, data)

    def flush(self) -> None:
        """Move everything on the parser stack to the results."""
        self.add_results(*self.popall())

    KEYWORD_BI = KWD(b"BI")
    KEYWORD_ID = KWD(b"ID")
    KEYWORD_EI = KWD(b"EI")

    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handle a keyword token, assembling inline images into PDFStream objects.

        ``BI`` opens an "inline" collection, ``ID`` closes it, builds the image
        dictionary and reads the image data; any other keyword is pushed as-is.

        :raises PSTypeError: only in strict mode, for a malformed dictionary.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, "inline")
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type("inline")
                if len(objs) % 2 != 0:
                    error_msg = f"Invalid dictionary construct: {objs!r}"
                    raise PSTypeError(error_msg)
                d = {literal_name(k): resolve1(v) for (k, v) in choplist(2, objs)}
                eos = b"EI"
                # Inline image dictionaries may use the abbreviated /F or the
                # full /Filter key.
                filters = d.get("F", d.get("Filter"))
                if isinstance(filters, PSLiteral):
                    filters = [filters]
                # Only the first filter decides the terminator. An empty or
                # malformed filter entry is ignored rather than crashing.
                if (
                    isinstance(filters, (list, tuple))
                    and filters
                    and filters[0] in LITERALS_ASCII85_DECODE
                ):
                    eos = b"~>"
                (pos, data) = self.get_inline_data(pos + len(b"ID "), target=eos)
                if eos != b"EI":  # it may be necessary for decoding
                    data += eos
                obj = PDFStream(d, data)
                self.push((pos, obj))
                if eos == b"EI":  # otherwise it is still in the stream
                    self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if settings.STRICT:
                    raise
        else:
            self.push((pos, token))

359 

360 

# Types that may appear on the PDF argument stack.
PDFStackT = PSStackType[PDFStream]

363 

364 

365class PDFPageInterpreter: 

366 """Processor for the content of a PDF page 

367 

368 Reference: PDF Reference, Appendix A, Operator Summary 

369 """ 

370 

def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
    """Bind the interpreter to a resource manager and an output device."""
    self.rsrcmgr, self.device = rsrcmgr, device

374 

def dup(self) -> "PDFPageInterpreter":
    """Return a new interpreter of the same class with the same manager and device."""
    factory = self.__class__
    return factory(self.rsrcmgr, self.device)

377 

def init_resources(self, resources: Dict[object, object]) -> None:
    """Build the font, color space and XObject maps from a resource dictionary.

    The maps are always reset; with a falsy ``resources`` they stay at their
    defaults (empty font/XObject maps, predefined color spaces only).
    """
    self.resources = resources
    self.fontmap: Dict[object, PDFFont] = {}
    self.xobjmap = {}
    self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
    if not resources:
        return

    def get_colorspace(spec: object) -> Optional[PDFColorSpace]:
        # An array spec carries its family name first; otherwise the spec
        # itself is the name.
        is_array = isinstance(spec, list)
        name = literal_name(spec[0] if is_array else spec)
        if is_array and len(spec) >= 2:
            if name == "ICCBased":
                return PDFColorSpace(name, stream_value(spec[1])["N"])
            if name == "DeviceN":
                return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE.get(name)

    for k, v in dict_value(resources).items():
        log.debug("Resource: %r: %r", k, v)
        if k == "Font":
            for fontid, spec in dict_value(v).items():
                # Only indirect references provide an id to cache the font under.
                objid = spec.objid if isinstance(spec, PDFObjRef) else None
                self.fontmap[fontid] = self.rsrcmgr.get_font(objid, dict_value(spec))
        elif k == "ColorSpace":
            for csid, spec in dict_value(v).items():
                colorspace = get_colorspace(resolve1(spec))
                if colorspace is not None:
                    self.csmap[csid] = colorspace
        elif k == "ProcSet":
            self.rsrcmgr.get_procset(list_value(v))
        elif k == "XObject":
            self.xobjmap.update(dict_value(v))

418 

def init_state(self, ctm: Matrix) -> None:
    """Reset all interpreter state for a new page, starting from ``ctm``."""
    # Stack of saved (ctm, text state, graphic state) triples.
    self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = []
    self.ctm = ctm
    self.device.set_ctm(self.ctm)
    self.textstate = PDFTextState()
    self.graphicstate = PDFGraphicState()
    # Path segments collected since the last painting operator.
    self.curpath: List[PathSegment] = []
    # Operands waiting for the next operator.
    self.argstack: List[PDFStackT] = []

430 

def push(self, obj: PDFStackT) -> None:
    """Append one operand to the argument stack."""
    stack = self.argstack
    stack.append(obj)

433 

def pop(self, n: int) -> List[PDFStackT]:
    """Remove and return the last ``n`` operands (fewer if the stack is shorter)."""
    if n == 0:
        return []
    stack = self.argstack
    taken = stack[-n:]
    self.argstack = stack[:-n]
    return taken

440 

def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
    """Return the CTM together with copies of the text and graphic states."""
    text_snapshot = self.textstate.copy()
    graphic_snapshot = self.graphicstate.copy()
    return (self.ctm, text_snapshot, graphic_snapshot)

443 

def set_current_state(
    self,
    state: Tuple[Matrix, PDFTextState, PDFGraphicState],
) -> None:
    """Install a (ctm, text state, graphic state) triple and notify the device."""
    ctm, textstate, graphicstate = state
    self.ctm = ctm
    self.textstate = textstate
    self.graphicstate = graphicstate
    self.device.set_ctm(self.ctm)

450 

def do_q(self) -> None:
    """Push a snapshot of the current state onto the graphics-state stack."""
    snapshot = self.get_current_state()
    self.gstack.append(snapshot)

454 

def do_Q(self) -> None:
    """Restore the most recently saved state; do nothing if none was saved."""
    if not self.gstack:
        return
    self.set_current_state(self.gstack.pop())

459 

def do_cm(
    self,
    a1: PDFStackT,
    b1: PDFStackT,
    c1: PDFStackT,
    d1: PDFStackT,
    e1: PDFStackT,
    f1: PDFStackT,
) -> None:
    """Multiply the given matrix into the CTM and pass the result to the device.

    When the six operands cannot be converted by ``safe_matrix`` a warning is
    logged and the CTM is left untouched.
    """
    matrix = safe_matrix(a1, b1, c1, d1, e1, f1)
    if matrix is not None:
        self.ctm = mult_matrix(matrix, self.ctm)
        self.device.set_ctm(self.ctm)
        return

    log.warning(
        f"Cannot concatenate matrix to current transformation matrix because not all values in {(a1, b1, c1, d1, e1, f1)!r} can be parsed as floats"
    )

479 

def do_w(self, linewidth: PDFStackT) -> None:
    """Set the line width; log a warning and keep the old width if it is not a float."""
    linewidth_f = safe_float(linewidth)
    if linewidth_f is not None:
        self.graphicstate.linewidth = linewidth_f
        return
    log.warning(
        f"Cannot set line width because {linewidth!r} is an invalid float value"
    )

489 

def do_J(self, linecap: PDFStackT) -> None:
    """Store the line cap operand in the graphic state, unvalidated."""
    state = self.graphicstate
    state.linecap = linecap

493 

def do_j(self, linejoin: PDFStackT) -> None:
    """Store the line join operand in the graphic state, unvalidated."""
    state = self.graphicstate
    state.linejoin = linejoin

497 

def do_M(self, miterlimit: PDFStackT) -> None:
    """Store the miter limit operand in the graphic state, unvalidated."""
    state = self.graphicstate
    state.miterlimit = miterlimit

501 

def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
    """Store the dash pattern as a ``(dash, phase)`` pair in the graphic state."""
    pattern = (dash, phase)
    self.graphicstate.dash = pattern

505 

def do_ri(self, intent: PDFStackT) -> None:
    """Store the color rendering intent operand in the graphic state."""
    state = self.graphicstate
    state.intent = intent

509 

def do_i(self, flatness: PDFStackT) -> None:
    """Store the flatness tolerance operand in the graphic state."""
    state = self.graphicstate
    state.flatness = flatness

513 

def do_gs(self, name: PDFStackT) -> None:
    """Set parameters from a graphics state parameter dictionary (not implemented)."""
    # TODO
    return None

517 

def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
    """Start a new subpath at (x, y); warn and add nothing if either is not a float."""
    x_f = safe_float(x)
    y_f = safe_float(y)

    if x_f is not None and y_f is not None:
        self.curpath.append(("m", x_f, y_f))
        return

    point = ("m", x, y)
    log.warning(
        f"Cannot start new subpath because not all values in {point!r} can be parsed as floats"
    )

531 

def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
    """Add a straight segment to (x, y); warn and add nothing if either is not a float."""
    x_f = safe_float(x)
    y_f = safe_float(y)
    if x_f is not None and y_f is not None:
        self.curpath.append(("l", x_f, y_f))
        return

    point = ("l", x, y)
    log.warning(
        f"Cannot append straight line segment to path because not all values in {point!r} can be parsed as floats"
    )

544 

def do_c(
    self,
    x1: PDFStackT,
    y1: PDFStackT,
    x2: PDFStackT,
    y2: PDFStackT,
    x3: PDFStackT,
    y3: PDFStackT,
) -> None:
    """Add a curve with three control points; warn and add nothing on a bad operand."""
    coords = tuple(safe_float(v) for v in (x1, y1, x2, y2, x3, y3))
    if all(v is not None for v in coords):
        self.curpath.append(("c",) + coords)
        return

    point = ("c", x1, y1, x2, y2, x3, y3)
    log.warning(
        f"Cannot append curved segment to path because not all values in {point!r} can be parsed as floats"
    )

576 

def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Add a curve whose initial point is replicated; warn and add nothing on a bad operand."""
    coords = tuple(safe_float(v) for v in (x2, y2, x3, y3))
    if all(v is not None for v in coords):
        self.curpath.append(("v",) + coords)
        return

    point = ("v", x2, y2, x3, y3)
    log.warning(
        f"Cannot append curved segment to path because not all values in {point!r} can be parsed as floats"
    )

591 

def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
    """Add a curve whose final point is replicated; warn and add nothing on a bad operand."""
    coords = tuple(safe_float(v) for v in (x1, y1, x3, y3))
    if all(v is not None for v in coords):
        self.curpath.append(("y",) + coords)
        return

    point = ("y", x1, y1, x3, y3)
    log.warning(
        f"Cannot append curved segment to path because not all values in {point!r} can be parsed as floats"
    )

606 

def do_h(self) -> None:
    """Append a close-subpath ``("h",)`` segment to the current path."""
    close_segment = ("h",)
    self.curpath.append(close_segment)

610 

def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
    """Add a closed rectangle subpath; warn and add nothing on a bad operand."""
    x_f = safe_float(x)
    y_f = safe_float(y)
    w_f = safe_float(w)
    h_f = safe_float(h)

    if x_f is None or y_f is None or w_f is None or h_f is None:
        values = (x, y, w, h)
        log.warning(
            f"Cannot append rectangle to path because not all values in {values!r} can be parsed as floats"
        )
        return

    # Move to the origin corner, visit the other three, then close.
    self.curpath.extend(
        [
            ("m", x_f, y_f),
            ("l", x_f + w_f, y_f),
            ("l", x_f + w_f, y_f + h_f),
            ("l", x_f, y_f + h_f),
            ("h",),
        ]
    )

629 

def do_S(self) -> None:
    """Hand the current path to the device for stroking, then start a fresh path."""
    path = self.curpath
    self.device.paint_path(self.graphicstate, True, False, False, path)
    self.curpath = []

634 

def do_s(self) -> None:
    """Close the subpath, then stroke the path."""
    for step in (self.do_h, self.do_S):
        step()

639 

def do_f(self) -> None:
    """Hand the current path to the device for a nonzero-winding fill, then reset it."""
    path = self.curpath
    self.device.paint_path(self.graphicstate, False, True, False, path)
    self.curpath = []

644 

def do_F(self) -> None:
    """Fill path using nonzero winding number rule (obsolete)

    The PDF specification defines ``F`` as equivalent to ``f`` and keeps it
    only for compatibility, so this delegates to ``do_f``; the path is
    painted and cleared exactly as for ``f``.
    """
    self.do_f()

647 

def do_f_a(self) -> None:
    """Hand the current path to the device for an even-odd fill, then reset it."""
    path = self.curpath
    self.device.paint_path(self.graphicstate, False, True, True, path)
    self.curpath = []

652 

def do_B(self) -> None:
    """Hand the current path to the device for nonzero-winding fill and stroke, then reset it."""
    path = self.curpath
    self.device.paint_path(self.graphicstate, True, True, False, path)
    self.curpath = []

657 

def do_B_a(self) -> None:
    """Hand the current path to the device for even-odd fill and stroke, then reset it."""
    path = self.curpath
    self.device.paint_path(self.graphicstate, True, True, True, path)
    self.curpath = []

662 

def do_b(self) -> None:
    """Close the subpath, then fill and stroke with the nonzero winding rule."""
    for step in (self.do_h, self.do_B):
        step()

667 

def do_b_a(self) -> None:
    """Close the subpath, then fill and stroke with the even-odd rule."""
    for step in (self.do_h, self.do_B_a):
        step()

672 

def do_n(self) -> None:
    """Discard the current path without painting it."""
    self.curpath = list()

676 

def do_W(self) -> None:
    """Set clipping path using nonzero winding number rule (no-op here)."""
    return None

679 

def do_W_a(self) -> None:
    """Set clipping path using even-odd rule (no-op here)."""
    return None

682 

def do_CS(self, name: PDFStackT) -> None:
    """Select the stroking color space by resource name.

    Introduced in PDF 1.1

    An unknown name raises PDFInterpreterError in strict mode and is
    silently ignored otherwise.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError:
        if settings.STRICT:
            raise PDFInterpreterError("Undefined ColorSpace: %r" % name)
        return
    self.graphicstate.scs = colorspace

693 

def do_cs(self, name: PDFStackT) -> None:
    """Select the non-stroking color space by resource name.

    An unknown name raises PDFInterpreterError in strict mode and is
    silently ignored otherwise.
    """
    try:
        colorspace = self.csmap[literal_name(name)]
    except KeyError:
        if settings.STRICT:
            raise PDFInterpreterError("Undefined ColorSpace: %r" % name)
        return
    self.graphicstate.ncs = colorspace

701 

def do_G(self, gray: PDFStackT) -> None:
    """Set the stroking color to a gray level and switch to DeviceGray.

    A non-float operand only logs a warning.
    """
    gray_f = safe_float(gray)
    if gray_f is None:
        log.warning(
            f"Cannot set gray level because {gray!r} is an invalid float value"
        )
        return

    self.graphicstate.scolor = gray_f
    self.graphicstate.scs = self.csmap["DeviceGray"]

713 

def do_g(self, gray: PDFStackT) -> None:
    """Set the non-stroking color to a gray level and switch to DeviceGray.

    A non-float operand only logs a warning.
    """
    gray_f = safe_float(gray)
    if gray_f is None:
        log.warning(
            f"Cannot set gray level because {gray!r} is an invalid float value"
        )
        return

    self.graphicstate.ncolor = gray_f
    self.graphicstate.ncs = self.csmap["DeviceGray"]

725 

def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set the stroking color to an RGB triple and switch to DeviceRGB.

    Operands rejected by ``safe_rgb`` only log a warning.
    """
    rgb = safe_rgb(r, g, b)
    if rgb is None:
        log.warning(
            f"Cannot set RGB stroke color because not all values in {(r, g, b)!r} can be parsed as floats"
        )
        return

    self.graphicstate.scolor = rgb
    self.graphicstate.scs = self.csmap["DeviceRGB"]

737 

def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
    """Set the non-stroking color to an RGB triple and switch to DeviceRGB.

    Operands rejected by ``safe_rgb`` only log a warning.
    """
    rgb = safe_rgb(r, g, b)
    if rgb is None:
        log.warning(
            f"Cannot set RGB non-stroke color because not all values in {(r, g, b)!r} can be parsed as floats"
        )
        return

    self.graphicstate.ncolor = rgb
    self.graphicstate.ncs = self.csmap["DeviceRGB"]

749 

def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set the stroking color to a CMYK quadruple and switch to DeviceCMYK.

    Operands rejected by ``safe_cmyk`` only log a warning.
    """
    cmyk = safe_cmyk(c, m, y, k)
    if cmyk is None:
        log.warning(
            f"Cannot set CMYK stroke color because not all values in {(c, m, y, k)!r} can be parsed as floats"
        )
        return

    self.graphicstate.scolor = cmyk
    self.graphicstate.scs = self.csmap["DeviceCMYK"]

761 

def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
    """Set the non-stroking color to a CMYK quadruple and switch to DeviceCMYK.

    Operands rejected by ``safe_cmyk`` only log a warning.
    """
    cmyk = safe_cmyk(c, m, y, k)
    if cmyk is None:
        log.warning(
            f"Cannot set CMYK non-stroke color because not all values in {(c, m, y, k)!r} can be parsed as floats"
        )
        return

    self.graphicstate.ncolor = cmyk
    self.graphicstate.ncs = self.csmap["DeviceCMYK"]

773 

def do_SCN(self) -> None:
    """Set the stroking color from operands popped off the argument stack.

    The number of operands is the component count of the current stroking
    color space. Only 1 (gray), 3 (RGB) and 4 (CMYK) components are applied;
    every failure path logs a warning and leaves the color unchanged.
    """
    n = self.graphicstate.scs.ncomponents
    components = self.pop(n)

    if len(components) != n:
        # The stack held fewer operands than the color space needs.
        log.warning(
            f"Cannot set stroke color because expected {n} components but got {components!r}"
        )
        return

    if len(components) == 1:
        gray = components[0]
        gray_f = safe_float(gray)
        if gray_f is None:
            log.warning(
                f"Cannot set gray stroke color because {gray!r} is an invalid float value"
            )
        else:
            self.graphicstate.scolor = gray_f
        return

    if len(components) == 3:
        rgb = safe_rgb(*components)
        if rgb is None:
            log.warning(
                f"Cannot set RGB stroke color because components {components!r} cannot be parsed as RGB"
            )
        else:
            self.graphicstate.scolor = rgb
        return

    if len(components) == 4:
        cmyk = safe_cmyk(*components)
        if cmyk is None:
            log.warning(
                f"Cannot set CMYK stroke color because components {components!r} cannot be parsed as CMYK"
            )
        else:
            self.graphicstate.scolor = cmyk
        return

    log.warning(
        f"Cannot set stroke color because {len(components)} components are specified but only 1 (grayscale), 3 (rgb) and 4 (cmyk) are supported"
    )

818 

def do_scn(self) -> None:
    """Set the non-stroking color from operands popped off the argument stack.

    The number of operands is the component count of the current non-stroking
    color space. Only 1 (gray), 3 (RGB) and 4 (CMYK) components are applied;
    every failure path logs a warning and leaves the color unchanged.
    """
    n = self.graphicstate.ncs.ncomponents
    components = self.pop(n)

    if len(components) != n:
        # The stack held fewer operands than the color space needs.
        log.warning(
            f"Cannot set non-stroke color because expected {n} components but got {components!r}"
        )
        return

    if len(components) == 1:
        gray = components[0]
        gray_f = safe_float(gray)
        if gray_f is None:
            log.warning(
                f"Cannot set gray non-stroke color because {gray!r} is an invalid float value"
            )
        else:
            self.graphicstate.ncolor = gray_f
        return

    if len(components) == 3:
        rgb = safe_rgb(*components)
        if rgb is None:
            log.warning(
                f"Cannot set RGB non-stroke color because components {components!r} cannot be parsed as RGB"
            )
        else:
            self.graphicstate.ncolor = rgb
        return

    if len(components) == 4:
        cmyk = safe_cmyk(*components)
        if cmyk is None:
            log.warning(
                f"Cannot set CMYK non-stroke color because components {components!r} cannot be parsed as CMYK"
            )
        else:
            self.graphicstate.ncolor = cmyk
        return

    log.warning(
        f"Cannot set non-stroke color because {len(components)} components are specified but only 1 (grayscale), 3 (rgb) and 4 (cmyk) are supported"
    )

863 

def do_SC(self) -> None:
    """Set the stroking color; handled exactly like ``SCN``."""
    handler = self.do_SCN
    handler()

867 

def do_sc(self) -> None:
    """Set the non-stroking color; handled exactly like ``scn``."""
    handler = self.do_scn
    handler()

871 

def do_sh(self, name: object) -> None:
    """Paint area defined by shading pattern (no-op here)."""
    return None

874 

def do_BT(self) -> None:
    """Begin a text object by resetting the text state's matrices.

    ``PDFTextState.reset`` puts the text matrix back to the identity matrix
    and the line matrix back to its origin. Text objects cannot be nested;
    a second BT cannot appear before an ET.
    """
    state = self.textstate
    state.reset()

883 

def do_ET(self) -> None:
    """End a text object (no-op here)."""
    return None

886 

def do_BX(self) -> None:
    """Begin compatibility section (no-op here)."""
    return None

889 

def do_EX(self) -> None:
    """End compatibility section (no-op here)."""
    return None

892 

def do_MP(self, tag: PDFStackT) -> None:
    """Report a marked-content point to the device; warn if the tag is not a PSLiteral."""
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot define marked-content point because {tag!r} is not a PSLiteral"
        )
        return
    self.device.do_tag(tag)

901 

def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Report a marked-content point with properties; warn if the tag is not a PSLiteral."""
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot define marked-content point with property list because {tag!r} is not a PSLiteral"
        )
        return
    self.device.do_tag(tag, props)

910 

def do_BMC(self, tag: PDFStackT) -> None:
    """Open a marked-content sequence on the device; warn if the tag is not a PSLiteral."""
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot begin marked-content sequence because {tag!r} is not a PSLiteral"
        )
        return
    self.device.begin_tag(tag)

919 

def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
    """Open a marked-content sequence with properties; warn if the tag is not a PSLiteral."""
    if not isinstance(tag, PSLiteral):
        log.warning(
            f"Cannot begin marked-content sequence with property list because {tag!r} is not a PSLiteral"
        )
        return
    self.device.begin_tag(tag, props)

928 

def do_EMC(self) -> None:
    """Tell the device that the current marked-content sequence has ended."""
    device = self.device
    device.end_tag()

932 

def do_Tc(self, space: PDFStackT) -> None:
    """Set character spacing.

    Character spacing is used by the Tj, TJ, and ' operators.
    A non-float operand only logs a warning.

    :param space: a number expressed in unscaled text space units.
    """
    charspace = safe_float(space)
    if charspace is not None:
        self.textstate.charspace = charspace
        return
    log.warning(
        f"Could not set character spacing because {space!r} is an invalid float value"
    )

947 

def do_Tw(self, space: PDFStackT) -> None:
    """Set the word spacing.

    Word spacing is used by the Tj, TJ, and ' operators.
    A non-float operand only logs a warning and leaves the spacing unchanged.

    :param space: a number expressed in unscaled text space units
    """
    wordspace = safe_float(space)
    if wordspace is None:
        # Message spelling corrected ("becuase" -> "because").
        log.warning(
            f"Could not set word spacing because {space!r} is an invalid float value"
        )
    else:
        self.textstate.wordspace = wordspace

962 

def do_Tz(self, scale: PDFStackT) -> None:
    """Set the horizontal scaling.

    A non-float operand only logs a warning.

    :param scale: is a number specifying the percentage of the normal width
    """
    scale_f = safe_float(scale)
    if scale_f is not None:
        self.textstate.scaling = scale_f
        return
    log.warning(
        f"Could not set horizontal scaling because {scale!r} is an invalid float value"
    )

976 

def do_TL(self, leading: PDFStackT) -> None:
    """Set the text leading.

    Text leading is used only by the T*, ', and " operators.
    The value is stored negated; a non-float operand only logs a warning.

    :param leading: a number expressed in unscaled text space units
    """
    leading_f = safe_float(leading)
    if leading_f is not None:
        self.textstate.leading = -leading_f
        return
    log.warning(
        f"Could not set text leading because {leading!r} is an invalid float value"
    )

991 

def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
    """Set the text font and font size.

    An unknown font id raises PDFInterpreterError in strict mode; otherwise
    a font built from an empty spec is used instead. A non-float size only
    logs a warning and leaves the size unchanged.

    :param fontid: the name of a font resource in the Font subdictionary
        of the current resource dictionary
    :param fontsize: size is a number representing a scale factor.
    """
    try:
        font = self.fontmap[literal_name(fontid)]
    except KeyError:
        if settings.STRICT:
            raise PDFInterpreterError("Undefined Font id: %r" % fontid)
        font = self.rsrcmgr.get_font(None, {})
    self.textstate.font = font

    fontsize_f = safe_float(fontsize)
    if fontsize_f is not None:
        self.textstate.fontsize = fontsize_f
        return
    log.warning(
        f"Could not set text font because {fontsize!r} is an invalid float value"
    )

1013 

def do_Tr(self, render: PDFStackT) -> None:
    """Handle the ``Tr`` operator: set the text rendering mode.

    The operand is converted with ``safe_int``. If that yields ``None`` a
    warning is logged and the current rendering mode is kept; otherwise
    the integer is stored in ``self.textstate.render``.

    :param render: the rendering mode operand taken from the stack
    """
    mode = safe_int(render)
    if mode is not None:
        self.textstate.render = mode
        return

    log.warning(
        f"Could not set text rendering mode because {render!r} is an invalid int value"
    )

1024 

def do_Ts(self, rise: PDFStackT) -> None:
    """Handle the ``Ts`` operator: set the text rise.

    The operand is converted with ``safe_float``. If that yields ``None``
    a warning is logged and the current rise is kept; otherwise the value
    is stored in ``self.textstate.rise``.

    :param rise: a number expressed in unscaled text space units
    """
    parsed = safe_float(rise)
    if parsed is not None:
        self.textstate.rise = parsed
        return

    log.warning(
        f"Could not set text rise because {rise!r} is an invalid float value"
    )

1038 

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Handle the ``Td`` operator: move to the start of the next line.

    Offset from the start of the current line by (tx , ty). Both operands
    are converted with ``safe_float``. When both are valid, the
    translation components of ``self.textstate.matrix`` are advanced by
    the offset transformed through the matrix's other four components.
    When either is invalid the matrix is left untouched (and strict mode
    raises). In every non-raising case the line matrix is reset to
    ``(0, 0)``.

    :param tx: horizontal offset operand
    :param ty: vertical offset operand
    :raises PDFValueError: if an operand is not a valid float and
        ``settings.STRICT`` is enabled.
    """
    dx = safe_float(tx)
    dy = safe_float(ty)

    if dx is None or dy is None:
        if settings.STRICT:
            raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")
    else:
        a, b, c, d, e, f = self.textstate.matrix
        shifted_e = dx * a + dy * c + e
        shifted_f = dx * b + dy * d + f
        self.textstate.matrix = (a, b, c, d, shifted_e, shifted_f)

    self.textstate.linematrix = (0, 0)

1056 

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Handle the ``TD`` operator: move to the start of the next line.

    Offset from the start of the current line by (tx , ty). As a side
    effect, this operator sets the leading parameter in the text state.

    Both operands are converted with ``safe_float``. When both are valid,
    the translation components of ``self.textstate.matrix`` are advanced
    by the offset transformed through the matrix's other four components.
    When either is invalid the matrix is left untouched (and strict mode
    raises). Independently of ``tx``, a valid ``ty`` is stored in
    ``self.textstate.leading``. Finally the line matrix is reset to
    ``(0, 0)``.

    :param tx: horizontal offset operand
    :param ty: vertical offset operand; also becomes the new leading
    :raises PDFValueError: if an operand is not a valid float and
        ``settings.STRICT`` is enabled.
    """
    tx_ = safe_float(tx)
    ty_ = safe_float(ty)

    if tx_ is not None and ty_ is not None:
        (a, b, c, d, e, f) = self.textstate.matrix
        e_new = tx_ * a + ty_ * c + e
        f_new = tx_ * b + ty_ * d + f
        self.textstate.matrix = (a, b, c, d, e_new, f_new)

    elif settings.STRICT:
        # This must be an f-string so the offending operands actually
        # appear in the message (same format as the Td handler).
        raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

    if ty_ is not None:
        # The leading is updated even when tx could not be parsed.
        self.textstate.leading = ty_

    self.textstate.linematrix = (0, 0)

1079 

def do_Tm(
    self,
    a: PDFStackT,
    b: PDFStackT,
    c: PDFStackT,
    d: PDFStackT,
    e: PDFStackT,
    f: PDFStackT,
) -> None:
    """Handle the ``Tm`` operator: set text matrix and text line matrix.

    The six operands are passed to ``safe_matrix``. If it returns
    ``None`` a warning is logged and neither the text matrix nor the line
    matrix is modified. Otherwise the result becomes
    ``self.textstate.matrix`` and the line matrix is reset to ``(0, 0)``.
    """
    values = (a, b, c, d, e, f)
    matrix = safe_matrix(*values)

    if matrix is not None:
        self.textstate.matrix = matrix
        self.textstate.linematrix = (0, 0)
        return

    log.warning(
        f"Could not set text matrix because not all values in {values!r} can be parsed as floats"
    )

1100 

def do_T_a(self) -> None:
    """Handle the ``T*`` operator: move to start of next text line.

    The translation components of ``self.textstate.matrix`` are advanced
    by the current leading multiplied by the matrix's ``c`` and ``d``
    components respectively; the line matrix is then reset to ``(0, 0)``.
    """
    state = self.textstate
    a, b, c, d, e, f = state.matrix
    next_e = state.leading * c + e
    next_f = state.leading * d + f
    state.matrix = (a, b, c, d, next_e, next_f)
    state.linematrix = (0, 0)

1113 

def do_TJ(self, seq: PDFStackT) -> None:
    """Handle the ``TJ`` operator: show text with individual glyph positioning.

    When a font is set on the text state, the sequence is handed to
    ``self.device.render_string`` together with the text state, the
    graphic state's ``ncs`` and a copy of the graphic state. Without a
    font nothing is rendered; strict mode raises instead.

    :param seq: the text sequence operand
    :raises PDFInterpreterError: if no font is set and
        ``settings.STRICT`` is enabled.
    """
    if self.textstate.font is not None:
        self.device.render_string(
            self.textstate,
            cast(PDFTextSeq, seq),
            self.graphicstate.ncs,
            self.graphicstate.copy(),
        )
        return

    if settings.STRICT:
        raise PDFInterpreterError("No font specified!")

1126 

def do_Tj(self, s: PDFStackT) -> None:
    """Handle the ``Tj`` operator: show text.

    Delegates to :meth:`do_TJ` with the operand wrapped in a one-element
    list.

    :param s: the string operand to show
    """
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1130 

def do__q(self, s: PDFStackT) -> None:
    """Move to next line and show text

    The ' (single quote) operator. Implemented as :meth:`do_T_a`
    followed by :meth:`do_TJ` on a one-element list holding ``s``.

    :param s: the string operand to show
    """
    self.do_T_a()
    single_item_seq = [s]
    self.do_TJ(single_item_seq)

1138 

def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
    """Set word and character spacing, move to next line, and show text

    The " (double quote) operator. The PDF specification defines it as
    having the same effect as ``aw Tw ac Tc string '``, so the handlers
    are invoked in that order: :meth:`do_Tw`, :meth:`do_Tc`, then the
    next-line move (:meth:`do_T_a`) and finally :meth:`do_TJ`.

    :param aw: word spacing operand, forwarded to ``do_Tw``
    :param ac: character spacing operand, forwarded to ``do_Tc``
    :param s: the string operand to show
    """
    self.do_Tw(aw)
    self.do_Tc(ac)
    # The move to the next line is part of the operator's definition;
    # without it the text is drawn on the current line.
    self.do_T_a()
    self.do_TJ([s])

1147 

def do_BI(self) -> None:
    """Begin inline image object

    This handler has an empty body and performs no work of its own.
    NOTE(review): presumably the inline image is assembled elsewhere
    (before ``do_EI`` receives it) -- confirm against the content parser.
    """

1150 

def do_ID(self) -> None:
    """Begin inline image data

    This handler has an empty body and performs no work of its own.
    NOTE(review): presumably the inline image data is consumed elsewhere
    (before ``do_EI`` receives it) -- confirm against the content parser.
    """

1153 

def do_EI(self, obj: PDFStackT) -> None:
    """Handle the ``EI`` operator: end inline image object.

    Only a ``PDFStream`` operand that contains both the ``"W"`` and
    ``"H"`` keys is rendered; anything else is ignored. The image is
    emitted inside a figure spanning the unit square with the identity
    matrix, identified by the string form of ``id(obj)``.

    :param obj: the operand popped for the inline image
    """
    if not isinstance(obj, PDFStream):
        return
    if "W" not in obj or "H" not in obj:
        return

    figure_id = str(id(obj))
    device = self.device
    device.begin_figure(figure_id, (0, 0, 1, 1), MATRIX_IDENTITY)
    device.render_image(figure_id, obj)
    device.end_figure(figure_id)

1161 

def do_Do(self, xobjid_arg: PDFStackT) -> None:
    """Handle the ``Do`` operator: invoke named XObject.

    The XObject is looked up in ``self.xobjmap`` by the literal name of
    the operand. An unknown name raises in strict mode and is otherwise
    ignored.

    * A Form XObject that has a ``BBox`` is rendered by a duplicate of
      this interpreter inside a figure, using the XObject's matrix
      multiplied with the current CTM.
    * An Image XObject that has ``Width`` and ``Height`` is rendered
      inside a unit-square figure.
    * Any other XObject is silently skipped.

    :param xobjid_arg: the XObject name operand
    :raises PDFInterpreterError: if the name is not in the XObject map
        and ``settings.STRICT`` is enabled.
    """
    xobjid = literal_name(xobjid_arg)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError:
        if settings.STRICT:
            raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
        return

    log.debug("Processing xobj: %r", xobj)
    subtype = xobj.get("Subtype")

    if subtype is LITERAL_FORM and "BBox" in xobj:
        interpreter = self.dup()
        bbox = cast(Rect, list_value(xobj["BBox"]))
        matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        xobjres = xobj.get("Resources")
        resources = dict_value(xobjres) if xobjres else self.resources.copy()
        self.device.begin_figure(xobjid, bbox, matrix)
        interpreter.render_contents(
            resources,
            [xobj],
            ctm=mult_matrix(matrix, self.ctm),
        )
        self.device.end_figure(xobjid)
        return

    if subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
        self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
        self.device.render_image(xobjid, xobj)
        self.device.end_figure(xobjid)
        return

    # Unsupported xobject type: nothing to do.

1199 

def process_page(self, page: PDFPage) -> None:
    """Render one page on the device.

    An initial transformation matrix is chosen from the page's rotation
    (90, 180 or 270; any other value is treated as unrotated) and its
    media box. The device is told the page begins, the page contents are
    rendered with that matrix, and the device is told the page ends.

    :param page: the page to process
    """
    log.debug("Processing page: %r", page)
    x0, y0, x1, y1 = page.mediabox
    rotation = page.rotate

    if rotation == 90:
        ctm = (0, -1, 1, 0, -y0, x1)
    elif rotation == 180:
        ctm = (-1, 0, 0, -1, x1, y1)
    elif rotation == 270:
        ctm = (0, 1, -1, 0, y1, -x0)
    else:
        # No (recognised) rotation: only shift the media box origin.
        ctm = (1, 0, 0, 1, -x0, -y0)

    device = self.device
    device.begin_page(page, ctm)
    self.render_contents(page.resources, page.contents, ctm=ctm)
    device.end_page(page)

1214 

def render_contents(
    self,
    resources: Dict[object, object],
    streams: Sequence[object],
    ctm: Matrix = MATRIX_IDENTITY,
) -> None:
    """Render the content streams.

    This method may be called recursively.

    Resources are initialised first, then the interpreter state is
    initialised with ``ctm``, and finally the streams (passed through
    ``list_value``) are executed.

    :param resources: resource dictionary for the content
    :param streams: the content streams to execute
    :param ctm: the transformation matrix to start from
    """
    log.debug(
        "render_contents: resources=%r, streams=%r, ctm=%r", resources, streams, ctm
    )
    self.init_resources(resources)
    self.init_state(ctm)
    stream_list = list_value(streams)
    self.execute(stream_list)

1234 

def execute(self, streams: Sequence[object]) -> None:
    """Parse the content streams and dispatch each operator.

    Objects read from the parser are pushed onto the operand stack until
    a keyword is met. The keyword is mapped to a ``do_<name>`` method,
    with ``*`` replaced by ``_a``, ``"`` by ``_w`` and ``'`` by ``_q``.
    The number of operands is taken from the method's argument count
    (excluding ``self``) and that many operands are popped. The method is
    only called when the expected number of operands was obtained;
    otherwise the operator is silently skipped.

    Parsing stops at ``PSEOF``. If constructing the parser itself raises
    ``PSEOF`` the content is treated as empty.

    :param streams: the content streams to execute
    :raises PDFInterpreterError: for an operator without a handler when
        ``settings.STRICT`` is enabled.
    """
    try:
        parser = PDFContentParser(streams)
    except PSEOF:
        # empty page
        return

    while True:
        try:
            (_, token) = parser.nextobject()
        except PSEOF:
            break

        if not isinstance(token, PSKeyword):
            # Operand: keep it for the next operator.
            self.push(token)
            continue

        name = keyword_name(token)
        mangled = name.replace("*", "_a").replace('"', "_w").replace("'", "_q")
        method = "do_%s" % mangled

        if not hasattr(self, method):
            if settings.STRICT:
                error_msg = "Unknown operator: %r" % name
                raise PDFInterpreterError(error_msg)
            continue

        func = getattr(self, method)
        # Bound method: the code object's count includes ``self``.
        nargs = func.__code__.co_argcount - 1
        if not nargs:
            log.debug("exec: %s", name)
            func()
            continue

        args = self.pop(nargs)
        log.debug("exec: %s %r", name, args)
        if len(args) == nargs:
            func(*args)