Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/matplotlib/dviread.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

510 statements  

1""" 

2A module for reading dvi files output by TeX. Several limitations make 

3this not (currently) useful as a general-purpose dvi preprocessor, but 

4it is currently used by the pdf backend for processing usetex text. 

5 

6Interface:: 

7 

8 with Dvi(filename, 72) as dvi: 

9 # iterate over pages: 

10 for page in dvi: 

11 w, h, d = page.width, page.height, page.descent 

12 for x, y, font, glyph, width in page.text: 

13 fontname = font.texname 

14 pointsize = font.size 

15 ... 

16 for x, y, height, width in page.boxes: 

17 ... 

18""" 

19 

20from collections import namedtuple 

21import enum 

22from functools import lru_cache, partial, wraps 

23import logging 

24import os 

25from pathlib import Path 

26import re 

27import struct 

28import subprocess 

29import sys 

30 

31import numpy as np 

32 

33from matplotlib import _api, cbook 

34 

35_log = logging.getLogger(__name__) 

36 

37# Many dvi related files are looked for by external processes, require 

38# additional parsing, and are used many times per rendering, which is why they 

39# are cached using lru_cache(). 

40 

# Dvi is a bytecode format documented in
# https://ctan.org/pkg/dvitype
# https://texdoc.org/serve/dvitype.pdf/0
#
# A dvi file is made of a preamble, some number of pages, a postamble, and a
# finale.  Which opcodes are legal depends on where in the file we are, so
# the Dvi object carries a parser state:
#
#   pre:       expecting the preamble
#   outer:     between pages (a page or the postamble may follow; font
#              definitions, among others, are also allowed here)
#   inpage:    processing a page
#   post_post: state after the postamble (our current implementation
#              just stops reading)
#   finale:    the finale (unimplemented in our current implementation)

_dvistate = enum.Enum(
    'DviState', ['pre', 'outer', 'inpage', 'post_post', 'finale'])

# A page is described by the marks on it (text and boxes) and its dimensions.
Page = namedtuple('Page', ['text', 'boxes', 'height', 'width', 'descent'])
Box = namedtuple('Box', ['x', 'y', 'height', 'width'])

62 

63 

# Also a namedtuple, for backcompat.
class Text(namedtuple('Text', 'x y font glyph width')):
    """
    A glyph in the dvi file.

    *x* and *y* give the position of the glyph directly.  *font*, *glyph*
    and *width* remain public for back-compatibility; code that wants to
    draw the glyph itself should preferably load the font at `font_path`
    with size `font_size`, apply the transformations listed in
    `font_effects`, and then load the glyph given by `glyph_name_or_index`.
    """

    def _get_pdftexmap_entry(self):
        """Look up this glyph's font (by texname) in :file:`pdftex.map`."""
        fontmap = PsfontsMap(find_tex_file("pdftex.map"))
        return fontmap[self.font.texname]

    @property
    def font_path(self):
        """The `~pathlib.Path` to the font for this glyph."""
        entry = self._get_pdftexmap_entry()
        if entry.filename is not None:
            return Path(entry.filename)
        # The map entry exists but names no font file.
        raise ValueError("No usable font file found for {} ({}); "
                         "the font may lack a Type-1 version"
                         .format(entry.psname.decode("ascii"),
                                 entry.texname.decode("ascii")))

    @property
    def font_size(self):
        """The font size."""
        return self.font.size

    @property
    def font_effects(self):
        """
        The "font effects" dict for this glyph.

        It holds this glyph's SlantFont and ExtendFont values (when present)
        as read from :file:`pdftex.map`.
        """
        entry = self._get_pdftexmap_entry()
        return entry.effects

    @property
    def glyph_name_or_index(self):
        """
        Either the glyph name or the native charmap glyph index.

        When :file:`pdftex.map` gives an encoding for the font of this
        glyph, that encoding maps glyph indices to Adobe glyph names and is
        used here to turn the dvi index into a glyph name.  Callers can then
        go from glyph name to glyph index (FT_Get_Name_Index/get_name_index)
        and load the glyph with FT_Load_Glyph/load_glyph.

        When :file:`pdftex.map` gives no encoding, the index refers directly
        to the font's "native" charmap; the glyph should be loaded with
        FT_Load_Char/load_char after selecting the native charmap.
        """
        entry = self._get_pdftexmap_entry()
        if entry.encoding is None:
            return self.glyph
        return _parse_enc(entry.encoding)[self.glyph]

124 

125 

# Opcode argument parsing
#
# Every entry below is a callable taking a Dvi object and *delta*, i.e. the
# opcode minus the smallest opcode that has the same meaning.  Dvi opcodes
# often encode the number of argument bytes in this delta.
_arg_mapping = {
    # raw: Return delta as is.
    'raw': lambda dvi, delta: delta,
    # u1: Read 1 byte as an unsigned number.
    'u1': lambda dvi, delta: dvi._arg(1, signed=False),
    # u4: Read 4 bytes as an unsigned number.
    'u4': lambda dvi, delta: dvi._arg(4, signed=False),
    # s4: Read 4 bytes as a signed number.
    's4': lambda dvi, delta: dvi._arg(4, signed=True),
    # slen: Read delta bytes as a signed number; if delta is zero, nothing
    # is read and None is returned.
    'slen': lambda dvi, delta: (
        None if not delta else dvi._arg(delta, signed=True)),
    # slen1: Read (delta + 1) bytes as a signed number.
    'slen1': lambda dvi, delta: dvi._arg(delta + 1, signed=True),
    # ulen1: Read (delta + 1) bytes as an unsigned number.
    'ulen1': lambda dvi, delta: dvi._arg(delta + 1, signed=False),
    # olen1: Read (delta + 1) bytes; the number is unsigned when shorter
    # than 4 bytes and signed when exactly 4 bytes (delta == 3).
    'olen1': lambda dvi, delta: dvi._arg(delta + 1, signed=(delta == 3)),
}

150 

151 

def _dispatch(table, min, max=None, state=None, args=('raw',)):
    """
    Decorator for dispatch by opcode.

    Registers the decorated method in *table* for every opcode from *min* to
    *max* (inclusive).  The registered wrapper first checks that the Dvi
    state equals *state* (unless *state* is None), then reads the arguments
    described by *args* and calls the method with them.

    Parameters
    ----------
    table : indexable by int
        The dispatch table to be filled in.  Entries that are not yet
        registered must be None.

    min, max : int
        Range of opcodes that calls the registered function; *max* defaults to
        *min*.

    state : _dvistate, optional
        State of the Dvi object in which these opcodes are allowed.

    args : list[str], default: ['raw']
        Sequence of argument specifications:

        - 'raw': opcode minus minimum
        - 'u1': read one unsigned byte
        - 'u4': read four bytes, treat as an unsigned number
        - 's4': read four bytes, treat as a signed number
        - 'slen': read (opcode - minimum) bytes, treat as signed; None if
          that byte count is zero
        - 'slen1': read (opcode - minimum + 1) bytes, treat as signed
        - 'ulen1': read (opcode - minimum + 1) bytes, treat as unsigned
        - 'olen1': read (opcode - minimum + 1) bytes, treat as unsigned
          if under four bytes, signed if four bytes

    Raises
    ------
    ValueError
        At decoration time, if any opcode in the range already has a handler
        in *table*; at call time, if the state precondition is not met.
    """
    def decorate(method):
        get_args = [_arg_mapping[x] for x in args]

        @wraps(method)
        def wrapper(self, byte):
            if state is not None and self.state != state:
                raise ValueError("state precondition failed")
            return method(self, *[f(self, byte-min) for f in get_args])

        # A single opcode is just the degenerate range min..min, so both
        # forms go through the same "slot must be free" check.  Everything is
        # verified before anything is written, so a clash leaves *table*
        # untouched.
        opcodes = range(min, (min if max is None else max) + 1)
        for opcode in opcodes:
            if table[opcode] is not None:
                raise ValueError(
                    f"opcode {opcode} already has a registered handler")
        for opcode in opcodes:
            table[opcode] = wrapper
        return wrapper
    return decorate

200 

201 

class Dvi:
    """
    A reader for a dvi ("device-independent") file, as produced by TeX.

    Pages can only be read sequentially, by iterating over the object, and
    no attempt is made at verifying the postamble.

    Instances are context managers: leaving the ``with`` block closes the
    underlying file.  A deliberately naive way of dumping the text of a
    file, with no attempt at detecting whitespace, is::

        >>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
        ...     for page in dvi:
        ...         print(''.join(chr(t.glyph) for t in page.text))
    """
    # Opcode -> handler table; the decorator below fills it in as the
    # methods of this class are defined.
    _dtable = [None] * 256
    _dispatch = partial(_dispatch, _dtable)

    def __init__(self, filename, dpi):
        """
        Read the data from the file named *filename* and convert
        TeX's internal units to units of *dpi* per inch.
        *dpi* only sets the units and does not limit the resolution.
        Use None to return TeX's internal units.
        """
        _log.debug('Dvi: %s', filename)
        self.file = open(filename, 'rb')
        self.dpi = dpi
        self.fonts = {}
        self.state = _dvistate.pre

    def __enter__(self):
        """Context manager enter method, does nothing."""
        return self

    def __exit__(self, etype, evalue, etrace):
        """
        Context manager exit method, closes the underlying file if it is open.
        """
        self.close()

    def __iter__(self):
        """
        Iterate through the pages of the file.

        Yields
        ------
        Page
            Details of all the text and box objects on the page.
            The Page tuple contains lists of Text and Box tuples and
            the page dimensions, and the Text and Box tuples contain
            coordinates transformed into a standard Cartesian
            coordinate system at the dpi value given when initializing.
            The coordinates are floating point numbers, but otherwise
            precision is not lost and coordinate values are not clipped to
            integers.
        """
        while True:
            if not self._read():
                return
            yield self._output()

    def close(self):
        """Close the underlying file if it is open."""
        if self.file.closed:
            return
        self.file.close()

    def _output(self):
        """
        Output the text and boxes belonging to the most recent page.
        page = dvi._output()
        """
        # Bounding box of everything on the page, in raw dvi coordinates.
        # ``bottom`` includes glyph depths whereas ``baseline`` does not.
        left, top, right, bottom = np.inf, np.inf, -np.inf, -np.inf
        baseline = -np.inf
        for mark in self.text + self.boxes:
            if isinstance(mark, Box):
                x, y, height, width = mark
                depth = 0  # boxes have zero depth
            else:  # glyph
                x, y, font, glyph, width = mark
                height, depth = font._height_depth_of(glyph)
            left = min(left, x)
            top = min(top, y - height)
            right = max(right, x + width)
            bottom = max(bottom, y + depth)
            baseline = max(baseline, y)
        if self._baseline_v is not None:
            # This should normally be the case: _read() recorded a baseline.
            baseline = self._baseline_v
            self._baseline_v = None

        if not (self.text or self.boxes):
            # Empty page: avoid infs/nans from inf+/-inf.
            return Page(text=[], boxes=[], width=0, height=0, descent=0)

        if self.dpi is None:
            # special case for ease of debugging: output raw dvi coordinates
            return Page(text=self.text, boxes=self.boxes,
                        width=right-left, height=baseline-top,
                        descent=bottom-baseline)

        # convert from TeX's "scaled points" to dpi units
        d = self.dpi / (72.27 * 2**16)
        descent = (bottom - baseline) * d

        # Shift the origin to the left edge and flip the vertical axis.
        text = [Text((x-left)*d, (bottom-y)*d - descent, f, g, w*d)
                for (x, y, f, g, w) in self.text]
        boxes = [Box((x-left)*d, (bottom-y)*d - descent, h*d, w*d)
                 for (x, y, h, w) in self.boxes]

        return Page(text=text, boxes=boxes, width=(right-left)*d,
                    height=(baseline-top)*d, descent=descent)

    def _read(self):
        """
        Read one page from the file. Return True if successful,
        False if there were no more pages.
        """
        # Pages appear to start with the sequence
        #   bop (begin of page)
        #   xxx comment
        #   <push, ..., pop>  # if using chemformula
        #   down
        #   push
        #     down
        #     <push, push, xxx, right, xxx, pop, pop>  # if using xcolor
        #     down
        #     push
        #       down (possibly multiple)
        #       push  <=  here, v is the baseline position.
        #         etc.
        # (dviasm is useful to explore this structure.)
        # The baseline is therefore taken to be the vertical position the
        # first time the stack depth is 3 while the "down" counter is at
        # least 4.  The counter is inherited on push and discarded on pop, so
        # "down"s that were popped out (the chemformula preamble) do not
        # count; counting "down"s is what makes this robust to xcolor.
        down_stack = [0]
        self._baseline_v = None
        while True:
            byte = self.file.read(1)[0]
            handler = self._dtable[byte]
            handler(self, byte)
            opname = handler.__name__
            if opname == "_push":
                down_stack.append(down_stack[-1])
            elif opname == "_pop":
                down_stack.pop()
            elif opname == "_down":
                down_stack[-1] += 1
            # self.stack does not exist outside of a page, hence the getattr.
            if (self._baseline_v is None
                    and len(getattr(self, "stack", [])) == 3
                    and down_stack[-1] >= 4):
                self._baseline_v = self.v
            if byte == 140:  # end of page
                return True
            if self.state is _dvistate.post_post:  # end of file
                self.close()
                return False

    def _arg(self, nbytes, signed=False):
        """
        Read and return a big-endian integer *nbytes* long.
        Signedness is determined by the *signed* keyword.
        """
        raw = self.file.read(nbytes)
        return int.from_bytes(raw, "big", signed=signed)

    @_dispatch(min=0, max=127, state=_dvistate.inpage)
    def _set_char_immediate(self, char):
        # The opcode itself is the character; typeset it, then advance h.
        self._put_char_real(char)
        self.h += self.fonts[self.f]._width_of(char)

    @_dispatch(min=128, max=131, state=_dvistate.inpage, args=('olen1',))
    def _set_char(self, char):
        # Like _set_char_immediate, with the character read from the file.
        self._put_char_real(char)
        self.h += self.fonts[self.f]._width_of(char)

    @_dispatch(132, state=_dvistate.inpage, args=('s4', 's4'))
    def _set_rule(self, a, b):
        # Place a rule of height a and width b, then advance h by b.
        self._put_rule_real(a, b)
        self.h += b

    @_dispatch(min=133, max=136, state=_dvistate.inpage, args=('olen1',))
    def _put_char(self, char):
        # Typeset the character without moving the current position.
        self._put_char_real(char)

    def _put_char_real(self, char):
        """Record *char* of the current font at the current position."""
        font = self.fonts[self.f]
        if font._vf is None:
            self.text.append(Text(self.h, self.v, font, char,
                                  font._width_of(char)))
            return
        # Virtual font: expand the character into the glyphs and boxes of
        # its packet, scaled and offset to the current position.
        scale = font._scale
        packet = font._vf[char]
        for x, y, f, g, w in packet.text:
            newf = DviFont(scale=_mul2012(scale, f._scale),
                           tfm=f._tfm, texname=f.texname, vf=f._vf)
            self.text.append(Text(self.h + _mul2012(x, scale),
                                  self.v + _mul2012(y, scale),
                                  newf, g, newf._width_of(g)))
        sub_boxes = [Box(self.h + _mul2012(x, scale),
                         self.v + _mul2012(y, scale),
                         _mul2012(a, scale), _mul2012(b, scale))
                     for x, y, a, b in packet.boxes]
        self.boxes.extend(sub_boxes)

    @_dispatch(137, state=_dvistate.inpage, args=('s4', 's4'))
    def _put_rule(self, a, b):
        # Place a rule without moving the current position.
        self._put_rule_real(a, b)

    def _put_rule_real(self, a, b):
        """Record a box of height *a* and width *b* if both are positive."""
        if a > 0 and b > 0:
            self.boxes.append(Box(self.h, self.v, a, b))

    @_dispatch(138)
    def _nop(self, _):
        pass

    @_dispatch(139, state=_dvistate.outer, args=('s4',)*11)
    def _bop(self, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, p):
        # Beginning of page: reset the position registers and the per-page
        # containers.  The eleven arguments are read but not used.
        self.state = _dvistate.inpage
        self.h, self.v, self.w, self.x, self.y, self.z = 0, 0, 0, 0, 0, 0
        self.stack = []
        self.text = []  # list of Text objects
        self.boxes = []  # list of Box objects

    @_dispatch(140, state=_dvistate.inpage)
    def _eop(self, _):
        # End of page: the position registers and stack cease to exist;
        # text and boxes are kept for _output().
        self.state = _dvistate.outer
        del self.h, self.v, self.w, self.x, self.y, self.z, self.stack

    @_dispatch(141, state=_dvistate.inpage)
    def _push(self, _):
        registers = (self.h, self.v, self.w, self.x, self.y, self.z)
        self.stack.append(registers)

    @_dispatch(142, state=_dvistate.inpage)
    def _pop(self, _):
        self.h, self.v, self.w, self.x, self.y, self.z = self.stack.pop()

    @_dispatch(min=143, max=146, state=_dvistate.inpage, args=('slen1',))
    def _right(self, b):
        self.h += b

    @_dispatch(min=147, max=151, state=_dvistate.inpage, args=('slen',))
    def _right_w(self, new_w):
        # Without an argument, reuse the current w.
        if new_w is not None:
            self.w = new_w
        self.h += self.w

    @_dispatch(min=152, max=156, state=_dvistate.inpage, args=('slen',))
    def _right_x(self, new_x):
        # Without an argument, reuse the current x.
        if new_x is not None:
            self.x = new_x
        self.h += self.x

    @_dispatch(min=157, max=160, state=_dvistate.inpage, args=('slen1',))
    def _down(self, a):
        self.v += a

    @_dispatch(min=161, max=165, state=_dvistate.inpage, args=('slen',))
    def _down_y(self, new_y):
        # Without an argument, reuse the current y.
        if new_y is not None:
            self.y = new_y
        self.v += self.y

    @_dispatch(min=166, max=170, state=_dvistate.inpage, args=('slen',))
    def _down_z(self, new_z):
        # Without an argument, reuse the current z.
        if new_z is not None:
            self.z = new_z
        self.v += self.z

    @_dispatch(min=171, max=234, state=_dvistate.inpage)
    def _fnt_num_immediate(self, k):
        self.f = k

    @_dispatch(min=235, max=238, state=_dvistate.inpage, args=('olen1',))
    def _fnt_num(self, new_f):
        self.f = new_f

    @_dispatch(min=239, max=242, args=('ulen1',))
    def _xxx(self, datalen):
        # A "special": its payload is consumed and only logged.
        special = self.file.read(datalen)
        printable = ''.join([chr(ch) if 32 <= ch < 127 else '<%02x>' % ch
                             for ch in special])
        _log.debug('Dvi._xxx: encountered special: %s', printable)

    @_dispatch(min=243, max=246, args=('olen1', 'u4', 'u4', 'u4', 'u1', 'u1'))
    def _fnt_def(self, k, c, s, d, a, l):
        self._fnt_def_real(k, c, s, d, a, l)

    def _fnt_def_real(self, k, c, s, d, a, l):
        """
        Define font number *k*.

        *c* is compared against the tfm checksum, *s* is used as the scale of
        the resulting `DviFont`, and *a* + *l* is the length of the name that
        follows in the file, of which the last *l* bytes are the font name.
        *d* is not used.
        """
        n = self.file.read(a + l)
        fontname = n[-l:].decode('ascii')
        tfm = _tfmfile(fontname)
        # A zero checksum on either side means "don't check".
        if c != 0 and tfm.checksum != 0 and c != tfm.checksum:
            raise ValueError('tfm checksum mismatch: %s' % n)
        try:
            vf = _vffile(fontname)
        except FileNotFoundError:
            vf = None  # not a virtual font
        self.fonts[k] = DviFont(scale=s, tfm=tfm, texname=n, vf=vf)

    @_dispatch(247, state=_dvistate.pre, args=('u1', 'u4', 'u4', 'u4', 'u1'))
    def _pre(self, i, num, den, mag, k):
        self.file.read(k)  # comment in the dvi file
        if i != 2:
            raise ValueError("Unknown dvi format %d" % i)
        if num != 25400000 or den != 7227 * 2**16:
            raise ValueError("Nonstandard units in dvi file")
            # meaning: TeX always uses those exact values, so it
            # should be enough for us to support those
            # (There are 72.27 pt to an inch so 7227 pt =
            # 7227 * 2**16 sp to 100 in. The numerator is multiplied
            # by 10^5 to get units of 10**-7 meters.)
        if mag != 1000:
            raise ValueError("Nonstandard magnification in dvi file")
            # meaning: LaTeX seems to frown on setting \mag, so
            # I think we can assume this is constant
        self.state = _dvistate.outer

    @_dispatch(248, state=_dvistate.outer)
    def _post(self, _):
        self.state = _dvistate.post_post
        # TODO: actually read the postamble and finale?
        # currently post_post just triggers closing the file

    @_dispatch(249)
    def _post_post(self, _):
        raise NotImplementedError

    @_dispatch(min=250, max=255)
    def _malformed(self, offset):
        raise ValueError(f"unknown command: byte {250 + offset}")

529 

530 

class DviFont:
    """
    Encapsulation of a font that a DVI file can refer to.

    A DviFont stores the texname and size of a font, can be compared with
    other DviFonts, and knows the widths of glyphs in the same units as the
    AFM file.  It also carries internal attributes (for use by dviread.py)
    that do *not* take part in comparisons.

    The size is in Adobe points (converted from TeX points).

    Parameters
    ----------
    scale : float
        Factor by which the font is scaled from its natural size.
    tfm : Tfm
        TeX font metrics for this font
    texname : bytes
       Name of the font as used internally by TeX and friends, as an ASCII
       bytestring.  This is usually very different from any external font
       names; `PsfontsMap` can be used to find the external name of the font.
    vf : Vf
       A TeX "virtual font" file, or None if this font is not virtual.

    Attributes
    ----------
    texname : bytes
    size : float
       Size of the font in Adobe points, converted from the slightly
       smaller TeX points.
    widths : list
       Widths of glyphs in glyph-space units, typically 1/1000ths of
       the point size.

    """
    __slots__ = ('texname', 'size', 'widths', '_scale', '_vf', '_tfm')

    def __init__(self, scale, tfm, texname, vf):
        _api.check_isinstance(bytes, texname=texname)
        self._scale = scale
        self._tfm = tfm
        self.texname = texname
        self._vf = vf
        self.size = scale * (72.0 / (72.27 * 2**16))
        # One entry per character code up to the largest one in the tfm;
        # an empty tfm yields an empty list.
        nchars = max(tfm.width, default=-1) + 1
        self.widths = [(1000*tfm.width.get(char, 0)) >> 20
                       for char in range(nchars)]

    def __eq__(self, other):
        # Only texname and size are compared, and only for the exact same
        # class.
        if type(self) is not type(other):
            return False
        return self.texname == other.texname and self.size == other.size

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __repr__(self):
        return f"<{type(self).__name__}: {self.texname}>"

    def _width_of(self, char):
        """Width of char in dvi units."""
        width = self._tfm.width.get(char, None)
        if width is None:
            _log.debug('No width for char %d in font %s.', char, self.texname)
            return 0
        return _mul2012(width, self._scale)

    def _height_depth_of(self, char):
        """Height and depth of char in dvi units."""
        tfm = self._tfm
        height_depth = []
        for name, metric in (("height", tfm.height), ("depth", tfm.depth)):
            value = metric.get(char, None)
            if value is not None:
                height_depth.append(_mul2012(value, self._scale))
            else:
                _log.debug('No %s for char %d in font %s',
                           name, char, self.texname)
                height_depth.append(0)
        # cmsyXX (symbols font) glyph 0 ("minus") has a nonzero descent
        # so that TeX aligns equations properly
        # (https://tex.stackexchange.com/q/526103/)
        # but we actually care about the rasterization depth to align
        # the dvipng-generated images.
        if re.match(br'^cmsy\d+$', self.texname) and char == 0:
            height_depth[-1] = 0
        return height_depth

620 

621 

class Vf(Dvi):
    r"""
    A virtual font (\*.vf file) containing subroutines for dvi files.

    The whole file is parsed at construction time; the file is closed again
    before the constructor returns.

    Parameters
    ----------
    filename : str or path-like

    Notes
    -----
    The virtual font format is a derivative of dvi:
    http://mirrors.ctan.org/info/knuth/virtual-fonts
    This class reuses some of the machinery of `Dvi`
    but replaces the `_read` loop and dispatch mechanism.

    Examples
    --------
    ::

        vf = Vf(filename)
        glyph = vf[code]
        glyph.text, glyph.boxes, glyph.width
    """

    def __init__(self, filename):
        super().__init__(filename, 0)
        try:
            self._first_font = None
            self._chars = {}
            self._read()
        finally:
            # Everything has been loaded (or parsing failed); either way the
            # file is not needed anymore.
            self.close()

    def __getitem__(self, code):
        """Return the `Page` built from the packet of character *code*."""
        return self._chars[code]

    def _read(self):
        """
        Parse the vf file up to its postamble.

        Each character packet is executed with the dvi opcode handlers of
        `Dvi` and its result is stored in ``self._chars``; font definitions
        are recorded in ``self.fonts``.  Returns None.

        Raises
        ------
        ValueError
            If a packet overruns its declared length, contains an opcode
            that is not allowed inside a packet, is misplaced, or if an
            unknown opcode is met outside of a packet.
        """
        packet_char, packet_width, packet_ends = None, None, None
        while True:
            byte = self.file.read(1)[0]
            # If we are in a packet, execute the dvi instructions
            if self.state is _dvistate.inpage:
                byte_at = self.file.tell()-1
                if byte_at == packet_ends:
                    self._finalize_packet(packet_char, packet_width)
                    packet_char, packet_width = None, None
                    # fall through to out-of-packet code
                elif byte_at > packet_ends:
                    raise ValueError("Packet length mismatch in vf file")
                else:
                    if byte in (139, 140) or byte >= 243:
                        raise ValueError(
                            "Inappropriate opcode %d in vf file" % byte)
                    Dvi._dtable[byte](self, byte)
                    continue

            # We are outside a packet
            if byte < 242:          # a short packet (length given by byte)
                packet_char, packet_width = self._arg(1), self._arg(3)
                packet_ends = self._init_packet(byte)
                self.state = _dvistate.inpage
            elif byte == 242:       # a long packet
                packet_len, packet_char, packet_width = \
                    [self._arg(x) for x in (4, 4, 4)]
                # Exactly as for a short packet: remember where the packet
                # ends and switch to executing its dvi instructions.
                # Without this, the bytes of the packet would be parsed as
                # outer-level vf commands.
                packet_ends = self._init_packet(packet_len)
                self.state = _dvistate.inpage
            elif 243 <= byte <= 246:
                k = self._arg(byte - 242, byte == 246)
                c, s, d, a, l = [self._arg(x) for x in (4, 4, 4, 1, 1)]
                self._fnt_def_real(k, c, s, d, a, l)
                # The first font defined is the default font of each packet.
                if self._first_font is None:
                    self._first_font = k
            elif byte == 247:       # preamble
                i, k = self._arg(1), self._arg(1)
                x = self.file.read(k)
                cs, ds = self._arg(4), self._arg(4)
                self._pre(i, x, cs, ds)
            elif byte == 248:       # postamble (just some number of 248s)
                break
            else:
                raise ValueError("Unknown vf opcode %d" % byte)

    def _init_packet(self, pl):
        """
        Reset the dvi registers for a new packet of *pl* bytes.

        Returns the file offset just past the end of the packet.
        """
        if self.state != _dvistate.outer:
            raise ValueError("Misplaced packet in vf file")
        self.h, self.v, self.w, self.x, self.y, self.z = 0, 0, 0, 0, 0, 0
        self.stack, self.text, self.boxes = [], [], []
        self.f = self._first_font
        return self.file.tell() + pl

    def _finalize_packet(self, packet_char, packet_width):
        """Store the text and boxes gathered for *packet_char*."""
        self._chars[packet_char] = Page(
            text=self.text, boxes=self.boxes, width=packet_width,
            height=None, descent=None)
        self.state = _dvistate.outer

    def _pre(self, i, x, cs, ds):
        """
        Handle the vf preamble.

        *i* is the format identifier (must be 202) and *x* the comment;
        *cs* (checksum) and *ds* (design size) are not used.
        """
        if self.state is not _dvistate.pre:
            raise ValueError("pre command in middle of vf file")
        if i != 202:
            raise ValueError("Unknown vf format %d" % i)
        if len(x):
            _log.debug('vf file comment: %s', x)
        self.state = _dvistate.outer
        # cs = checksum, ds = design size

731 # cs = checksum, ds = design size 

732 

733 

def _mul2012(num1, num2):
    """Multiply two numbers in 20.12 fixed point format."""
    # Kept as a separate function because the precedence of >> is
    # surprising; the product is formed first and only then shifted.
    product = num1 * num2
    return product >> 20

738 

739 

class Tfm:
    """
    A TeX Font Metric file.

    Only the small subset of the format that the Dvi class needs is read.

    Parameters
    ----------
    filename : str or path-like

    Attributes
    ----------
    checksum : int
       Used for verifying against the dvi file.
    design_size : int
       Design size of the font (unknown units)
    width, height, depth : dict
       Dimensions of each character, need to be scaled by the factor
       specified in the dvi file. These are dicts because indexing may
       not start from 0.
    """
    __slots__ = ('checksum', 'design_size', 'width', 'height', 'depth')

    def __init__(self, filename):
        _log.debug('opening tfm file %s', filename)
        with open(filename, 'rb') as file:
            header1 = file.read(24)
            # Six big-endian 16-bit fields, starting at byte offset 2.
            lh, bc, ec, nw, nh, nd = struct.unpack_from('!6H', header1, 2)
            _log.debug('lh=%d, bc=%d, ec=%d, nw=%d, nh=%d, nd=%d',
                       lh, bc, ec, nw, nh, nd)
            header2 = file.read(4*lh)
            self.checksum, self.design_size = \
                struct.unpack_from('!2I', header2)
            # there is also encoding information etc.
            char_info = file.read(4*(ec-bc+1))
            widths = struct.unpack(f'!{nw}i', file.read(4*nw))
            heights = struct.unpack(f'!{nh}i', file.read(4*nh))
            depths = struct.unpack(f'!{nd}i', file.read(4*nd))
        self.width, self.height, self.depth = {}, {}, {}
        # Each character has a 4-byte record: byte 0 indexes the width
        # table, the high and low nibbles of byte 1 index the height and
        # depth tables.
        for char in range(bc, ec+1):
            offset = 4 * (char - bc)
            width_index = char_info[offset]
            height_depth = char_info[offset+1]
            self.width[char] = widths[width_index]
            self.height[char] = heights[height_depth >> 4]
            self.depth[char] = depths[height_depth & 0xf]

784 

785 

# One parsed font-map entry (see PsfontsMap).
PsFont = namedtuple(
    'PsFont', ['texname', 'psname', 'effects', 'encoding', 'filename'])

787 

788 

789class PsfontsMap: 

790 """ 

791 A psfonts.map formatted file, mapping TeX fonts to PS fonts. 

792 

793 Parameters 

794 ---------- 

795 filename : str or path-like 

796 

797 Notes 

798 ----- 

799 For historical reasons, TeX knows many Type-1 fonts by different 

800 names than the outside world. (For one thing, the names have to 

801 fit in eight characters.) Also, TeX's native fonts are not Type-1 

802 but Metafont, which is nontrivial to convert to PostScript except 

803 as a bitmap. While high-quality conversions to Type-1 format exist 

804 and are shipped with modern TeX distributions, we need to know 

805 which Type-1 fonts are the counterparts of which native fonts. For 

806 these reasons a mapping is needed from internal font names to font 

807 file names. 

808 

809 A texmf tree typically includes mapping files called e.g. 

810 :file:`psfonts.map`, :file:`pdftex.map`, or :file:`dvipdfm.map`. 

811 The file :file:`psfonts.map` is used by :program:`dvips`, 

812 :file:`pdftex.map` by :program:`pdfTeX`, and :file:`dvipdfm.map` 

813 by :program:`dvipdfm`. :file:`psfonts.map` might avoid embedding 

814 the 35 PostScript fonts (i.e., have no filename for them, as in 

815 the Times-Bold example above), while the pdf-related files perhaps 

816 only avoid the "Base 14" pdf fonts. But the user may have 

817 configured these files differently. 

818 

819 Examples 

820 -------- 

821 >>> map = PsfontsMap(find_tex_file('pdftex.map')) 

822 >>> entry = map[b'ptmbo8r'] 

823 >>> entry.texname 

824 b'ptmbo8r' 

825 >>> entry.psname 

826 b'Times-Bold' 

827 >>> entry.encoding 

828 '/usr/local/texlive/2008/texmf-dist/fonts/enc/dvips/base/8r.enc' 

829 >>> entry.effects 

830 {'slant': 0.16700000000000001} 

831 >>> entry.filename 

832 """ 

833 __slots__ = ('_filename', '_unparsed', '_parsed') 

834 

835 # Create a filename -> PsfontsMap cache, so that calling 

836 # `PsfontsMap(filename)` with the same filename a second time immediately 

837 # returns the same object. 

838 @lru_cache 

839 def __new__(cls, filename): 

840 self = object.__new__(cls) 

841 self._filename = os.fsdecode(filename) 

842 # Some TeX distributions have enormous pdftex.map files which would 

843 # take hundreds of milliseconds to parse, but it is easy enough to just 

844 # store the unparsed lines (keyed by the first word, which is the 

845 # texname) and parse them on-demand. 

846 with open(filename, 'rb') as file: 

847 self._unparsed = {} 

848 for line in file: 

849 tfmname = line.split(b' ', 1)[0] 

850 self._unparsed.setdefault(tfmname, []).append(line) 

851 self._parsed = {} 

852 return self 

853 

854 def __getitem__(self, texname): 

855 assert isinstance(texname, bytes) 

856 if texname in self._unparsed: 

857 for line in self._unparsed.pop(texname): 

858 if self._parse_and_cache_line(line): 

859 break 

860 try: 

861 return self._parsed[texname] 

862 except KeyError: 

863 raise LookupError( 

864 f"An associated PostScript font (required by Matplotlib) " 

865 f"could not be found for TeX font {texname.decode('ascii')!r} " 

866 f"in {self._filename!r}; this problem can often be solved by " 

867 f"installing a suitable PostScript font package in your TeX " 

868 f"package manager") from None 

869 

def _parse_and_cache_line(self, line):
    """
    Parse a line in the font mapping file.

    The format is (partially) documented at
    http://mirrors.ctan.org/systems/doc/pdftex/manual/pdftex-a.pdf
    https://tug.org/texinfohtml/dvips.html#psfonts_002emap
    Each line can have the following fields:

    - tfmname (first, only required field),
    - psname (defaults to tfmname, must come immediately after tfmname if
      present),
    - fontflags (integer, must come immediately after psname if present,
      ignored by us),
    - special (SlantFont and ExtendFont, only field that is double-quoted),
    - fontfile, encodingfile (optional, prefixed by <, <<, or <[; << always
      precedes a font, <[ always precedes an encoding, < can precede either
      but then an encoding file must have extension .enc; < and << also
      request different font subsetting behaviors but we ignore that; < can
      be separated from the filename by whitespace).

    special, fontfile, and encodingfile can appear in any order.

    Malformed lines (a dangling ``<`` with no filename after it, or a
    SlantFont/ExtendFont keyword without a valid numeric operand) are
    ignored instead of leaking `StopIteration` or `ValueError` to the
    caller.

    Parameters
    ----------
    line : bytes
        One raw line of the map file.

    Returns
    -------
    bool or None
        True if an entry was stored in ``self._parsed``; None if the line
        was ignored.
    """
    # If the map file specifies multiple encodings for a font, we
    # follow pdfTeX in choosing the last one specified. Such
    # entries are probably mistakes but they have occurred.
    # https://tex.stackexchange.com/q/10826/

    if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
        return
    tfmname = basename = special = encodingfile = fontfile = None
    is_subsetted = is_t1 = is_truetype = False
    matches = re.finditer(br'"([^"]*)(?:"|$)|(\S+)', line)
    for match in matches:
        quoted, unquoted = match.groups()
        if unquoted:
            if unquoted.startswith(b"<<"):  # font
                fontfile = unquoted[2:]
            elif unquoted.startswith(b"<["):  # encoding
                encodingfile = unquoted[2:]
            elif unquoted.startswith(b"<"):  # font or encoding
                # <foo => foo
                word = unquoted[1:]
                if not word:
                    # < by itself => read the next word, if there is one.
                    following = next(matches, None)
                    if following is not None:
                        word = next(filter(None, following.groups()), None)
                    if not word:
                        return  # Dangling "<": malformed line.
                if word.endswith(b".enc"):
                    encodingfile = word
                else:
                    fontfile = word
                    is_subsetted = True
            elif tfmname is None:
                tfmname = unquoted
            elif basename is None:
                basename = unquoted
        elif quoted:
            special = quoted
    effects = {}
    if special:
        effect_keys = {b"SlantFont": "slant", b"ExtendFont": "extend"}
        # PostScript puts the operand before the keyword, so walk the words
        # backwards and pull the operand right after seeing the keyword.
        words = reversed(special.split())
        for word in words:
            key = effect_keys.get(word)
            if key is None:
                continue
            try:
                effects[key] = float(next(words))
            except (StopIteration, ValueError):
                return  # Missing or non-numeric operand: malformed line.

    # Verify some properties of the line that would cause it to be ignored
    # otherwise.
    if fontfile is not None:
        if fontfile.endswith((b".ttf", b".ttc")):
            is_truetype = True
        elif not fontfile.endswith(b".otf"):
            is_t1 = True
    elif basename is not None:
        is_t1 = True
    if is_truetype and is_subsetted and encodingfile is None:
        return
    if not is_t1 and ("slant" in effects or "extend" in effects):
        return
    if abs(effects.get("slant", 0)) > 1:
        return
    if abs(effects.get("extend", 0)) > 2:
        return

    if basename is None:
        basename = tfmname
    if encodingfile is not None:
        encodingfile = find_tex_file(encodingfile)
    if fontfile is not None:
        fontfile = find_tex_file(fontfile)
    self._parsed[tfmname] = PsFont(
        texname=tfmname, psname=basename, effects=effects,
        encoding=encodingfile, filename=fontfile)
    return True

964 

965 

def _parse_enc(path):
    r"""
    Parse a \*.enc file referenced from a psfonts.map style file.

    The format supported by this function is a tiny subset of PostScript:
    ``%`` comments are stripped, and everything between the first ``[`` and
    the last ``]`` must be a whitespace-separated list of ``/name`` tokens.

    Parameters
    ----------
    path : `os.PathLike`

    Returns
    -------
    list
        The nth entry of the list is the PostScript glyph name of the nth
        glyph.

    Raises
    ------
    ValueError
        If the file contains no ``[...]`` array, or if an entry of the
        array does not start with ``/``.
    """
    no_comments = re.sub("%.*", "", Path(path).read_text(encoding="ascii"))
    array = re.search(r"(?s)\[(.*)\]", no_comments)
    if array is not None:
        names = array.group(1).split()
        if all(name.startswith("/") for name in names):
            return [name[1:] for name in names]
    # Covers both a missing array (previously an AttributeError on None) and
    # an array holding something other than /name tokens.
    raise ValueError(f"Failed to parse {path} as Postscript encoding")

989 

990 

class _LuatexKpsewhich:
    """
    File lookup through a ``luatex --luaonly kpsewhich.lua`` subprocess.

    The subprocess is driven through its pipes: one filename per line is
    written to its stdin, and one line of reply is read from its stdout.
    """

    @lru_cache  # A singleton.
    def __new__(cls):
        # The cache is keyed on *cls* only, so every successful
        # instantiation returns the same object.
        instance = object.__new__(cls)
        instance._proc = instance._new_proc()
        return instance

    def _new_proc(self):
        """Start the luatex helper process with piped stdin and stdout."""
        script = str(cbook._get_data_path("kpsewhich.lua"))
        command = ["luatex", "--luaonly", script]
        return subprocess.Popen(
            command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    def search(self, filename):
        """
        Ask the helper process for *filename*.

        Returns the decoded reply line, or None if the reply is ``nil``.
        """
        if self._proc.poll() is not None:  # Dead, restart it.
            self._proc = self._new_proc()
        request = os.fsencode(filename) + b"\n"
        self._proc.stdin.write(request)
        self._proc.stdin.flush()
        reply = self._proc.stdout.readline().rstrip()
        if reply == b"nil":
            return None
        return os.fsdecode(reply)

1011 

1012 

@lru_cache
def find_tex_file(filename):
    """
    Locate a file in the texmf tree using kpathsea_.

    kpathsea ships with most TeX distributions, on Unix-like systems as well
    as on Windows (MikTeX).  It is queried through a long-lived luatex
    process when luatex can be started, and through a one-off ``kpsewhich``
    call otherwise.

    .. _kpathsea: https://www.tug.org/kpathsea/

    Parameters
    ----------
    filename : str or path-like
        Name of the file to look up; bytes are decoded as utf-8.

    Returns
    -------
    str
        The path reported by kpathsea.

    Raises
    ------
    FileNotFoundError
        If the file is not found.
    """

    # we expect these to always be ascii encoded, but use utf-8
    # out of caution
    if isinstance(filename, bytes):
        filename = filename.decode('utf-8', errors='replace')

    try:
        luatex = _LuatexKpsewhich()
    except FileNotFoundError:
        luatex = None  # Fallback to directly calling kpsewhich, as below.

    if luatex:
        found = luatex.search(filename)
    else:
        if sys.platform != 'win32':
            # On POSIX, run through the equivalent of os.fsdecode().
            run_options = {'encoding': sys.getfilesystemencoding(),
                           'errors': 'surrogateescape'}
        else:
            # On Windows only, kpathsea can use utf-8 for cmd args and output.
            # The `command_line_encoding` environment variable is set to force
            # it to always use utf-8 encoding. See Matplotlib issue #11848.
            run_options = {
                'env': {**os.environ, 'command_line_encoding': 'utf-8'},
                'encoding': 'utf-8'}

        try:
            output = cbook._check_and_log_subprocess(
                ['kpsewhich', filename], _log, **run_options)
            found = output.rstrip('\n')
        except (FileNotFoundError, RuntimeError):
            found = None

    # An empty reply counts as "not found", just like None.
    if not found:
        raise FileNotFoundError(
            f"Matplotlib's TeX implementation searched for a file named "
            f"{filename!r} in your texmf tree, but could not find it")
    return found

1070 

1071 

@lru_cache
def _fontfile(cls, suffix, texname):
    """
    Return ``cls(path)`` for the file named ``texname + suffix``.

    The path is resolved with `find_tex_file`; results are memoized by
    `lru_cache` on the ``(cls, suffix, texname)`` arguments.
    """
    located = find_tex_file(texname + suffix)
    return cls(located)


# Loaders bound to a font-file class and its filename suffix; they only
# need the texname.
_tfmfile = partial(_fontfile, Tfm, ".tfm")
_vffile = partial(_fontfile, Vf, ".vf")

1079 

1080 

if __name__ == '__main__':
    from argparse import ArgumentParser
    import itertools

    # Debugging aid: print the text runs and boxes of every page of a dvi
    # file given on the command line.
    cli = ArgumentParser()
    cli.add_argument("filename")
    cli.add_argument("dpi", nargs="?", type=float, default=None)
    options = cli.parse_args()

    with Dvi(options.filename, options.dpi) as dvi:
        # NOTE(review): the map object is not used below; the lookup is kept
        # because it still fails early when pdftex.map cannot be located.
        fontmap = PsfontsMap(find_tex_file('pdftex.map'))
        for page in dvi:
            print(f"=== new page === "
                  f"(w: {page.width}, h: {page.height}, d: {page.descent})")
            # Consecutive glyphs sharing a font are reported as one run.
            runs = itertools.groupby(page.text, lambda item: item.font)
            for font, run in runs:
                print(f"font: {font.texname.decode('latin-1')!r}\t"
                      f"scale: {font._scale / 2 ** 20}")
                print("\t".join(["x", "y", "glyph", "chr", "w", "(glyphs)"]))
                for item in run:
                    char = chr(item.glyph)
                    shown = char if char.isprintable() else "."
                    print(item.x, item.y, item.glyph, shown, item.width,
                          sep="\t")
            if page.boxes:
                print("\t".join(["x", "y", "h", "w", "", "(boxes)"]))
                for box in page.boxes:
                    print(box.x, box.y, box.height, box.width, sep="\t")