Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/matplotlib/_afm.py: 63%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

242 statements  

"""
A python interface to Adobe Font Metrics Files.

Although a number of other Python implementations exist, and may be more
complete than this, it was decided not to go with them because they were
either:

1) copyrighted or used a non-BSD compatible license
2) had too many dependencies and a free standing lib was needed
3) did more than needed and it was easier to write afresh rather than
   figure out how to get just what was needed.

It is pretty easy to use, and has no external dependencies:

>>> import matplotlib as mpl
>>> from pathlib import Path
>>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm')
>>>
>>> from matplotlib._afm import AFM
>>> with afm_path.open('rb') as fh:
...     afm = AFM(fh)
>>> afm.string_width_height('What the heck?')
(6220.0, 694)
>>> afm.get_fontname()
'Times-Roman'
>>> afm.get_kern_dist('A', 'f')
0
>>> afm.get_kern_dist('A', 'y')
-92.0
>>> afm.get_bbox_char('!')
[130, -9, 238, 676]

As in the Adobe Font Metrics File Format Specification, all dimensions
are given in units of 1/1000 of the scale factor (point size) of the font
being used.
"""

37 

38from collections import namedtuple 

39import logging 

40import re 

41 

42from ._mathtext_data import uni2type1 

43 

44 

45_log = logging.getLogger(__name__) 

46 

47 

48def _to_int(x): 

49 # Some AFM files have floats where we are expecting ints -- there is 

50 # probably a better way to handle this (support floats, round rather than 

51 # truncate). But I don't know what the best approach is now and this 

52 # change to _to_int should at least prevent Matplotlib from crashing on 

53 # these. JDH (2009-11-06) 

54 return int(float(x)) 

55 

56 

57def _to_float(x): 

58 # Some AFM files use "," instead of "." as decimal separator -- this 

59 # shouldn't be ambiguous (unless someone is wicked enough to use "," as 

60 # thousands separator...). 

61 if isinstance(x, bytes): 

62 # Encoding doesn't really matter -- if we have codepoints >127 the call 

63 # to float() will error anyways. 

64 x = x.decode('latin-1') 

65 return float(x.replace(',', '.')) 

66 

67 

68def _to_str(x): 

69 return x.decode('utf8') 

70 

71 

72def _to_list_of_ints(s): 

73 s = s.replace(b',', b' ') 

74 return [_to_int(val) for val in s.split()] 

75 

76 

77def _to_list_of_floats(s): 

78 return [_to_float(val) for val in s.split()] 

79 

80 

81def _to_bool(s): 

82 if s.lower().strip() in (b'false', b'0', b'no'): 

83 return False 

84 else: 

85 return True 

86 

87 

def _parse_header(fh):
    """
    Read the font metrics header from *fh*, up to and including the
    ``StartCharMetrics`` line, and return it as a dict.

    Keys are the header keywords as bytes (e.g. ``b'FontName'``); values are
    converted to an appropriate Python type, e.g.:

    * 'False' -> False
    * '0' -> 0
    * '-168 -218 1000 898' -> [-168, -218, 1000, 898]

    Recognized keywords are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
      StartCharMetrics, CharacterSet, Characters

    Lines starting with ``Comment`` are skipped.  Any other keyword
    (including the empty keyword of a blank line) and any value that fails
    conversion with a ValueError is logged as an error and skipped.

    Raises
    ------
    RuntimeError
        If the first non-comment line does not start with
        ``StartFontMetrics``, or if the input ends before a
        ``StartCharMetrics`` entry has been read successfully.
    """
    converters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_float,
        b'UnderlineThickness': _to_float,
        b'Version': _to_str,
        # Some AFM files have non-ASCII characters here (not allowed by the
        # spec).  There is no public API to access this field, so it is kept
        # as raw bytes.
        b'Notice': lambda x: x,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
    }
    header = {}
    seen_first_entry = False
    for raw in fh:
        entry = raw.rstrip()
        if entry.startswith(b'Comment'):
            continue
        # Keyword is everything before the first space; the rest (possibly
        # empty) is the value.
        key, _, val = entry.partition(b' ')
        if not seen_first_entry:
            # AFM spec, Section 4: the StartFontMetrics keyword (followed by
            # a version number) must be the first line in the file.  Only
            # this first header entry is checked.
            if key != b'StartFontMetrics':
                raise RuntimeError('Not an AFM file')
            seen_first_entry = True
        converter = converters.get(key)
        if converter is None:
            _log.error("Found an unknown keyword in AFM header (was %r)", key)
            continue
        try:
            header[key] = converter(val)
        except ValueError:
            _log.error('Value error parsing header in AFM: %s, %s', key, val)
            continue
        if key == b'StartCharMetrics':
            # The header ends here; the char metrics follow.
            return header
    raise RuntimeError('Bad parse')

169 

170 

# Lightweight record type for the per-character data read from the
# char metrics section; the docstrings are attached after creation.
CharMetrics = namedtuple('CharMetrics', ['width', 'name', 'bbox'])
CharMetrics.__doc__ = """
    Represents the character metrics of a single character.

    Notes
    -----
    The fields do currently only describe a subset of character metrics
    information defined in the AFM standard.
    """
CharMetrics.width.__doc__ = """The character width (WX)."""
CharMetrics.name.__doc__ = """The character name (N)."""
CharMetrics.bbox.__doc__ = """
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""

184 

185 

186def _parse_char_metrics(fh): 

187 """ 

188 Parse the given filehandle for character metrics information and return 

189 the information as dicts. 

190 

191 It is assumed that the file cursor is on the line behind 

192 'StartCharMetrics'. 

193 

194 Returns 

195 ------- 

196 ascii_d : dict 

197 A mapping "ASCII num of the character" to `.CharMetrics`. 

198 name_d : dict 

199 A mapping "character name" to `.CharMetrics`. 

200 

201 Notes 

202 ----- 

203 This function is incomplete per the standard, but thus far parses 

204 all the sample afm files tried. 

205 """ 

206 required_keys = {'C', 'WX', 'N', 'B'} 

207 

208 ascii_d = {} 

209 name_d = {} 

210 for line in fh: 

211 # We are defensively letting values be utf8. The spec requires 

212 # ascii, but there are non-compliant fonts in circulation 

213 line = _to_str(line.rstrip()) # Convert from byte-literal 

214 if line.startswith('EndCharMetrics'): 

215 return ascii_d, name_d 

216 # Split the metric line into a dictionary, keyed by metric identifiers 

217 vals = dict(s.strip().split(' ', 1) for s in line.split(';') if s) 

218 # There may be other metrics present, but only these are needed 

219 if not required_keys.issubset(vals): 

220 raise RuntimeError('Bad char metrics line: %s' % line) 

221 num = _to_int(vals['C']) 

222 wx = _to_float(vals['WX']) 

223 name = vals['N'] 

224 bbox = _to_list_of_floats(vals['B']) 

225 bbox = list(map(int, bbox)) 

226 metrics = CharMetrics(wx, name, bbox) 

227 # Workaround: If the character name is 'Euro', give it the 

228 # corresponding character code, according to WinAnsiEncoding (see PDF 

229 # Reference). 

230 if name == 'Euro': 

231 num = 128 

232 elif name == 'minus': 

233 num = ord("\N{MINUS SIGN}") # 0x2212 

234 if num != -1: 

235 ascii_d[num] = metrics 

236 name_d[name] = metrics 

237 raise RuntimeError('Bad parse') 

238 

239 

240def _parse_kern_pairs(fh): 

241 """ 

242 Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and 

243 values are the kern pair value. For example, a kern pairs line like 

244 ``KPX A y -50`` 

245 

246 will be represented as:: 

247 

248 d[ ('A', 'y') ] = -50 

249 

250 """ 

251 

252 line = next(fh) 

253 if not line.startswith(b'StartKernPairs'): 

254 raise RuntimeError('Bad start of kern pairs data: %s' % line) 

255 

256 d = {} 

257 for line in fh: 

258 line = line.rstrip() 

259 if not line: 

260 continue 

261 if line.startswith(b'EndKernPairs'): 

262 next(fh) # EndKernData 

263 return d 

264 vals = line.split() 

265 if len(vals) != 4 or vals[0] != b'KPX': 

266 raise RuntimeError('Bad kern pairs line: %s' % line) 

267 c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3]) 

268 d[(c1, c2)] = val 

269 raise RuntimeError('Bad kern pairs parse') 

270 

271 

# Record type for one part of a composite character; the docstrings are
# attached after creation.
CompositePart = namedtuple('CompositePart', ['name', 'dx', 'dy'])
CompositePart.__doc__ = """
    Represents the information on a composite element of a composite char."""
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""

278 

279 

280def _parse_composites(fh): 

281 """ 

282 Parse the given filehandle for composites information return them as a 

283 dict. 

284 

285 It is assumed that the file cursor is on the line behind 'StartComposites'. 

286 

287 Returns 

288 ------- 

289 dict 

290 A dict mapping composite character names to a parts list. The parts 

291 list is a list of `.CompositePart` entries describing the parts of 

292 the composite. 

293 

294 Examples 

295 -------- 

296 A composite definition line:: 

297 

298 CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ; 

299 

300 will be represented as:: 

301 

302 composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0), 

303 CompositePart(name='acute', dx=160, dy=170)] 

304 

305 """ 

306 composites = {} 

307 for line in fh: 

308 line = line.rstrip() 

309 if not line: 

310 continue 

311 if line.startswith(b'EndComposites'): 

312 return composites 

313 vals = line.split(b';') 

314 cc = vals[0].split() 

315 name, _num_parts = cc[1], _to_int(cc[2]) 

316 pccParts = [] 

317 for s in vals[1:-1]: 

318 pcc = s.split() 

319 part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3])) 

320 pccParts.append(part) 

321 composites[name] = pccParts 

322 

323 raise RuntimeError('Bad composites parse') 

324 

325 

def _parse_optional(fh):
    """
    Parse the optional kern pair and composites sections of the AFM file
    *fh*, reading until the input is exhausted.

    Lines that do not open one of these sections are ignored.

    Returns
    -------
    kern_data : dict
        A dict containing kerning information. May be empty.
        See `._parse_kern_pairs`.
    composites : dict
        A dict containing composite information. May be empty.
        See `._parse_composites`.
    """
    section_parsers = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites': _parse_composites,
    }
    # Every section defaults to an empty dict in case it is absent.
    parsed = {keyword: {} for keyword in section_parsers}

    for raw in fh:
        tokens = raw.split()
        if not tokens:
            continue
        keyword = tokens[0]
        parser = section_parsers.get(keyword)
        if parser is not None:
            # The section parser consumes its lines from the same iterator.
            parsed[keyword] = parser(fh)

    return parsed[b'StartKernData'], parsed[b'StartComposites']

356 

357 

class AFM:
    """
    The parsed contents of an Adobe Font Metrics (AFM) file.

    The header, the per-character metrics (indexed both by character code
    and by character name), the kern pairs and the composites are read once
    at construction; the methods only look values up in that data.
    """

    def __init__(self, fh):
        """Parse the AFM file in file object *fh*."""
        self._header = _parse_header(fh)
        self._metrics, self._metrics_by_name = _parse_char_metrics(fh)
        self._kern, self._composite = _parse_optional(fh)

    def get_bbox_char(self, c, isord=False):
        """
        Return the bounding box of character *c*.

        *c* is a one-character string, or a character code if *isord* is
        true.  Raises KeyError if the font has no metrics for that code.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Newlines are ignored.  ``(0, 0)`` is returned if *s* is empty or
        consists only of newlines.  Raises KeyError if a character has no
        metrics.
        """
        if not len(s):
            return 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        measured = False
        for c in s:
            if c == '\n':
                continue
            wx, name, bbox = self._metrics[ord(c)]

            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): bbox is documented on CharMetrics as
            # (llx, lly, urx, ury) but is unpacked here as (l, b, w, h);
            # kept as-is so existing results do not change.
            l, b, w, h = bbox
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name
            measured = True

        if not measured:
            # Only newlines were seen; without this the 1e9 sentinel in
            # *miny* would leak into the returned height.
            return 0, 0
        return total_width, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and the maximal descent.

        The result is a ``(left, miny, total_width, height, descent)``
        tuple.  *s* may be str or UTF-8 encoded bytes.  Newlines are
        ignored, and characters the font has no metrics for are measured as
        'question'.  All zeros are returned if *s* is empty or consists only
        of newlines.
        """
        if not len(s):
            return 0, 0, 0, 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        measured = False
        if not isinstance(s, str):
            s = _to_str(s)
        for c in s:
            if c == '\n':
                continue
            name = uni2type1.get(ord(c), f"uni{ord(c):04X}")
            try:
                wx, _, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, _, bbox = self._metrics_by_name[name]
            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): same (l, b, w, h) unpacking as in
            # string_width_height; kept as-is.
            l, b, w, h = bbox
            left = min(left, l)
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name
            measured = True

        if not measured:
            # Only newlines were seen; avoid returning the 1e9 sentinel.
            return 0, 0, 0, 0, 0
        return left, miny, total_width, maxy - miny, -miny

    def get_str_bbox(self, s):
        """Return the string bounding box."""
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """Get the name of the character, i.e., ';' is 'semicolon'."""
        if not isord:
            c = ord(c)
        return self._metrics[c].name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX field.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].width

    def get_width_from_char_name(self, name):
        """Get the width of the character from a type1 character name."""
        return self._metrics_by_name[name].width

    def get_height_char(self, c, isord=False):
        """Get the bounding box (ink) height of character *c* (space is 0)."""
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*.
        """
        return self._kern.get((name1, name2), 0)

    def get_fontname(self):
        """Return the font name, e.g., 'Times-Roman'."""
        return self._header[b'FontName']

    @property
    def postscript_name(self):  # For consistency with FT2Font.
        """The font name, as returned by `get_fontname`."""
        return self.get_fontname()

    def get_fullname(self):
        """Return the font full name, e.g., 'Times-Roman'."""
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        """Return the font family name, e.g., 'Times'."""
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess: strip trailing
        # style words from the full name.
        name = self.get_fullname()
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
                  r'light|ultralight|extra|condensed))+$')
        return re.sub(extras, '', name)

    @property
    def family_name(self):
        """The font family name, e.g., 'Times'."""
        return self.get_familyname()

    def get_weight(self):
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
        return self._header[b'Weight']

    def get_angle(self):
        """Return the fontangle as float."""
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        """
        Return the cap height as float.

        The header parser also accepts the 'Capheight' spelling; that entry
        is used when 'CapHeight' itself is absent.  Raises KeyError if
        neither is present.
        """
        header = self._header
        if b'CapHeight' not in header and b'Capheight' in header:
            return header[b'Capheight']
        return header[b'CapHeight']

    def get_xheight(self):
        """Return the xheight as float."""
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        """Return the underline thickness as float."""
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)

526 

527 def get_vertical_stem_width(self): 

528 """ 

529 Return the standard vertical stem width as float, or *None* if 

530 not specified in AFM file. 

531 """ 

532 return self._header.get(b'StdVW', None)