Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/matplotlib/

1"""

2A python interface to Adobe Font Metrics Files.

4Although a number of other Python implementations exist, and may be more

5complete than this, it was decided not to go with them because they were

6either:

81) copyrighted or used a non-BSD compatible license

92) had too many dependencies and a free standing lib was needed

103) did more than needed and it was easier to write afresh rather than

11 figure out how to get just what was needed.

13It is pretty easy to use, and has no external dependencies:

15>>> import matplotlib as mpl

16>>> from pathlib import Path

17>>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm')

18>>>

19>>> from matplotlib.afm import AFM

20>>> with afm_path.open('rb') as fh:

21... afm = AFM(fh)

22>>> afm.string_width_height('What the heck?')

23(6220.0, 694)

24>>> afm.get_fontname()

25'Times-Roman'

26>>> afm.get_kern_dist('A', 'f')

270

28>>> afm.get_kern_dist('A', 'y')

29-92.0

30>>> afm.get_bbox_char('!')

31[130, -9, 238, 676]

33As in the Adobe Font Metrics File Format Specification, all dimensions

34are given in units of 1/1000 of the scale factor (point size) of the font

35being used.

36"""

38from collections import namedtuple

39import logging

40import re

42from ._mathtext_data import uni2type1

# Module-level logger; used below to report unknown keywords and
# unparsable values found in an AFM header.
_log = logging.getLogger(__name__)

def _to_int(x):
    """
    Convert *x* to an int, truncating any fractional part.

    Some AFM files carry floats where ints are expected, so the value is
    parsed as a float first and then truncated (not rounded). This keeps
    Matplotlib from crashing on such files.
    """
    as_float = float(x)
    return int(as_float)

def _to_float(x):
    """
    Convert *x* (`str` or `bytes`) to a float.

    Some AFM files use "," instead of "." as the decimal separator, so any
    comma is treated as a decimal point (this is only ambiguous if a file
    uses "," as a thousands separator).
    """
    # The codec choice for bytes doesn't really matter: any codepoint >127
    # makes float() fail anyway.
    text = x.decode('latin-1') if isinstance(x, bytes) else x
    return float(text.replace(',', '.'))

def _to_str(x):
    """Decode the UTF-8 encoded byte string *x* and return the `str`."""
    decoded = x.decode(encoding='utf8')
    return decoded

def _to_list_of_ints(s):
    """
    Convert the bytes *s* into a list of ints.

    Values may be separated by whitespace and/or commas; each one is
    converted with `_to_int`.
    """
    tokens = s.replace(b',', b' ').split()
    return list(map(_to_int, tokens))

def _to_list_of_floats(s):
    """Convert the whitespace-separated values in *s* to a list of floats."""
    return list(map(_to_float, s.split()))

def _to_bool(s):
    """
    Convert the bytes *s* to a bool.

    ``b'false'``, ``b'0'`` and ``b'no'`` (compared case-insensitively, with
    surrounding whitespace ignored) give False; anything else gives True.
    """
    normalized = s.lower().strip()
    return normalized not in (b'false', b'0', b'no')

def _parse_header(fh):
    """
    Read the font metrics header (up to and including 'StartCharMetrics').

    Parameters
    ----------
    fh : iterable of bytes
        The lines of the AFM file, starting at the first line.

    Returns
    -------
    dict
        A mapping of header keyword (as `bytes`, e.g. ``b'FontName'``) to
        its value, converted to the appropriate Python type; e.g.:

        * b'false' -> False (IsFixedPitch)
        * b'-12.5' -> -12.5 (ItalicAngle)
        * b'-168 -218 1000 898' -> [-168, -218, 1000, 898] (FontBBox)

        Recognized keywords are

        StartFontMetrics, FontName, FullName, FamilyName, Weight,
        ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
        UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
        Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
        StartCharMetrics, CharacterSet, Characters

    Raises
    ------
    RuntimeError
        If the first non-comment line does not start with
        'StartFontMetrics', or if the input ends before a valid
        'StartCharMetrics' entry is found.

    Notes
    -----
    'Comment' lines and blank lines are skipped. Unknown keywords and values
    that fail conversion are logged as errors and otherwise ignored.
    """
    header_converters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_float,
        b'UnderlineThickness': _to_float,
        b'Version': _to_str,
        # Some AFM files have non-ASCII characters (which are not allowed by
        # the spec).  Given that there is actually no public API to even
        # access this field, just return it as straight bytes.
        b'Notice': lambda x: x,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
    }
    d = {}
    first_line = True
    for line in fh:
        line = line.rstrip()
        if line.startswith(b'Comment'):
            continue
        # Split "Keyword value..." at the first space; val is b'' when the
        # line holds only a keyword.
        key, _, val = line.partition(b' ')
        if first_line:
            # AFM spec, Section 4: The StartFontMetrics keyword
            # [followed by a version number] must be the first line in
            # the file, and the EndFontMetrics keyword must be the
            # last non-empty line in the file.  We just check the
            # first header entry.
            if key != b'StartFontMetrics':
                raise RuntimeError('Not an AFM file')
            first_line = False
        if not line:
            # A blank line carries no keyword; skip it rather than logging
            # it as an unknown keyword b''.
            continue
        try:
            converter = header_converters[key]
        except KeyError:
            _log.error("Found an unknown keyword in AFM header (was %r)", key)
            continue
        try:
            d[key] = converter(val)
        except ValueError:
            _log.error('Value error parsing header in AFM: %s, %s', key, val)
            continue
        if key == b'StartCharMetrics':
            break
    else:
        # Input exhausted without reaching the character metrics section.
        raise RuntimeError('Bad parse')
    return d

169

170

# Lightweight record for the per-character data kept from a metrics line.
CharMetrics = namedtuple('CharMetrics', ['width', 'name', 'bbox'])
CharMetrics.__doc__ = """
    Represents the character metrics of a single character.

    Notes
    -----
    The fields do currently only describe a subset of character metrics
    information defined in the AFM standard.
    """
# Per-field documentation, shown by help() and documentation tools.
CharMetrics.width.__doc__ = """The character width (WX)."""
CharMetrics.name.__doc__ = """The character name (N)."""
CharMetrics.bbox.__doc__ = """
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""

184

185

def _parse_char_metrics(fh):
    """
    Read the character metrics section and index it by code and by name.

    The file cursor is expected to be on the line following
    'StartCharMetrics'; reading stops at the 'EndCharMetrics' line.

    Returns
    -------
    ascii_d : dict
        A mapping "ASCII num of the character" to `.CharMetrics`.
    name_d : dict
        A mapping "character name" to `.CharMetrics`.

    Raises
    ------
    RuntimeError
        If a line lacks any of the C, WX, N or B entries, or if the input
        ends before 'EndCharMetrics' is seen.

    Notes
    -----
    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """
    needed = {'C', 'WX', 'N', 'B'}

    by_code = {}
    by_name = {}
    for raw in fh:
        # Values are defensively decoded as utf8: the spec requires ascii,
        # but non-compliant fonts are in circulation.
        text = _to_str(raw.rstrip())
        if text.startswith('EndCharMetrics'):
            return by_code, by_name
        # Each ';'-separated segment is "<identifier> <value...>".
        segments = (
            seg.strip().split(' ', 1) for seg in text.split(';') if seg)
        fields = dict(segments)
        # Other metrics may be present, but only these four are needed.
        if needed.difference(fields):
            raise RuntimeError('Bad char metrics line: %s' % text)
        code = _to_int(fields['C'])
        width = _to_float(fields['WX'])
        glyph = fields['N']
        bbox = [int(v) for v in _to_list_of_floats(fields['B'])]
        entry = CharMetrics(width, glyph, bbox)
        # Workaround: give 'Euro' the character code it has in
        # WinAnsiEncoding (see PDF Reference), and map 'minus' to the
        # Unicode minus sign.
        if glyph == 'Euro':
            code = 128
        elif glyph == 'minus':
            code = ord("\N{MINUS SIGN}")  # 0x2212
        # A code of -1 is not entered in the by-code mapping.
        if code != -1:
            by_code[code] = entry
        by_name[glyph] = entry
    raise RuntimeError('Bad parse')

238

239

def _parse_kern_pairs(fh):
    """
    Parse a kern pairs section and return it as a dictionary.

    Keys are (*char1*, *char2*) tuples of character names and values are the
    kern pair value as a float.  For example, a kern pairs line like
    ``KPX A y -50``

    will be represented as::

      d[ ('A', 'y') ] = -50.0

    Parameters
    ----------
    fh : iterator of bytes
        The AFM file, positioned so that its next line is 'StartKernPairs'.

    Raises
    ------
    RuntimeError
        If the next line is not 'StartKernPairs' (including when the input
        is already exhausted), if a line is not of the form
        ``KPX name1 name2 value``, or if the input ends before
        'EndKernPairs'.
    """
    # Use a default so that truncated input is reported as a RuntimeError
    # like every other parse failure, instead of leaking StopIteration.
    line = next(fh, b'')
    if not line.startswith(b'StartKernPairs'):
        raise RuntimeError('Bad start of kern pairs data: %s' % line)

    d = {}
    for line in fh:
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(b'EndKernPairs'):
            # Consume the following line (EndKernData).  Tolerate its
            # absence so the pairs parsed so far are not lost.
            next(fh, None)
            return d
        vals = line.split()
        if len(vals) != 4 or vals[0] != b'KPX':
            raise RuntimeError('Bad kern pairs line: %s' % line)
        c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
        d[(c1, c2)] = val
    raise RuntimeError('Bad kern pairs parse')

270

271

# Record describing one component of a composite character.
CompositePart = namedtuple('CompositePart', ['name', 'dx', 'dy'])
CompositePart.__doc__ = """
    Represents the information on a composite element of a composite char."""
# Per-field documentation, shown by help() and documentation tools.
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""

278

279

def _parse_composites(fh):
    """
    Parse the given filehandle for composites information and return them
    as a dict.

    It is assumed that the file cursor is on the line behind
    'StartComposites'.

    Returns
    -------
    dict
        A dict mapping composite character names to a parts list. The parts
        list is a list of `.CompositePart` entries describing the parts of
        the composite.  Names are kept as `bytes`, exactly as read from the
        file; displacements are floats.

    Raises
    ------
    RuntimeError
        If the input ends before 'EndComposites' is seen.

    Examples
    --------
    A composite definition line::

      CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;

    will be represented as::

      composites[b'Aacute'] = [
          CompositePart(name=b'A', dx=0.0, dy=0.0),
          CompositePart(name=b'acute', dx=160.0, dy=170.0)]

    """
    composites = {}
    for line in fh:
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(b'EndComposites'):
            return composites
        vals = line.split(b';')
        cc = vals[0].split()
        # The part count is converted (so a malformed count still raises)
        # but is not otherwise used.
        name, _num_parts = cc[1], _to_int(cc[2])
        pccParts = []
        # Walk every segment after the "CC ..." head and skip empty ones, so
        # the last part is kept even when the line has no trailing ';'.
        for s in vals[1:]:
            pcc = s.split()
            if not pcc:
                continue
            part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3]))
            pccParts.append(part)
        composites[name] = pccParts

    raise RuntimeError('Bad composites parse')

324

325

def _parse_optional(fh):
    """
    Parse the optional kern pair and composites sections of an AFM file.

    The remaining lines of *fh* are scanned; whenever a line starts with a
    known section keyword, the matching section parser consumes that
    section from *fh*.

    Returns
    -------
    kern_data : dict
        A dict containing kerning information. May be empty.
        See `._parse_kern_pairs`.
    composites : dict
        A dict containing composite information. May be empty.
        See `._parse_composites`.
    """
    section_parsers = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites': _parse_composites,
    }

    # Every section defaults to an empty dict until (and unless) it is found.
    parsed = {keyword: {} for keyword in section_parsers}
    for raw in fh:
        stripped = raw.rstrip()
        if stripped:
            keyword = stripped.split()[0]
            parser = section_parsers.get(keyword)
            if parser is not None:
                parsed[keyword] = parser(fh)

    return parsed[b'StartKernData'], parsed[b'StartComposites']

356

357

class AFM:
    """
    Font metrics parsed from an Adobe Font Metrics (AFM) file.

    The constructor reads, in order, the header, the character metrics and
    the optional kerning/composites sections from a file object.
    """

    def __init__(self, fh):
        """Parse the AFM file in file object *fh*."""
        self._header = _parse_header(fh)
        self._metrics, self._metrics_by_name = _parse_char_metrics(fh)
        self._kern, self._composite = _parse_optional(fh)

    def get_bbox_char(self, c, isord=False):
        """
        Return the bounding box stored for character *c*.

        *c* is a single character, or an integer character code if *isord*
        is true.  Raises `KeyError` if the font has no such character.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Newlines are ignored.  ``(0, 0)`` is returned if *s* is empty or
        contains only newlines.  Raises `KeyError` for a character that has
        no metrics entry.
        """
        if not s:
            return 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        for c in s:
            if c == '\n':
                continue
            wx, name, bbox = self._metrics[ord(c)]

            total_width += wx + self._kern.get((namelast, name), 0)
            _, b, _, h = bbox
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if namelast is None:
            # Nothing was measured (only newlines): avoid returning a height
            # derived from the 1e9 sentinel.
            return 0, 0
        return total_width, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and the maximal descent.

        The result is a 5-tuple ``(left, miny, total_width, height,
        descent)`` where *descent* is ``-miny``.  *s* may be a `str` or
        UTF-8 encoded `bytes`.  Newlines are ignored; characters the font
        does not provide are measured as 'question'.  All zeros are
        returned if *s* is empty or contains only newlines.
        """
        if not s:
            return 0, 0, 0, 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        if not isinstance(s, str):
            s = _to_str(s)
        for c in s:
            if c == '\n':
                continue
            # Map the codepoint to a Type 1 glyph name, using the generic
            # "uniXXXX" form when there is no dedicated name.
            name = uni2type1.get(ord(c), f"uni{ord(c):04X}")
            try:
                wx, _, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, _, bbox = self._metrics_by_name[name]
            total_width += wx + self._kern.get((namelast, name), 0)
            x0, b, _, h = bbox
            left = min(left, x0)
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if namelast is None:
            # Nothing was measured (only newlines): avoid returning values
            # derived from the 1e9 sentinel.
            return 0, 0, 0, 0, 0
        return left, miny, total_width, maxy - miny, -miny

    def get_str_bbox(self, s):
        """Return the string bounding box."""
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """Get the name of the character, i.e., ';' is 'semicolon'."""
        if not isord:
            c = ord(c)
        return self._metrics[c].name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX field.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].width

    def get_width_from_char_name(self, name):
        """Get the width of the character from a type1 character name."""
        return self._metrics_by_name[name].width

    def get_height_char(self, c, isord=False):
        """Get the bounding box (ink) height of character *c* (space is 0)."""
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*.
        """
        return self._kern.get((name1, name2), 0)

    def get_fontname(self):
        """Return the font name, e.g., 'Times-Roman'."""
        return self._header[b'FontName']

    @property
    def postscript_name(self):  # For consistency with FT2Font.
        """The font name, as returned by `get_fontname`."""
        return self.get_fontname()

    def get_fullname(self):
        """Return the font full name, e.g., 'Times-Roman'."""
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        """Return the font family name, e.g., 'Times'."""
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess: take the full name
        # and strip any trailing run of style/weight words, each preceded by
        # a space or a hyphen (e.g. 'Times Bold Italic' -> 'Times').
        name = self.get_fullname()
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
                  r'light|ultralight|extra|condensed))+$')
        return re.sub(extras, '', name)

    @property
    def family_name(self):
        """The font family name, e.g., 'Times'."""
        return self.get_familyname()

    def get_weight(self):
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
        return self._header[b'Weight']

    def get_angle(self):
        """Return the fontangle as float."""
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        """Return the cap height as float."""
        return self._header[b'CapHeight']

    def get_xheight(self):
        """Return the xheight as float."""
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        """Return the underline thickness as float."""
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/matplotlib/_afm.py: 63%

242 statements