Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/matplotlib/_afm.py: 63%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2A python interface to Adobe Font Metrics Files.
4Although a number of other Python implementations exist, and may be more
5complete than this, it was decided not to go with them because they were
6either:
81) copyrighted or used a non-BSD compatible license
92) had too many dependencies and a free standing lib was needed
103) did more than needed and it was easier to write afresh rather than
11 figure out how to get just what was needed.
13It is pretty easy to use, and has no external dependencies:
15>>> import matplotlib as mpl
16>>> from pathlib import Path
17>>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm')
18>>>
19>>> from matplotlib._afm import AFM
20>>> with afm_path.open('rb') as fh:
21... afm = AFM(fh)
22>>> afm.string_width_height('What the heck?')
23(6220.0, 694)
24>>> afm.get_fontname()
25'Times-Roman'
26>>> afm.get_kern_dist('A', 'f')
270
28>>> afm.get_kern_dist('A', 'y')
29-92.0
30>>> afm.get_bbox_char('!')
31[130, -9, 238, 676]
33As in the Adobe Font Metrics File Format Specification, all dimensions
34are given in units of 1/1000 of the scale factor (point size) of the font
35being used.
36"""
38from collections import namedtuple
39import logging
40import re
42from ._mathtext_data import uni2type1
45_log = logging.getLogger(__name__)
48def _to_int(x):
49 # Some AFM files have floats where we are expecting ints -- there is
50 # probably a better way to handle this (support floats, round rather than
51 # truncate). But I don't know what the best approach is now and this
52 # change to _to_int should at least prevent Matplotlib from crashing on
53 # these. JDH (2009-11-06)
54 return int(float(x))
57def _to_float(x):
58 # Some AFM files use "," instead of "." as decimal separator -- this
59 # shouldn't be ambiguous (unless someone is wicked enough to use "," as
60 # thousands separator...).
61 if isinstance(x, bytes):
62 # Encoding doesn't really matter -- if we have codepoints >127 the call
63 # to float() will error anyways.
64 x = x.decode('latin-1')
65 return float(x.replace(',', '.'))
68def _to_str(x):
69 return x.decode('utf8')
72def _to_list_of_ints(s):
73 s = s.replace(b',', b' ')
74 return [_to_int(val) for val in s.split()]
77def _to_list_of_floats(s):
78 return [_to_float(val) for val in s.split()]
81def _to_bool(s):
82 if s.lower().strip() in (b'false', b'0', b'no'):
83 return False
84 else:
85 return True
def _parse_header(fh):
    """
    Read the font metrics header (up to the char metrics) and return
    a dictionary mapping *key* to *val*.

    Keys are the header keywords as bytes (e.g. ``b'FontName'``).  *val* is
    converted to the appropriate Python type as necessary; e.g.:

    * 'False'->False
    * '0'->0
    * '-168 -218 1000 898'-> [-168, -218, 1000, 898]

    Recognized keywords are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
      StartCharMetrics, CharacterSet, Characters

    Lines starting with ``Comment`` and blank lines are skipped.  Unknown
    keywords and values that fail conversion are logged and skipped.

    Parameters
    ----------
    fh : iterable of bytes
        The lines of the AFM file, starting at its first line.  Iteration
        stops right after the ``StartCharMetrics`` line has been consumed.

    Returns
    -------
    dict
        Mapping of keyword (bytes) to converted value.

    Raises
    ------
    RuntimeError
        If the first non-comment line is not ``StartFontMetrics``, or if the
        input ends before a ``StartCharMetrics`` line is found.
    """
    header_converters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_float,
        b'UnderlineThickness': _to_float,
        b'Version': _to_str,
        # Some AFM files have non-ASCII characters (which are not allowed by
        # the spec).  Given that there is actually no public API to even access
        # this field, just return it as straight bytes.
        b'Notice': lambda x: x,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
    }
    d = {}
    first_line = True
    for line in fh:
        line = line.rstrip()
        if line.startswith(b'Comment'):
            continue
        # Split on any run of whitespace rather than on exactly one space, so
        # that tab-separated or multi-space lines do not end up with a bogus
        # key or a value carrying leading whitespace.
        lst = line.split(None, 1)
        key = lst[0] if lst else b''
        if first_line:
            # AFM spec, Section 4: The StartFontMetrics keyword
            # [followed by a version number] must be the first line in
            # the file, and the EndFontMetrics keyword must be the
            # last non-empty line in the file.  We just check the
            # first header entry.
            if key != b'StartFontMetrics':
                raise RuntimeError('Not an AFM file')
            first_line = False
        if not lst:
            # Blank line: nothing to parse, and not worth an error message.
            continue
        val = lst[1] if len(lst) == 2 else b''
        try:
            converter = header_converters[key]
        except KeyError:
            _log.error("Found an unknown keyword in AFM header (was %r)", key)
            continue
        try:
            d[key] = converter(val)
        except ValueError:
            _log.error('Value error parsing header in AFM: %s, %s', key, val)
            continue
        if key == b'StartCharMetrics':
            break
    else:
        # The input ran out without ever reaching StartCharMetrics.
        raise RuntimeError('Bad parse')
    return d
# Per-glyph record produced by the char-metrics parser.
CharMetrics = namedtuple('CharMetrics', ['width', 'name', 'bbox'])
CharMetrics.__doc__ = """
    Represents the character metrics of a single character.

    Notes
    -----
    The fields do currently only describe a subset of character metrics
    information defined in the AFM standard.
    """
CharMetrics.width.__doc__ = """The character width (WX)."""
CharMetrics.name.__doc__ = """The character name (N)."""
CharMetrics.bbox.__doc__ = """
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""
def _parse_char_metrics(fh):
    """
    Parse character metrics from the given filehandle and return them as
    two dicts.

    It is assumed that the file cursor is on the line behind
    'StartCharMetrics'.  Lines are consumed up to and including the
    'EndCharMetrics' line.

    Returns
    -------
    ascii_d : dict
        A mapping "ASCII num of the character" to `.CharMetrics`.  Characters
        whose code is -1 are left out of this mapping.
    name_d : dict
        A mapping "character name" to `.CharMetrics`.

    Raises
    ------
    RuntimeError
        If a line lacks any of the C, WX, N or B entries, or if the input
        ends before 'EndCharMetrics' is seen.

    Notes
    -----
    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """
    required_keys = {'C', 'WX', 'N', 'B'}
    # Workaround: some glyph names get a fixed character code regardless of
    # the file's C entry.  'Euro' uses its WinAnsiEncoding code (see PDF
    # Reference); 'minus' uses the Unicode MINUS SIGN codepoint (0x2212).
    code_overrides = {'Euro': 128, 'minus': ord("\N{MINUS SIGN}")}

    by_code = {}
    by_name = {}
    for raw in fh:
        # We are defensively letting values be utf8.  The spec requires
        # ascii, but there are non-compliant fonts in circulation.
        line = _to_str(raw.rstrip())
        if line.startswith('EndCharMetrics'):
            return by_code, by_name
        # Each ';'-separated entry is "<identifier> <value...>".
        entries = (s.strip().split(' ', 1) for s in line.split(';') if s)
        vals = dict(entries)
        # There may be other metrics present, but only these are needed.
        if not required_keys.issubset(vals):
            raise RuntimeError('Bad char metrics line: %s' % line)
        code = _to_int(vals['C'])
        width = _to_float(vals['WX'])
        name = vals['N']
        bbox = [int(v) for v in _to_list_of_floats(vals['B'])]
        metrics = CharMetrics(width, name, bbox)
        code = code_overrides.get(name, code)
        if code != -1:
            by_code[code] = metrics
        by_name[name] = metrics
    raise RuntimeError('Bad parse')
240def _parse_kern_pairs(fh):
241 """
242 Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
243 values are the kern pair value. For example, a kern pairs line like
244 ``KPX A y -50``
246 will be represented as::
248 d[ ('A', 'y') ] = -50
250 """
252 line = next(fh)
253 if not line.startswith(b'StartKernPairs'):
254 raise RuntimeError('Bad start of kern pairs data: %s' % line)
256 d = {}
257 for line in fh:
258 line = line.rstrip()
259 if not line:
260 continue
261 if line.startswith(b'EndKernPairs'):
262 next(fh) # EndKernData
263 return d
264 vals = line.split()
265 if len(vals) != 4 or vals[0] != b'KPX':
266 raise RuntimeError('Bad kern pairs line: %s' % line)
267 c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
268 d[(c1, c2)] = val
269 raise RuntimeError('Bad kern pairs parse')
# One component of a composite character, as listed in a PCC entry.
CompositePart = namedtuple('CompositePart', ['name', 'dx', 'dy'])
CompositePart.__doc__ = """
    Represents the information on a composite element of a composite char."""
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""
280def _parse_composites(fh):
281 """
282 Parse the given filehandle for composites information return them as a
283 dict.
285 It is assumed that the file cursor is on the line behind 'StartComposites'.
287 Returns
288 -------
289 dict
290 A dict mapping composite character names to a parts list. The parts
291 list is a list of `.CompositePart` entries describing the parts of
292 the composite.
294 Examples
295 --------
296 A composite definition line::
298 CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;
300 will be represented as::
302 composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0),
303 CompositePart(name='acute', dx=160, dy=170)]
305 """
306 composites = {}
307 for line in fh:
308 line = line.rstrip()
309 if not line:
310 continue
311 if line.startswith(b'EndComposites'):
312 return composites
313 vals = line.split(b';')
314 cc = vals[0].split()
315 name, _num_parts = cc[1], _to_int(cc[2])
316 pccParts = []
317 for s in vals[1:-1]:
318 pcc = s.split()
319 part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3]))
320 pccParts.append(part)
321 composites[name] = pccParts
323 raise RuntimeError('Bad composites parse')
def _parse_optional(fh):
    """
    Parse the optional kern pair and composites sections.

    Every remaining line of *fh* is scanned; when a line's first token is
    ``StartKernData`` or ``StartComposites``, the matching section parser is
    handed the filehandle and its result recorded.  Other lines are ignored.

    Returns
    -------
    kern_data : dict
        A dict containing kerning information. May be empty.
        See `._parse_kern_pairs`.
    composites : dict
        A dict containing composite information. May be empty.
        See `._parse_composites`.
    """
    section_parsers = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites': _parse_composites,
    }
    # Sections that never appear keep their empty-dict default.
    parsed = {b'StartKernData': {},
              b'StartComposites': {}}

    for raw in fh:
        tokens = raw.split()
        if not tokens:
            continue
        keyword = tokens[0]
        parser = section_parsers.get(keyword)
        if parser is not None:
            parsed[keyword] = parser(fh)

    return parsed[b'StartKernData'], parsed[b'StartComposites']
class AFM:
    """
    Parsed contents of an Adobe Font Metrics (AFM) file.

    Holds the header entries, the per-character metrics (indexed both by
    character code and by glyph name), the kern pairs and the composites.
    All dimensions are the raw numbers read from the file.
    """

    def __init__(self, fh):
        """
        Parse the AFM file in file object *fh*.

        The parsers compare lines against bytes keywords, so *fh* must yield
        bytes lines (e.g. a file opened in binary mode).
        """
        self._header = _parse_header(fh)
        self._metrics, self._metrics_by_name = _parse_char_metrics(fh)
        self._kern, self._composite = _parse_optional(fh)

    def get_bbox_char(self, c, isord=False):
        """
        Return the bbox of character *c* as stored in the char metrics.

        *c* is a single-character string, or the character code itself if
        *isord* is true.  Raises `KeyError` for characters not in the font.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Newlines are ignored.  ``(0, 0)`` is returned when *s* contains no
        character to measure (empty, or newlines only).  Raises `KeyError`
        for characters not in the font.
        """
        total_width = 0
        namelast = None
        miny = None  # None until the first glyph has been measured.
        maxy = 0
        for c in s:
            if c == '\n':
                continue
            wx, name, bbox = self._metrics[ord(c)]

            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): CharMetrics.bbox is documented as
            # (llx, lly, urx, ury), yet the last two entries are used here as
            # a width and a height.  Left unchanged because text layout
            # depends on the current values -- confirm before changing.
            l, b, w, h = bbox
            miny = b if miny is None else min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if miny is None:
            # Nothing but newlines (or nothing at all): avoid returning a
            # height computed from an uninitialized minimum.
            return 0, 0
        return total_width, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and the maximal descent.

        The result is a ``(left, miny, total_width, height, descent)`` tuple.
        *s* may be a str or UTF-8 encoded bytes; newlines are ignored.
        Characters are looked up by their Type 1 glyph name, and glyphs
        missing from the font are measured as 'question'.  All zeros are
        returned when *s* contains no character to measure.
        """
        if not s:
            return 0, 0, 0, 0, 0
        total_width = 0
        namelast = None
        miny = None  # None until the first glyph has been measured.
        maxy = 0
        left = 0
        if not isinstance(s, str):
            s = _to_str(s)
        for c in s:
            if c == '\n':
                continue
            name = uni2type1.get(ord(c), f"uni{ord(c):04X}")
            try:
                wx, _, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, _, bbox = self._metrics_by_name[name]
            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): same width/height reading of the bbox as in
            # string_width_height -- confirm before changing.
            l, b, w, h = bbox
            left = min(left, l)
            miny = b if miny is None else min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if miny is None:
            # Only newlines were present; treat like the empty string.
            return 0, 0, 0, 0, 0
        return left, miny, total_width, maxy - miny, -miny

    def get_str_bbox(self, s):
        """Return the string bounding box."""
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """Get the name of the character, i.e., ';' is 'semicolon'."""
        if not isord:
            c = ord(c)
        return self._metrics[c].name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX field.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].width

    def get_width_from_char_name(self, name):
        """Get the width of the character from a type1 character name."""
        return self._metrics_by_name[name].width

    def get_height_char(self, c, isord=False):
        """
        Get the bounding box (ink) height of character *c* (space is 0).

        This is the last entry of the character's bbox.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*.
        """
        return self._kern.get((name1, name2), 0)

    def get_fontname(self):
        """Return the font name, e.g., 'Times-Roman'."""
        return self._header[b'FontName']

    @property
    def postscript_name(self):  # For consistency with FT2Font.
        """The font name, e.g., 'Times-Roman'; same as `get_fontname`."""
        return self.get_fontname()

    def get_fullname(self):
        """Return the font full name, e.g., 'Times-Roman'."""
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        """Return the font family name, e.g., 'Times'."""
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess: strip trailing
        # style words (each preceded by a space or hyphen) off the full name.
        name = self.get_fullname()
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
                  r'light|ultralight|extra|condensed))+$')
        return re.sub(extras, '', name)

    @property
    def family_name(self):
        """The font family name, e.g., 'Times'."""
        return self.get_familyname()

    def get_weight(self):
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
        return self._header[b'Weight']

    def get_angle(self):
        """Return the fontangle as float."""
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        """
        Return the cap height as float.

        The header parser accepts both the 'CapHeight' and the 'Capheight'
        spelling; 'CapHeight' takes precedence when both are present.
        Raises `KeyError` if the file specifies neither.
        """
        header = self._header
        if b'CapHeight' not in header and b'Capheight' in header:
            return header[b'Capheight']
        return header[b'CapHeight']

    def get_xheight(self):
        """Return the xheight as float."""
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        """Return the underline thickness as float."""
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)