1"""
2 pygments.formatter
3 ~~~~~~~~~~~~~~~~~~
4
5 Base formatter class.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import codecs
12
13from pygments.util import get_bool_opt
14from pygments.styles import get_style_by_name
15
16__all__ = ['Formatter']
17
18
def _lookup_style(style):
    """
    Resolve the ``style`` option to a style object.

    A string is treated as a style name and looked up with
    ``get_style_by_name``; any other value is handed back unchanged.
    """
    return get_style_by_name(style) if isinstance(style, str) else style
23
24
class Formatter:
    """
    Converts a token stream to text.

    Formatters should have attributes to help selecting them. These
    are similar to the corresponding :class:`~pygments.lexer.Lexer`
    attributes.

    .. autoattribute:: name
       :no-value:

    .. autoattribute:: aliases
       :no-value:

    .. autoattribute:: filenames
       :no-value:

    You can pass options as keyword arguments to the constructor.
    All formatters accept these basic options:

    ``style``
        The style to use, can be a string or a Style subclass
        (default: "default"). Not used by e.g. the
        TerminalFormatter.
    ``full``
        Tells the formatter to output a "full" document, i.e.
        a complete self-contained document. This doesn't have
        any effect for some formatters (default: false).
    ``title``
        If ``full`` is true, the title that should be used to
        caption the document (default: '').
    ``encoding``
        If given, must be an encoding name. This will be used to
        convert the Unicode token strings to byte strings in the
        output. If it is "" or None, Unicode strings are written
        to the output file unchanged, so the file object must accept
        text (default: None). The values "guess" and "chardet" are
        replaced by "utf-8".
    ``outencoding``
        Overrides ``encoding`` if given.

    """

    #: Full name for the formatter, in human-readable form.
    name = None

    #: A list of short, unique identifiers that can be used to lookup
    #: the formatter from a list, e.g. using :func:`.get_formatter_by_name()`.
    aliases = []

    #: A list of fnmatch patterns that match filenames for which this
    #: formatter can produce output. The patterns in this list should be unique
    #: among all formatters.
    filenames = []

    #: If True, this formatter outputs Unicode strings when no encoding
    #: option is given.
    unicodeoutput = True

    def __init__(self, **options):
        """
        As with lexers, this constructor takes arbitrary optional arguments,
        and if you override it, you should first process your own options, then
        call the base class implementation.

        The basic options are stored as the attributes ``style``, ``full``,
        ``title`` and ``encoding``; the complete option dict is kept as
        ``options``.
        """
        self.style = _lookup_style(options.get('style', 'default'))
        self.full = get_bool_opt(options, 'full', False)
        self.title = options.get('title', '')
        # Normalize an empty/falsy encoding to None.
        self.encoding = options.get('encoding', None) or None
        if self.encoding in ('guess', 'chardet'):
            # can happen for e.g. pygmentize -O encoding=guess;
            # fall back to UTF-8 for the output.
            self.encoding = 'utf-8'
        # A non-empty ``outencoding`` takes precedence over ``encoding``.
        self.encoding = options.get('outencoding') or self.encoding
        self.options = options

    def get_style_defs(self, arg=''):
        """
        This method must return statements or declarations suitable to define
        the current style for subsequent highlighted text (e.g. CSS classes
        in the `HTMLFormatter`).

        The optional argument `arg` can be used to modify the generation and
        is formatter dependent (it is standardized because it can be given on
        the command line).

        This method is called by the ``-S`` :doc:`command-line option <cmdline>`,
        the `arg` is then given by the ``-a`` option.

        The base implementation returns an empty string.
        """
        return ''

    def format(self, tokensource, outfile):
        """
        This method must format the tokens from the `tokensource` iterable and
        write the formatted version to the file object `outfile`.

        Formatter options can control how exactly the tokens are converted.

        If an encoding is set, `outfile` is first wrapped in that codec's
        stream writer. The actual work is delegated to ``format_unencoded``,
        whose result is returned. This base class does not define
        ``format_unencoded``.

        :raises LookupError: if ``self.encoding`` is not a known codec name.
        """
        if self.encoding:
            # wrap the outfile in a StreamWriter
            outfile = codecs.getwriter(self.encoding)(outfile)
        return self.format_unencoded(tokensource, outfile)

    # Allow writing Formatter[str] or Formatter[bytes]. That's equivalent to
    # Formatter. This helps when using third-party type stubs from typeshed.
    def __class_getitem__(cls, name):
        """Return the class itself, ignoring the subscript `name`."""
        return cls