1# $Id$
2# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
5
6"""
7Transforms needed by most or all documents:
8
9- `Decorations`: Generate a document's header & footer.
10- `ExposeInternals`: Expose internal attributes.
11- `Messages`: Placement of system messages generated after parsing.
12- `FilterMessages`: Remove system messages below verbosity threshold.
13- `TestMessages`: Like `Messages`, used on test runs.
14- `StripComments`: Remove comment elements from the document tree.
15- `StripClassesAndElements`: Remove elements with classes
16 in `self.document.settings.strip_elements_with_classes`
17 and class values in `self.document.settings.strip_classes`.
18- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `Validate`: Validate the document tree, report violations as warnings.
20"""
21
22from __future__ import annotations
23
24__docformat__ = 'reStructuredText'
25
26import re
27import time
28from docutils import nodes, utils
29from docutils.transforms import Transform
30from docutils.utils import smartquotes
31
32
class Decorations(Transform):

    """
    Fill the document's decoration element with header and footer content.

    `generate_header()` and `generate_footer()` each return a list of nodes
    or None.  A non-empty result is appended to the header or footer
    element obtained from the document's decoration.
    """

    default_priority = 820

    def apply(self) -> None:
        """Generate header and footer content and attach what is non-empty.

        The decoration element is only requested from the document when
        there is something to put into it.
        """
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the header content; this implementation provides none."""
        return None

    def generate_footer(self):
        """Return ``[paragraph]`` with the footer text, or None.

        The footer is built from the `generator`, `datestamp`,
        `source_link` and `source_url` settings.  None is returned when
        all four are unset.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []
        if (settings.source_link and settings._source) or settings.source_url:
            # An explicit URL takes precedence over the path of the source
            # relative to the output.
            source = (settings.source_url
                      or utils.relative_path(settings.output_path,
                                             settings._source))
            parts += [
                nodes.reference('', 'View document source', refuri=source),
                nodes.Text('.\n'),
            ]
        if settings.datestamp:
            # `settings.datestamp` is the strftime format; the time is UTC.
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text(f'Generated on: {datestamp}.\n'))
        if settings.generator:
            parts += [
                nodes.Text('Generated by '),
                nodes.reference('', 'Docutils',
                                refuri='https://docutils.sourceforge.io/'),
                nodes.Text(' from '),
                nodes.reference(
                    '', 'reStructuredText',
                    refuri='https://docutils.sourceforge.io/rst.html'),
                nodes.Text(' source.\n'),
            ]
        return [nodes.paragraph('', '', *parts)]
94
95
class ExposeInternals(Transform):

    """
    Copy internal Python attributes of nodes into node attributes.

    Only active when the ``expose_internals`` setting is non-empty.  Each
    name listed there is looked up on every non-Text node and, if the
    value is not None, stored under the attribute ``internal:<name>``.
    """

    default_priority = 840

    def not_Text(self, node) -> bool:
        """Return True for every node that is not a `nodes.Text` instance."""
        if isinstance(node, nodes.Text):
            return False
        return True

    def apply(self) -> None:
        """Expose the configured internal attributes on all non-Text nodes."""
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for element in self.document.findall(self.not_Text):
            for name in attribute_names:
                internal = getattr(element, name, None)
                if internal is None:
                    # attribute missing or unset: nothing to expose
                    continue
                element['internal:' + name] = internal
114
115
class Messages(Transform):

    """Collect "loose" system messages.

    System messages recorded in the document's `parse_messages` and
    `transform_messages` lists that have no parent node are gathered in a
    dedicated "system-messages" section appended to the document.
    """

    default_priority = 860

    def apply(self) -> None:
        """Append a section holding all parentless system messages.

        Does nothing when there are no such messages.  Otherwise the
        document's `transform_messages` list is emptied in place.
        """
        document = self.document
        candidates = (*document.parse_messages,
                      *document.transform_messages)
        unattached = [message for message in candidates
                      if not message.parent]
        if not unattached:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += unattached
        document.transform_messages.clear()
        self.document += section
137
138
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Also convert <problematic> nodes referencing removed messages
    to <Text> nodes and remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self) -> None:
        """Filter system messages and clean up what referenced them.

        Runs three passes over the document tree.  Each pass iterates over
        a snapshot (tuple) of the matching nodes, because removing or
        replacing nodes while the `findall()` generator is still walking
        the tree can make it skip siblings.
        """
        threshold = self.document.reporter.report_level
        removed_ids = set()  # IDs of removed system messages

        # 1. Drop system messages whose level is below the report level.
        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < threshold:
                node.parent.remove(node)
                for _id in node['ids']:
                    # remove ID registration
                    self.document.ids.pop(_id, None)
                    removed_ids.add(_id)

        # 2. Replace <problematic> nodes that pointed to a removed message
        #    with their plain text.
        for node in tuple(self.document.findall(nodes.problematic)):
            if 'refid' in node and node['refid'] in removed_ids:
                node.parent.replace(node, nodes.Text(node.astext()))

        # 3. Remove "system-messages" sections with a single child left
        #    (expected to be just the title).
        for node in tuple(self.document.findall(nodes.section)):
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)
164
165
class TestMessages(Transform):

    """
    Append every parentless entry of the document's `transform_messages`
    to the end of the document.

    Used for testing purposes.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self) -> None:
        """Attach all transform messages that have no parent yet."""
        for message in self.document.transform_messages:
            if message.parent:
                # already placed somewhere in the tree
                continue
            self.document += message
183
184
class StripComments(Transform):

    """
    Delete all comment elements from the document tree.

    Only active if the ``strip_comments`` setting is enabled.
    """

    default_priority = 740

    def apply(self) -> None:
        """Remove every `nodes.comment` element from its parent."""
        if not self.document.settings.strip_comments:
            return
        # Collect first: the tree must not change while it is traversed.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)
198
199
class StripClassesAndElements(Transform):

    """
    Strip elements and class values selected by the settings.

    Elements carrying a class listed in
    `self.document.settings.strip_elements_with_classes` are removed from
    the document tree; class values listed in
    `self.document.settings.strip_classes` are removed from the "classes"
    attribute of the remaining elements.
    """

    default_priority = 420

    def apply(self) -> None:
        """Remove the configured elements first, then the class values."""
        settings = self.document.settings
        if settings.strip_elements_with_classes:
            # `check_classes()` reads this set.
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Take a snapshot, as removing the current node
            # corrupts the iterator returned by `findall()`:
            doomed = tuple(self.document.findall(self.check_classes))
            for element in doomed:
                element.parent.remove(element)

        unwanted = settings.strip_classes
        if not unwanted:
            return
        for element in self.document.findall(nodes.Element):
            classes = element['classes']
            for class_value in unwanted:
                if class_value in classes:
                    classes.remove(class_value)

    def check_classes(self, node) -> bool:
        """Return True if `node` is an Element with a class to be stripped.

        Relies on `self.strip_elements`, which `apply()` sets up.
        """
        if not isinstance(node, nodes.Element):
            return False
        return any(class_value in self.strip_elements
                   for class_value in node['classes'])
236
237
class SmartQuotes(Transform):

    """
    Convert ASCII quotation marks to their typographic form.

    Runs of dashes are converted to em-dash/en-dash characters, too.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Block-level node classes whose instances are left untouched."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Inline node classes whose text is passed on as "literal"."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode) -> None:
        Transform.__init__(self, document, startnode=startnode)
        # languages for which a warning was already issued
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each "Text" node.

        Interface to ``smartquotes.educate_tokens()``.  The text type is
        'literal' if the parent or grandparent of the node is one of
        `self.literal_nodes`, else 'plain'.
        """
        for txtnode in txtnodes:
            container = txtnode.parent
            if (isinstance(container, self.literal_nodes)
                    or isinstance(container.parent, self.literal_nodes)):
                yield 'literal', str(txtnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            # Insert backslashes before escaped "active" characters.
            escaped = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(txtnode))
            yield 'plain', escaped

    def apply(self) -> None:
        """Replace the Text nodes of each text block with educated text.

        Does nothing unless the ``smart_quotes`` setting is true.  A
        setting value starting with "alt" toggles the ``-x-altquot``
        language subtag.  A warning is reported once per language
        without defined smart quotes; such text keeps its ASCII quotes.
        """
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            # not a string (e.g. a boolean)
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.findall(nodes.TextElement):
            # Skip preformatted text blocks and special elements as well as
            # nested TextElements (these are not "block-level" elements):
            if (isinstance(node, self.nodes_to_skip)
                    or isinstance(node.parent, nodes.TextElement)):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.findall(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)
            ]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                lang = (lang.replace('-x-altquot', '')
                        if '-x-altquot' in lang
                        else lang + '-x-altquot')
            # drop unsupported subtags:
            supported = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported is None:
                # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''
            else:
                lang = supported

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            educated = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for old, newtext in zip(txtnodes, educated):
                old.parent.replace(old, nodes.Text(newtext))

        self.unsupported_languages.clear()
341
342
class Validate(Transform):

    """
    Check the document tree for validity; report violations as warnings.

    Only active if the ``validate`` setting exists and is true.
    """

    default_priority = 835  # between misc.Transitions and universal.Messages

    def apply(self) -> None:
        """Validate every node on its own and report each violation."""
        settings = self.document.settings
        if not getattr(settings, 'validate', False):
            return
        for node in self.document.findall():
            try:
                # children are visited by the loop, so no recursion here
                node.validate(recursive=False)
            except nodes.ValidationError as violation:
                # attach the warning to the offending element if known
                culprit = violation.problematic_element or node
                self.document.reporter.warning(
                    str(violation), base_node=culprit)
360 # TODO: append a link to the Document Tree documentation?
361 # nodes.paragraph('', 'See ',
362 # nodes.reference('', 'doctree.html#document',
363 # refuri='https://docutils.sourceforge.io/'
364 # 'docs/ref/doctree.html#document'),