1# $Id$
2# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
5
6"""
7Transforms needed by most or all documents:
8
9- `Decorations`: Generate a document's header & footer.
10- `ExposeInternals`: Expose internal attributes.
11- `Messages`: Placement of system messages generated after parsing.
12- `FilterMessages`: Remove system messages below verbosity threshold.
13- `TestMessages`: Like `Messages`, used on test runs.
14- `StripComments`: Remove comment elements from the document tree.
15- `StripClassesAndElements`: Remove elements with classes
16 in `self.document.settings.strip_elements_with_classes`
17 and class values in `self.document.settings.strip_classes`.
18- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `Validate`: Validate the document tree, report violations as warnings.
20"""
21
22__docformat__ = 'reStructuredText'
23
24import re
25import time
26from docutils import nodes, utils
27from docutils.transforms import Transform
28from docutils.utils import smartquotes
29
30
class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).

    The content is provided by `generate_header()` and `generate_footer()`.
    The decoration's header/footer is only requested when the respective
    generator returns a non-empty result.
    """

    default_priority = 820

    def apply(self) -> None:
        """Append the generated header and footer nodes to the decoration."""
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the header content; this implementation has none."""
        return None

    def generate_footer(self):
        """
        Return a list with one footer paragraph, or None.

        The paragraph is assembled according to the `generator`,
        `datestamp`, `source_link`, and `source_url` settings.
        None is returned if all of these settings are false.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        # Link to the document source; an explicit URL takes precedence
        # over the path relative to the output destination.
        if (settings.source_link and settings._source) or settings.source_url:
            source = settings.source_url or utils.relative_path(
                settings._destination, settings._source)
            parts.append(nodes.reference('', 'View document source',
                                         refuri=source))
            parts.append(nodes.Text('.\n'))

        # Time of generation (UTC), formatted with the `datestamp` setting.
        if settings.datestamp:
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text(f'Generated on: {datestamp}.\n'))

        # Credits with links to Docutils and reStructuredText.
        if settings.generator:
            parts += [
                nodes.Text('Generated by '),
                nodes.reference('', 'Docutils',
                                refuri='https://docutils.sourceforge.io/'),
                nodes.Text(' from '),
                nodes.reference(
                    '', 'reStructuredText',
                    refuri='https://docutils.sourceforge.io/rst.html'),
                nodes.Text(' source.\n'),
            ]

        return [nodes.paragraph('', '', *parts)]
92
93
class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.

    For every node that is not a `Text` instance, each Python attribute
    named in the setting whose value is not None is stored as node
    attribute "internal:<name>".
    """

    default_priority = 840

    def not_Text(self, node) -> bool:
        """Return True unless `node` is a `nodes.Text` instance."""
        return not isinstance(node, nodes.Text)

    def apply(self) -> None:
        """Copy the requested internal attributes to node attributes."""
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for node in self.document.findall(self.not_Text):
            for name in attribute_names:
                value = getattr(node, name, None)
                if value is None:
                    continue
                node['internal:' + name] = value
112
113
class Messages(Transform):

    """Handle "loose" messages.

    Collect the system messages from parsing and from transforms that
    have no parent node and append them to the document in a section
    with class value "system-messages".
    """

    default_priority = 860

    def apply(self) -> None:
        """Move parent-less system messages into a new section."""
        candidates = list(self.document.parse_messages)
        candidates.extend(self.document.transform_messages)
        loose_messages = [msg for msg in candidates if not msg.parent]
        if not loose_messages:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += loose_messages
        # Empty the list in place.
        self.document.transform_messages.clear()
        self.document += section
135
136
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Also convert <problematic> nodes referencing removed messages
    to <Text> nodes and remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self) -> None:
        """
        Filter system messages and clean up what they leave behind.

        1. Remove every system message whose "level" is below the
           reporter's `report_level` and drop its IDs from `document.ids`.
        2. Replace <problematic> nodes whose "refid" points to a removed
           message by a Text node with the same text.
        3. Remove "system-messages" sections that contain only one child.

        All three passes iterate over a snapshot (tuple) of the matching
        nodes, because changing the tree while the `findall()` iterator
        is active may make it skip nodes.
        """
        report_level = self.document.reporter.report_level
        removed_ids = set()  # IDs of removed system messages
        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < report_level:
                node.parent.remove(node)
                for _id in node['ids']:
                    self.document.ids.pop(_id, None)  # remove ID registration
                    removed_ids.add(_id)
        for node in tuple(self.document.findall(nodes.problematic)):
            if 'refid' in node and node['refid'] in removed_ids:
                node.parent.replace(node, nodes.Text(node.astext()))
        for node in tuple(self.document.findall(nodes.section)):
            # A length of 1 means only one child is left in the section
            # (presumably the title -- the messages have been removed).
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)
162
163
class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Only messages without a parent node are appended.
    Used for testing purposes.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self) -> None:
        """Append parent-less transform messages to the document."""
        for message in self.document.transform_messages:
            if message.parent:
                continue  # already attached somewhere
            self.document += message
181
182
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self) -> None:
        """Remove all <comment> nodes if ``strip_comments`` is true."""
        if not self.document.settings.strip_comments:
            return
        # Collect the nodes first: the tree is changed in the loop.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)
196
197
class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self) -> None:
        """Strip elements first, then class values from the remainder."""
        settings = self.document.settings

        if settings.strip_elements_with_classes:
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Iterate over a tuple as removing the current node
            # corrupts the iterator returned by `iter`:
            for node in tuple(self.document.findall(self.check_classes)):
                node.parent.remove(node)

        strip_classes = settings.strip_classes
        if not strip_classes:
            return
        for node in self.document.findall(nodes.Element):
            classes = node['classes']
            for class_value in strip_classes:
                # remove (at most) one occurrence of every listed value
                if class_value in classes:
                    classes.remove(class_value)

    def check_classes(self, node) -> bool:
        """
        Return True if `node` is an Element with a class value listed
        in `self.strip_elements`.
        """
        if not isinstance(node, nodes.Element):
            return False
        return any(class_value in self.strip_elements
                   for class_value in node['classes'])
234
235
class SmartQuotes(Transform):

    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not apply smartquotes to instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode) -> None:
        Transform.__init__(self, document, startnode=startnode)
        # Languages for which a "not supported" warning was already issued.
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """
        Yield a ``(texttype, nodetext)`` tuple for every node in `txtnodes`.

        Interface to ``smartquotes.educate_tokens()``: `texttype` is
        'literal' if the parent or grandparent of the Text node is an
        instance of `self.literal_nodes`, else 'plain'.
        """
        for txtnode in txtnodes:
            parent = txtnode.parent
            in_literal = (isinstance(parent, self.literal_nodes)
                          or isinstance(parent.parent, self.literal_nodes))
            if in_literal:
                yield 'literal', str(txtnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            # Insert backslashes before escaped "active" characters.
            yield 'plain', re.sub('(?<=\x00)([-\\\'".`])', r'\\\1',
                                  str(txtnode))

    def apply(self) -> None:
        """
        "Educate" the quotes in all text blocks of the document.

        Do nothing unless the `smart_quotes` setting is true. The quote
        characters are selected by the language of the respective text
        block; a warning is reported (once per language) if no quotes
        are defined for it.
        """
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:  # setting value is not a string
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.findall(nodes.TextElement):
            # skip preformatted text blocks and special elements as well as
            # nested TextElements (they are not "block-level" elements):
            if (isinstance(node, self.nodes_to_skip)
                    or isinstance(node.parent, nodes.TextElement)):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.findall(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                lang = (lang.replace('-x-altquot', '')
                        if '-x-altquot' in lang
                        else lang + '-x-altquot')
            # drop unsupported subtags (use the first supported tag):
            supported_tag = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported_tag is not None:
                lang = supported_tag
            else:  # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for old_text, new_text in zip(txtnodes, teacher):
                old_text.parent.replace(old_text, nodes.Text(new_text))

        self.unsupported_languages.clear()
339
340
class Validate(Transform):

    """
    Validate the document tree, report violations as warnings.

    Only active if the ``validate`` setting exists and is true.
    """

    default_priority = 835  # between misc.Transitions and universal.Messages

    def apply(self) -> None:
        """Validate every node (non-recursively), warn about violations."""
        validation_enabled = getattr(self.document.settings,
                                     'validate', False)
        if not validation_enabled:
            return
        for node in self.document.findall():
            try:
                node.validate(recursive=False)
            except nodes.ValidationError as error:
                # Point to the offending element if the error names one,
                # else to the node that failed validation.
                culprit = error.problematic_element or node
                self.document.reporter.warning(str(error), base_node=culprit)
                # TODO: append a link to the Document Tree documentation?
                # nodes.paragraph('', 'See ',
                #   nodes.reference('', 'doctree.html#document',
                #       refuri='https://docutils.sourceforge.io/'
                #       'docs/ref/doctree.html#document'),