1# $Id: misc.py 10302 2026-03-27 08:40:56Z milde $
2# Authors: David Goodger <goodger@python.org>; Dethe Elza
3# Copyright: This module has been placed in the public domain.
4
5"""Miscellaneous directives."""
6
7from __future__ import annotations
8
9__docformat__ = 'reStructuredText'
10
11import re
12import time
13from pathlib import Path
14from urllib.request import urlopen
15from urllib.error import URLError
16
17from docutils import frontend, io, nodes, statemachine, utils
18from docutils.parsers.rst import Directive, convert_directive_function
19from docutils.parsers.rst import directives, roles, states
20from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
21from docutils.transforms import misc
22
23TYPE_CHECKING = False
24if TYPE_CHECKING:
25 from docutils.nodes import StrPath
26
27
def adapt_path(path: str, source='', root_prefix='') -> str:
    """Return `path` adapted for a file that is to be included or embedded.

    If `root_prefix` is set (cf. the "root_prefix" setting) and `path`
    starts with "/", the path is re-rooted below `root_prefix`.
    Otherwise, it is taken relative to the directory of `source`, the
    `current_source` of the including directive (which may be a file
    included by the main document).
    """
    if root_prefix and path.startswith('/'):
        anchor, tail = Path(root_prefix), path[1:]
    else:
        anchor, tail = Path(source).parent, path
    # Prepend the anchor directory and convert to a relative path
    # (this gives shorter system messages).
    return utils.relative_path(None, anchor / tail)
40
41
class Include(Directive):

    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.

    `run()` reads and clips the file and then hands the text to one of
    `as_literal_block()`, `as_code_block()`, `custom_parse()`, or
    `insert_into_input_lines()`, depending on the directive options.

    https://docutils.sourceforge.io/docs/ref/rst/directives.html#include
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'parser': directives.parser_name,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged,
                   'end-before': directives.unchanged,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.value_or((None,), int),
                   'class': directives.class_option,
                   'name': directives.unchanged}

    # Directory "include" next to the `states` module; used as root prefix
    # for arguments written in angle brackets (``<name>``), see `run()`.
    standard_include_path = Path(states.__file__).parent / 'include'

    def run(self) -> list:
        """Include a file as part of the content of this reST file.

        Depending on the options, the file content (or a clipping) is
        converted to nodes and returned or inserted into the input stream.

        Raise the exception returned by `self.warning()` if the
        "file_insertion_enabled" setting is false.
        """
        # Store the settings on the instance: the helper methods
        # (`read_file()`, `custom_parse()`, ...) read `self.settings`.
        self.settings = settings = self.state.document.settings
        if not settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        # The "tab-width" option overrides the "tab_width" setting.
        self.tab_width = self.options.get('tab-width', settings.tab_width)
        # Clipping options (None if not specified).  The tuple is used by
        # `read_file()` and, together with the path, identifies this
        # inclusion in the include log (see `insert_into_input_lines()`).
        self.clip_options = (self.options.get('start-line', None),
                             self.options.get('end-line', None),
                             self.options.get('start-after', None),
                             self.options.get('end-before', None))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            # ``<name>``: strip the brackets and make the path "absolute",
            # so that `adapt_path()` prepends the standard include directory.
            path = '/' + path[1:-1]
            root_prefix = self.standard_include_path
        else:
            root_prefix = settings.root_prefix
        path = adapt_path(path,
                          self.state.document.current_source,
                          root_prefix)
        # Remember the adapted path; the helper methods read it from
        # ``self.options['source']``.
        self.options['source'] = path

        inputstring = self.read_file(path)

        # Dispatch: the first matching option wins
        # ("literal" before "code" before "parser").
        if 'literal' in self.options:
            return self.as_literal_block(inputstring)
        if 'code' in self.options:
            return self.as_code_block(inputstring)
        if 'parser' in self.options:
            return self.custom_parse(inputstring)
        # Default: feed the text to the calling parser; no nodes to return.
        self.insert_into_input_lines(inputstring)
        return []

    def read_file(self, path: StrPath) -> str:
        """Read text file at `path`. Clip and return content.

        The encoding is taken from the "encoding" option with the
        "input_encoding" setting as fallback.  `path` is added to
        ``settings.record_dependencies`` once `io.FileInput` has been
        set up without error.

        The text is clipped according to `self.clip_options`
        ("start-line", "end-line", "start-after", "end-before").
        Requires `self.settings` and `self.clip_options` (set in `run()`).

        Raise the exception returned by `self.error()` if setting up the
        input raises `UnicodeEncodeError` or `OSError`, if reading raises
        `UnicodeError`, or if a non-empty "start-after" or "end-before"
        text is not found.

        Provisional.
        """
        encoding = self.options.get('encoding', self.settings.input_encoding)
        error_handler = self.settings.input_encoding_error_handler
        try:
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
        except UnicodeEncodeError:
            raise self.error(f'Problems with "{self.name}" directive path:\n'
                             f'Cannot encode input file path "{path}" '
                             '(wrong locale?).')
        except OSError as error:
            raise self.error(f'Problems with "{self.name}" directive path:\n'
                             f'{io.error_string(error)}.')
        else:
            self.settings.record_dependencies.add(path)
        try:
            text = include_file.read()
        except UnicodeError as error:
            raise self.error(f'Problem with "{self.name}" directive:\n'
                             + io.error_string(error))
        # Clip to-be-included content
        startline, endline, starttext, endtext = self.clip_options
        if startline or (endline is not None):
            # Line-based clipping.  A bound that is None (option missing)
            # is handled by the slice.  The split/join round-trip joins the
            # lines with "\n" and drops a final newline.
            lines = text.splitlines()
            text = '\n'.join(lines[startline:endline])
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        # exception: empty string matches an empty line
        if starttext == "":
            # skip content before an empty line
            starttext = '\n\n'
        if starttext:
            # skip content in text before *and incl.* a matching text
            after_index = text.find(starttext)
            if after_index < 0:
                raise self.error('Problem with "start-after" option of '
                                 f'"{self.name}" directive:\nText not found.')
            else:
                text = text[after_index + len(starttext):]
        if endtext == "":
            # skip content after an empty line
            before_index = text.find('\n\n')
            # Keep the newline that ends the last line before the empty line.
            # If there is no empty line, the text is left as is (no error).
            # NOTE(review): a match at index 0 (text starting with two
            # newlines) also leaves the text unchanged -- confirm that
            # this is intended.
            if before_index > 0:
                text = text[:before_index+1]
        elif endtext:
            # skip content in text after *and incl.* a matching text
            before_index = text.find(endtext)
            if before_index < 0:
                raise self.error('Problem with "end-before" option of '
                                 f'"{self.name}" directive:\nText not found.')
            text = text[:before_index]
        return text

    def as_literal_block(self, text: str) -> list[nodes.literal_block]:
        """Return list with literal_block containing `text`.

        The node gets the "class" option values as classes and the
        included file (``self.options['source']``) as source.
        With the "number-lines" option, the text is passed through
        `NumberLines` and added as a sequence of `inline` and `Text` nodes.

        Provisional
        """
        source = self.options['source']
        # Convert tabs to spaces unless `tab_width` is negative.
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        literal_block = nodes.literal_block(
                            '', source=source,
                            classes=self.options.get('class', []))
        literal_block.source = source
        # "start-line" counts skipped lines, so the first included line is
        # at "start-line" + 1 (this assumes a non-negative "start-line").
        literal_block.line = self.options.get('start-line', 0) + 1
        self.add_name(literal_block)
        if 'number-lines' in self.options:
            firstline = self.options['number-lines']
            # presumably None if the option is given without a value
            # (cf. `directives.value_or((None,), int)` in `option_spec`)
            if firstline is None:
                firstline = 1
            text = text.removesuffix('\n')
            lastline = firstline + len(text.splitlines())
            tokens = NumberLines([([], text)], firstline, lastline)
            # Tokens with classes become inline elements, others plain text.
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline('', value, classes=classes)
                else:
                    literal_block += nodes.Text(value)
        else:
            literal_block += nodes.Text(text)
        return [literal_block]

    def as_code_block(self, text: str) -> list[nodes.literal_block]:
        """Pass `text` to the `CodeBlock` directive class.

        Return the result of running a `CodeBlock` directive with the value
        of the "code" option as argument, the remaining options of this
        directive as options, and `text` as content.

        Attention: the "code" entry is removed from `self.options`.

        Provisional.
        """
        # convert tabs to spaces unless `tab_width` is negative:
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        codeblock = CodeBlock(self.name,
                              [self.options.pop('code')],  # pass as argument
                              self.options,
                              [text.removesuffix('\n')],  # content
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine,
                              )
        return codeblock.run()

    def custom_parse(self, text: str) -> list:
        """Parse with custom parser.

        Parse with ``self.options['parser']`` into a new (dummy) document,
        apply the parser's default transforms,
        return child elements.

        Parse messages and transform messages of the dummy document are
        appended to the corresponding lists of the including document.

        Provisional.
        """
        parser = self.options['parser']()
        settings = frontend.get_default_settings(parser)
        # update with current document settings
        for k, v in self.settings.__dict__.items():
            setattr(settings, k, v)
        settings._source = self.options['source']
        document = utils.new_document(settings._source, settings)
        # Let the dummy document use the *same* bookkeeping objects as the
        # including document (include log, names, ids).
        document.include_log = self.state.document.include_log
        document.names = self.state.document.names
        document.ids = self.state.document.ids
        document.nameids = self.state.document.nameids
        document.nametypes = self.state.document.nametypes
        parser.parse(text, document)
        self.state.document.parse_messages.extend(document.parse_messages)
        # clean up doctree and complete parsing
        document.transformer.populate_from_components((parser,))
        document.transformer.apply_transforms()
        self.state.document.transform_messages.extend(
            document.transform_messages)
        return document.children

    def insert_into_input_lines(self, text: str) -> None:
        """Insert file content into the rST input of the calling parser.

        Before inserting, check for lines longer than the
        "line_length_limit" setting and for circular inclusion; raise
        the exception returned by `self.warning()` if a check fails.

        The inclusion is logged in ``document.include_log`` as tuple
        ``(source, clip_options)`` and an "end of inclusion" comment is
        appended to the inserted lines as marker for removing the entry.

        Provisional.
        """
        source = self.options['source']
        textlines = statemachine.string2lines(text, self.tab_width,
                                              convert_whitespace=True)
        # Sanity checks:
        #   excessively long lines
        for i, line in enumerate(textlines):
            if len(line) > self.settings.line_length_limit:
                # report the line number in the included file
                # (offset by the lines skipped with "start-line")
                line_no = i + 1 + self.options.get('start-line', 0)
                raise self.warning(f'"{source}": line {line_no} exceeds the'
                                   ' line-length-limit.')
        #   circular inclusion
        include_log = self.state.document.include_log
        if not include_log:  # new document, initialize with document source
            current_source = utils.relative_path(
                None, self.state.document.current_source)
            include_log.append((current_source, (None, None, None, None)))
        # The same file with the same clipping is already being included:
        # report the chain of inclusions (innermost first).
        if (source, self.clip_options) in include_log:
            source_chain = (pth for (pth, opt) in reversed(include_log))
            inclusion_chain = '\n> '.join((source, *source_chain))
            raise self.warning(f'circular inclusion in "{self.name}"'
                               f' directive:\n{inclusion_chain}')
        include_log.append((source, self.clip_options))
        # marker for removing log entry (cf. parsers.rst.states.Body.comment())
        textlines += ['', f'.. end of inclusion from "{source}"']

        self.state_machine.insert_input(textlines, source)
        # TODO: if startline != 0, line numbers are wrong.
281
282
class Raw(Directive):

    """
    Pass through content unchanged.

    Content is included in output based on type argument.

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option}
    has_content = True

    def run(self):
        """Return a list with one `raw` node holding the unprocessed text.

        The text is taken from exactly one of: the directive content,
        the file given with the "file" option, or the resource given with
        the "url" option.  The directive argument (lowercased, whitespace
        normalized) is stored as "format" attribute.  For external
        sources, the path or URL is stored as "source" attribute.

        Raise the exception returned by `self.warning()` if the directive
        is disabled via the "raw_enabled" or (for external sources) the
        "file_insertion_enabled" setting.
        Raise the exception returned by `self.error()` for conflicting
        sources, missing content, and I/O or decoding problems.
        """
        settings = self.state.document.settings
        if (not settings.raw_enabled
            or (not settings.file_insertion_enabled
                and ('file' in self.options or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        # The "encoding" option overrides the "input_encoding" setting.
        encoding = self.options.get('encoding', settings.input_encoding)
        error_handler = settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            path = adapt_path(self.options['file'],
                              self.state.document.current_source,
                              settings.root_prefix)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
            except OSError as error:
                raise self.error(f'Problems with "{self.name}" directive '
                                 f'path:\n{io.error_string(error)}.')
            else:
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                settings.record_dependencies.add(path)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.error(f'Problem with "{self.name}" directive:\n'
                                 + io.error_string(error))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            try:
                # The object returned by `urlopen()` works as a context
                # manager: make sure the connection is closed after
                # reading (also if reading fails).
                with urlopen(source) as response:
                    raw_text = response.read()
            except (URLError, OSError) as error:
                raise self.error(f'Problems with "{self.name}" directive URL '
                                 f'"{self.options["url"]}":\n'
                                 f'{io.error_string(error)}.')
            # Decode the downloaded data with the configured encoding.
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=error_handler)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.error(f'Problem with "{self.name}" directive:\n'
                                 + io.error_string(error))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, classes=self.options.get('class', []),
                             **attributes)
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]
368
369
class Replace(Directive):

    """Provide the replacement text of a substitution definition."""

    has_content = True

    def run(self):
        """Parse the content; return system messages + paragraph children.

        The content may consist of a single paragraph only (plus system
        messages generated while parsing it).  Anything else results in
        an error system message being returned instead.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        container = nodes.Element('\n'.join(self.content))
        self.state.nested_parse(self.content, self.content_offset,
                                container)
        # The container may now hold [paragraph] + system_message(s).
        paragraph = None
        system_messages = []
        for child in container:
            if isinstance(child, nodes.system_message):
                child['backrefs'] = []
                system_messages.append(child)
            elif not paragraph and isinstance(child, nodes.paragraph):
                paragraph = child
            else:
                # second paragraph or any other body element
                problem = self.reporter.error(
                    f'Error in "{self.name}" directive: may contain '
                    'a single paragraph only.', line=self.lineno)
                return [problem]
        if not paragraph:
            return system_messages
        return system_messages + paragraph.children
401
402
class Unicode(Directive):

    r"""
    Convert Unicode character codes (numbers) to characters.  Codes may be
    decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
    ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
    entities (e.g. ``&#x262E;``).  Text following ".." is a comment and is
    ignored.  Spaces are ignored, and any other text remains as-is.

    The options "trim", "ltrim", and "rtrim" set the corresponding
    attributes of the enclosing substitution definition.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'trim': directives.flag,
                   'ltrim': directives.flag,
                   'rtrim': directives.flag}

    comment_pattern = re.compile(r'( |\n|^)\.\. ')

    def run(self):
        """Return a list of Text nodes, one per whitespace-separated code."""
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        definition = self.state_machine.node
        # "trim" is a shortcut for "ltrim" + "rtrim".
        trim_both = 'trim' in self.options
        for side in ('ltrim', 'rtrim'):
            if trim_both or side in self.options:
                definition.attributes[side] = 1
        # Drop the comment (if any), then split the rest at whitespace.
        payload = self.comment_pattern.split(self.arguments[0])[0]
        holder = nodes.Element()
        for code in payload.split():
            try:
                character = directives.unicode_code(code)
            except ValueError as error:
                raise self.error('Invalid character code: %s\n%s'
                                 % (code, io.error_string(error)))
            holder += nodes.Text(character)
        return holder.children
445
446
class Class(Directive):

    """
    Set a "class" attribute on the directive content or the next element.

    With content, the class values are added to every top-level node of
    the parsed content.  Without content, a "pending" element is inserted,
    and a transform applies the classes to the next element later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        """Return the classified content nodes or a `pending` node."""
        try:
            class_value = directives.class_option(self.arguments[0])
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, self.arguments[0]))
        if not self.content:
            # Nothing to work on yet: leave the job to the transform.
            details = {'class': class_value, 'directive': self.name}
            pending = nodes.pending(misc.ClassAttribute, details,
                                    self.block_text)
            self.state_machine.document.note_pending(pending)
            return [pending]
        wrapper = nodes.Element()
        self.state.nested_parse(self.content, self.content_offset, wrapper)
        for child in wrapper:
            child['classes'].extend(class_value)
        return list(wrapper.children)
483
484
class Role(Directive):

    """Define a custom interpreted text role, optionally based on another.

    The first content line holds the arguments: the new role name,
    optionally followed by the name of the base role in parentheses.
    """

    has_content = True

    argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
                                  % ((states.Inliner.simplename,) * 2))

    def run(self):
        """Dynamically create and register a custom interpreted text role.

        Return a list of system messages (empty if there were no problems).
        """
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        first_line = self.content[0]
        parsed = self.argument_pattern.match(first_line)
        if parsed is None:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, first_line))
        new_role_name, base_role_name = parsed.group(1, 3)

        # Determine the role function to derive the new role from.
        if not base_role_name:
            base_role, messages = roles.generic_custom_role, []
        else:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                unknown = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [unknown]
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))

        # Parse the remaining content lines as the block of a directive
        # derived from the base role.
        try:
            converted_role = convert_directive_function(base_role)
            block = self.state.parse_directive_block(
                self.content[1:], self.content_offset,
                converted_role, option_presets={})
            arguments, options, content, content_offset = block
        except states.MarkupError as detail:
            failure = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [failure]

        # The "class" option defaults to the name of the new role.
        if 'class' not in options:
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError as detail:
                failure = self.reporter.error(
                    'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, detail),
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [failure]

        custom_role = roles.CustomRole(new_role_name, base_role,
                                       options, content)
        roles.register_local_role(new_role_name, custom_role)
        return messages
545
546
class DefaultRole(Directive):

    """Set the default interpreted text role."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        """Register the given role under the empty name in `roles._roles`.

        Without argument, remove that entry (if present) to restore the
        "default" default role.  Return a list of system messages.
        """
        if not self.arguments:
            # restore the "default" default role
            roles._roles.pop('', None)
            return []
        requested = self.arguments[0]
        role_fn, messages = roles.role(requested, self.state_machine.language,
                                       self.lineno, self.state.reporter)
        if role_fn is not None:
            roles._roles[''] = role_fn
            return messages
        unknown = self.state.reporter.error(
            'Unknown interpreted text role "%s".' % requested,
            nodes.literal_block(self.block_text, self.block_text),
            line=self.lineno)
        return messages + [unknown]
571
572
class Title(Directive):

    """Set the "title" attribute of the document."""

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        """Store the directive argument as document title; return no nodes."""
        document = self.state_machine.document
        document['title'] = self.arguments[0]
        return []
582
583
class MetaBody(states.SpecializedBody):

    """Parser state for the content of the "meta" directive."""

    def field_marker(self, match, context, next_state):
        """Meta element."""
        result, blank_finish = self.parsemeta(match)
        self.parent += result
        return [], next_state, []

    def parsemeta(self, match):
        """Convert the field starting at `match` to a `meta` node.

        The field body becomes the "content" attribute.  The first token of
        the field name is either a ``name=value`` pair or the value of the
        "name" attribute; further tokens must be ``name=value`` pairs.

        Return a tuple ``(node, blank_finish)``, where `node` is a system
        message if the field has no content or an attribute is invalid.
        """
        field_name = self.parse_field_marker(match)
        field_name = nodes.unescape(utils.escape2null(field_name))
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        meta_node = nodes.meta()
        body_text = ' '.join(indented)
        meta_node['content'] = nodes.unescape(utils.escape2null(body_text))
        if not indented:
            current_line = self.state_machine.line
            notice = self.reporter.info(
                'No content for meta tag "%s".' % field_name,
                nodes.literal_block(current_line, current_line))
            return notice, blank_finish
        first, *others = field_name.split()
        # First token: attribute pair or, as fallback, the meta name.
        try:
            key, value = utils.extract_name_value(first)[0]
            meta_node[key.lower()] = value
        except utils.NameValueError:
            meta_node['name'] = first
        # Further tokens: attribute pairs only.
        for token in others:
            try:
                key, value = utils.extract_name_value(token)[0]
                meta_node[key.lower()] = value
            except utils.NameValueError as detail:
                current_line = self.state_machine.line
                problem = self.reporter.error(
                    'Error parsing meta tag attribute "%s": %s.'
                    % (token, detail),
                    nodes.literal_block(current_line, current_line))
                return problem, blank_finish
        return meta_node, blank_finish
623
624
class Meta(Directive):

    """Add `meta` nodes for the fields in the directive content."""

    has_content = True

    SMkwargs = {'state_classes': (MetaBody,)}

    def run(self):
        """Parse the content with `MetaBody`; insert the result at the
        beginning of the document (after titular and meta nodes).

        Return an empty list (the nodes are inserted directly).
        """
        self.assert_has_content()
        holder = nodes.Element()
        new_line_offset, blank_finish = self.state.nested_list_parse(
            self.content, self.content_offset, holder,
            initial_state='MetaBody', blank_finish=True,
            state_machine_kwargs=self.SMkwargs)
        parsed_line_count = new_line_offset - self.content_offset
        if parsed_line_count != len(self.content):
            # incomplete parse of block?
            holder += self.reporter.error(
                'Invalid meta directive.',
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
        # insert at begin of document
        document = self.state.document
        position = document.first_child_not_matching_class(
            (nodes.Titular, nodes.meta)) or 0
        document[position:position] = holder.children
        return []
650
651
class Date(Directive):

    """Insert the current date/time in a substitution definition."""

    has_content = True

    def run(self):
        """Return a list with a Text node holding the formatted time.

        The directive content is used as format string for
        `time.strftime()`; the default format is "%Y-%m-%d".
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content)
        if not format_str:
            format_str = '%Y-%m-%d'
        # @@@
        # Open question: use the timestamp from the `SOURCE_DATE_EPOCH`_
        # environment variable (if set), i.e. format
        # ``time.gmtime(int(os.environ['SOURCE_DATE_EPOCH']))``?
        #
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        formatted = time.strftime(format_str)
        return [nodes.Text(formatted)]
680
681
class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        """Return an "info" system message describing the directive call.

        The message lists name, arguments, and options; the content (if
        any) is attached as a literal block.
        """
        if not self.content:
            return [self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % (self.name, self.arguments, self.options),
                line=self.lineno)]
        joined = '\n'.join(self.content)
        report = self.reporter.info(
            'Directive processed. Type="%s", arguments=%r, options=%r, '
            'content:' % (self.name, self.arguments, self.options),
            nodes.literal_block(joined, joined), line=self.lineno)
        return [report]