1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6Miscellaneous utilities for the documentation utilities.
7"""
8
9from __future__ import annotations
10
11__docformat__ = 'reStructuredText'
12
13import itertools
14import os
15import os.path
16import re
17import sys
18import unicodedata
19import warnings
20from pathlib import PurePath, Path
21from typing import TYPE_CHECKING
22
23from docutils import ApplicationError, DataError
24from docutils import io, nodes
25# for backwards compatibility
26from docutils.nodes import unescape # noqa: F401 (imported but unused)
27
28if TYPE_CHECKING:
29 from collections.abc import Callable, Sequence, Iterable
30 from typing import Any, Final, Literal, TextIO
31 if sys.version_info[:2] >= (3, 12):
32 from typing import TypeAlias
33 else:
34 from typing_extensions import TypeAlias
35
36 from docutils.nodes import Node, StrPath
37 from docutils.frontend import Values
38
39 _ObserverFunc: TypeAlias = Callable[[nodes.system_message], None]
40
41
class SystemMessage(ApplicationError):

    """
    Exception wrapping a system message together with its level.

    The exception text is the plain-text rendering (``astext()``) of the
    given `system_message` node; the level is kept in `self.level`.
    """

    def __init__(self, system_message: nodes.system_message, level: int,
                 ) -> None:
        text = system_message.astext()
        # Initialise through `Exception` directly, with the message text
        # as sole argument.
        Exception.__init__(self, text)
        self.level = level
48
49
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass without additional state or behaviour."""
52
53
class Reporter:

    """
    Generator of ``system_message`` elements and info/warning/error reporter.

    There are five system message levels, each with a convenience method:
    `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    Typically, one Reporter object exists per process.  It is instantiated
    with a threshold for reporting (generating warnings), a threshold for
    halting (raising exceptions), a switch for debug output, and an I/O
    stream for warnings.  All of these are stored as instance attributes.

    The level of every generated system message is compared to the stored
    thresholds; depending on the outcome a warning is written and/or an
    exception is raised.  Debug messages are produced whenever the debug
    switch is on, regardless of the thresholds.  Output goes to the stored
    warning stream unless that is set to ''.

    To keep track of generated system messages, the class uses a modified
    form of the "Observer" pattern [GoF95]_.  Call `attach_observer` before
    parsing, passing a function or bound method that accepts a system
    message.  `detach_observer` removes an observer again, so that another
    one can take its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Reporter.get_source_and_line is patched in by ``RSTState.runtime_init``
    get_source_and_line: Callable[[int|None], tuple[StrPath|None, int|None]]

    levels: Final[Sequence[str]] = (
        'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SEVERE')
    """Names of the system message levels; the level is the index."""

    # numerical values of the system message levels:
    DEBUG_LEVEL: Final = 0
    INFO_LEVEL: Final = 1
    WARNING_LEVEL: Final = 2
    ERROR_LEVEL: Final = 3
    SEVERE_LEVEL: Final = 4

    def __init__(
        self,
        source: StrPath,
        report_level: int,
        halt_level: int,
        stream: io.ErrorOutput|TextIO|str|Literal[False]|None = None,
        debug: bool = False,
        encoding: str|None = None,
        error_handler: str = 'backslashreplace',
    ) -> None:
        """Low level constructor. See also `new_reporter()`.

        :Parameters:
            - `source`: Path to, or description of, the source data.
            - `report_level`: Messages at or above this level are written
              to `stream`.
            - `halt_level`: Messages at or above this level raise a
              `SystemMessage` exception, halting execution.
            - `debug`: Whether to show debug (level=0) system messages.
            - `stream`: Destination of warning output: a file-like object
              (with a ``.write`` method), a string (file name, opened for
              writing), '' (empty string) or `False` (discard all stream
              messages), or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        # Anything that is not already an `ErrorOutput` gets wrapped:
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, encoding, error_handler)

        self.source = source
        """Path to, or description of, the source data."""

        self.report_level = report_level
        """Messages at or above this level are written to `self.stream`."""

        self.halt_level = halt_level
        """Messages at or above this level raise a `SystemMessage`
        exception, halting execution."""

        self.debug_flag = debug
        """Whether to show debug (level=0) system messages."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.stream: io.ErrorOutput = stream
        """Destination of warning output."""

        self.encoding: str = encoding or getattr(stream, 'encoding', 'ascii')
        """The output character encoding."""

        self.observers: list[_ObserverFunc] = []
        """Functions or bound methods that are called with every
        system_message created."""

        self.max_level: int = -1
        """Highest system message level generated up to now."""

    def attach_observer(self, observer: _ObserverFunc) -> None:
        """
        Register `observer`, a function or bound method taking a single
        argument (a `nodes.system_message` instance).
        """
        self.observers.append(observer)

    def detach_observer(self, observer: _ObserverFunc) -> None:
        """Unregister `observer` (removes its first occurrence)."""
        self.observers.remove(observer)

    def notify_observers(self, message: nodes.system_message) -> None:
        """Call every registered observer with `message`."""
        for callback in self.observers:
            callback(message)

    def system_message(self,
                       level: int,
                       message: str,
                       *children: Node,
                       **kwargs: Any
                       ) -> nodes.system_message:
        """
        Create and return a system_message object.

        Depending on `level` and the stored thresholds, the message is
        also written to the warning stream and/or a `SystemMessage`
        exception is raised.
        """
        # An `Exception` instance is accepted in place of a `str`.
        if isinstance(message, Exception):
            message = str(message)

        # "base_node" is only used to look up source and line,
        # it is not passed on as attribute.
        attributes = {key: value for key, value in kwargs.items()
                      if key != 'base_node'}
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:
            # 'line' is absolute line number
            try:
                source, line = self.get_source_and_line(
                                   attributes.get('line'))
            except AttributeError:
                # e.g. `get_source_and_line` has not been patched in
                pass
            else:
                if source is not None:
                    attributes['source'] = source
                if line is not None:
                    attributes['line'] = line
        # last resort: the reporter's own source
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, *children, level=level,
                                   type=self.levels[level], **attributes)
        if self.stream:
            reportable = (level >= self.report_level
                          or (self.debug_flag and level == self.DEBUG_LEVEL)
                          or level >= self.halt_level)
            if reportable:
                self.stream.write(f'{msg.astext()}\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        # Observers see debug messages only if the debug switch is on.
        if self.debug_flag or level > self.DEBUG_LEVEL:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-0, "DEBUG": an internal reporting issue.

        Usually without effect on the processing.  Level-0 system messages
        are handled separately from the others: nothing is generated (and
        None is returned) unless the debug switch is on.
        """
        if not self.debug_flag:
            return None
        return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-1, "INFO": a minor issue that can be ignored.

        Usually without effect on the processing; level-1 system messages
        are typically not reported.
        """
        level = self.INFO_LEVEL
        return self.system_message(level, *args, **kwargs)

    def warning(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-2, "WARNING": an issue that should be addressed.

        Ignoring it may lead to unpredictable problems with the output.
        """
        level = self.WARNING_LEVEL
        return self.system_message(level, *args, **kwargs)

    def error(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-3, "ERROR": an error that should be addressed.

        Ignoring it leads to errors in the output.
        """
        level = self.ERROR_LEVEL
        return self.system_message(level, *args, **kwargs)

    def severe(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-4, "SEVERE": a severe error that must be addressed.

        Ignoring it leads to severe errors in the output.  Level-4 system
        messages are typically turned into exceptions which halt processing.
        """
        level = self.SEVERE_LEVEL
        return self.system_message(level, *args, **kwargs)
268
269
class ExtensionOptionError(DataError):
    """Common base class of the extension option errors."""
class BadOptionError(ExtensionOptionError):
    """Invalid extension option field."""
class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data."""
class DuplicateOptionError(ExtensionOptionError):
    """Extension option given more than once."""
274
275
def extract_extension_options(
    field_list: nodes.field_list,
    options_spec: dict[str, Callable[[object], Any]],
) -> dict[str, Any]:
    """
    Return a dictionary mapping extension option names to converted values.

    Combines `extract_options()` (field list -> (name, value) pairs) and
    `assemble_option_dict()` (pairs -> dictionary of converted values).

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)
301
302
def extract_options(field_list: nodes.field_list
                    ) -> list[tuple[str, str|None]]:
    """
    Collect option (name, value) pairs from field names and field bodies.

    The name is the lowercased field name; the value is the text of the
    field body or None for an empty body.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = name_text.lower()
        content = field[1]
        if len(content) == 0:
            value = None
        else:
            # The only accepted non-empty body is one paragraph
            # that holds one Text node.
            is_plain_paragraph = (len(content) == 1
                                  and isinstance(content[0], nodes.paragraph)
                                  and len(content[0]) == 1
                                  and isinstance(content[0][0], nodes.Text))
            if not is_plain_paragraph:
                raise BadOptionDataError(
                    'extension option field body may contain\n'
                    'a single paragraph only (option "%s")' % name)
            value = content[0][0].astext()
        pairs.append((name, value))
    return pairs
337
338
def assemble_option_dict(
    option_list: list[tuple[str, str|None]],
    options_spec: dict[str, Callable[[object], Any]],
) -> dict[str, Any]:
    """
    Return a mapping of option names to values.

    Every value is passed through the conversion function registered for
    its name in `options_spec`.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Exception arguments are not necessarily strings; convert them
            # so that building the message cannot fail with an unrelated
            # TypeError that hides the real problem.
            reason = ' '.join(str(arg) for arg in detail.args)
            # Re-raise the same exception class with option name and
            # value prepended to the original message.
            raise detail.__class__(
                f'(option: "{name}"; value: {value!r})\n{reason}'
                ) from detail
    return options
372
373
class NameValueError(DataError):
    """Invalid input for `extract_name_value()`."""
375
376
def decode_path(path: str|bytes|None) -> str:
    """
    Return `path` as `str` instance.

    `str` values are returned unchanged, None becomes the empty string.
    Other values are decoded in a failsafe manner: first with the file
    system encoding, then with UTF-8, finally with ASCII (replacing
    undecodable bytes).

    Raise `ValueError` if `path` does not support decoding.

    Deprecated.
    """
    # TODO: is this still required with Python 3?
    if path is None:
        return ''
    if isinstance(path, str):
        return path
    try:
        return path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:
        raise ValueError('`path` value must be a String or ``None``, '
                         f'not {path!r}')
    except UnicodeDecodeError:
        pass
    try:
        return path.decode('utf-8', 'strict')
    except UnicodeDecodeError:
        return path.decode('ascii', 'replace')
401
402
def extract_name_value(line):
    """
    Parse a line of the form "name=value ..." into a list of (name, value).

    Names are lowercased.  Values may be enclosed in single or double
    quotes; unquoted values end at the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    rest = line
    while rest:
        raw_name, separator, rest = rest.partition('=')
        if not separator:
            raise NameValueError('missing "="')
        attname = raw_name.strip()
        if not attname:
            raise NameValueError(
                      'missing attribute name before "="')
        rest = rest.lstrip()
        if not rest:
            raise NameValueError(
                      'missing value after "%s="' % attname)
        quote = rest[0]
        if quote in '\'"':
            endquote = rest.find(quote, 1)
            if endquote == -1:
                raise NameValueError(
                          'attribute "%s" missing end quote (%s)'
                          % (attname, quote))
            # the character after the closing quote (if any)
            # must be whitespace
            if rest[endquote + 1:endquote + 2].strip():
                raise NameValueError(
                        'attribute "%s" end quote (%s) not followed by '
                        'whitespace' % (attname, quote))
            data = rest[1:endquote]
            rest = rest[endquote + 1:].lstrip()
        else:
            data, _, rest = rest.partition(' ')
            rest = rest.lstrip()
        pairs.append((attname.lower(), data))
    return pairs
446
447
def new_reporter(source_path: StrPath, settings: Values) -> Reporter:
    """
    Create a `Reporter` object configured from `settings` and return it.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes `report_level`, `halt_level`,
            `warning_stream`, `debug`, `error_encoding`, and
            `error_encoding_error_handler` are used.
    """
    return Reporter(source_path,
                    settings.report_level,
                    settings.halt_level,
                    stream=settings.warning_stream,
                    debug=settings.debug,
                    encoding=settings.error_encoding,
                    error_handler=settings.error_encoding_error_handler)
464
465
def new_document(source_path: StrPath, settings: Values|None = None
                 ) -> nodes.document:
    """
    Create a new, empty document object and return it.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  Without settings, a default core set is
            used.  To use the document object with any Docutils components,
            their default settings must be provided as well.

            For parsing rST, for example, provide at least the rst-parser
            settings, which can be obtained as follows:

            Defaults for parser component::

                settings = docutils.frontend.get_default_settings(
                               docutils.parsers.rst.Parser)

            Defaults and configuration file customizations::

                settings = docutils.core.Publisher(
                    parser=docutils.parsers.rst.Parser).get_settings()

    """
    # Import at top of module would lead to circular dependency!
    from docutils import frontend
    if settings is None:
        settings = frontend.get_default_settings()
    source = decode_path(source_path)
    document = nodes.document(settings, new_reporter(source, settings),
                              source=source)
    document.note_source(source, -1)
    return document
502
503
def clean_rcs_keywords(
    paragraph: nodes.paragraph,
    keyword_substitutions: Sequence[tuple[re.Pattern[str], str]],
) -> None:
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of exactly one Text node.

    :Parameters:
        - `paragraph`: The paragraph node to clean up.
        - `keyword_substitutions`: Sequence of (compiled pattern,
          substitution) pairs.  The pairs are tried in order; the first
          pattern found in the text is substituted and processing stops.
    """
    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
        textnode = paragraph[0]
        for pattern, substitution in keyword_substitutions:
            match = pattern.search(textnode)
            if match:
                # replace the text node, only the first match counts
                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
                return
515
516
def relative_path(source: StrPath|None, target: StrPath) -> str:
    """
    Return a path to the file `target`, relative to the file `source`.

    The result is a `str` that can be used inside `source`
    as a reference to `target`.

    :Parameters:
        `source` : path-like object or None
            Path of a file in the start directory for the relative path
            (the file does not need to exist).
            The value ``None`` is replaced with "<cwd>/dummy_file".
        `target` : path-like object
            End point of the returned relative path.

    Differences to `os.path.relpath()`:

    * The argument order is reversed.
    * `source` is taken to be a FILE in the start directory,
      whereas `os.path.relpath()` expects a DIRECTORY as `start` argument
      (append a "dummy" file name to get the path relative from a directory).
    * The output always uses the Posix path separator ("/").
    * The input is parsed with `os.sep`.
    * Without common prefix, the absolute path to `target` is returned.

    Differences to `pathlib.PurePath.relative_to(other)`:

    * pathlib offers an object oriented interface.
    * `source` expects the path to a FILE, `other` expects a DIRECTORY.
    * `relative_path()` always returns a path (relative or absolute),
      while `PurePath.relative_to()` raises a ValueError
      if `target` is not a subpath of `other` (no ".." inserted).
    """
    start = os.path.abspath(source or type(target)('dummy_file'))
    source_parts = start.split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components shared by both paths:
    shared = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        shared += 1
    # One step up for every remaining directory in `source`
    # (the last remaining component is the file name):
    steps_up = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(steps_up + target_parts[shared:])
570
571
def get_stylesheet_reference(settings: Values,
                             relative_to: StrPath|None = None
                             ) -> str:
    """
    Return the stylesheet reference stored in the settings object.

    With `settings.stylesheet_path` set, return that path relative to
    `relative_to` (default: `settings._destination`), else return
    `settings.stylesheet`.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.
    """
    warnings.warn('utils.get_stylesheet_reference()'
                  ' is obsoleted by utils.get_stylesheet_list()'
                  ' and will be removed in Docutils 2.0.',
                  DeprecationWarning, stacklevel=2)
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    start = settings._destination if relative_to is None else relative_to
    return relative_path(start, settings.stylesheet_path)
594
595
596# Return 'stylesheet' or 'stylesheet_path' arguments as list.
597#
598# The original settings arguments are kept unchanged: you can test
599# with e.g. ``if settings.stylesheet_path: ...``.
600#
# Differences to the deprecated `get_stylesheet_reference()`:
602# * return value is a list
603# * no re-writing of the path (and therefore no optional argument)
604# (if required, use ``utils.relative_path(source, target)``
605# in the calling script)
def get_stylesheet_list(settings: Values) -> list[str]:
    """Return the stylesheet references from the settings object as list.

    The value of `settings.stylesheet_path` or `settings.stylesheet` is
    used; a string value is split at commas.  Entries that come from
    `stylesheet_path` are looked up in `settings.stylesheet_dirs`.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    references = settings.stylesheet_path or settings.stylesheet or []
    if not isinstance(references, list):
        # programmatically set default may be string
        # with comma separated list:
        references = [item.strip() for item in references.split(',')]
    if not settings.stylesheet_path:
        return references
    # expand relative paths if found in stylesheet-dirs:
    search_dirs = settings.stylesheet_dirs
    return [find_file_in_dirs(reference, search_dirs)
            for reference in references]
619
620
def find_file_in_dirs(path: StrPath, dirs: Iterable[StrPath]) -> str:
    """
    Look for `path` in the directories `dirs`.

    Return the first expansion (directory with "~" expanded, joined with
    `path`) that exists.  An absolute `path` or a `path` that is not found
    in any directory is returned as is.  The return value uses "/" as
    path separator.
    """
    relative = Path(path)
    if not relative.is_absolute():
        for directory in dirs:
            candidate = Path(directory).expanduser() / relative
            if candidate.exists():
                return candidate.as_posix()
    return relative.as_posix()
635
636
def get_trim_footnote_ref_space(settings: Values) -> bool:
    """
    Tell whether the space before footnote references is to be trimmed.

    Return the setting `trim_footnote_reference_space` unless it is None.

    For the value None (also set as default for a missing setting),
    return True if the footnote reference style is 'superscript',
    else False.
    """
    if settings.setdefault('trim_footnote_reference_space', None) is not None:
        return settings.trim_footnote_reference_space
    reference_style = getattr(settings, 'footnote_references', None)
    return reference_style == 'superscript'
650
651
def get_source_line(node: Node) -> tuple[StrPath|None, int|None]:
    """
    Return the "source" and "line" attributes of `node` or, if both are
    empty, of the closest ancestor where one of them is set.

    Return ``(None, None)`` if there is no such node.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
662
663
def escape2null(text: str) -> str:
    """Return `text` with every escape-backslash replaced by a null.

    The character following an escape-backslash is kept as is (so an
    escaped backslash becomes null + backslash).
    """
    converted = []
    characters = iter(text)
    for char in characters:
        if char == '\\':
            # consume the escaped character (if any), so that it
            # is not mistaken for another escape
            converted.append('\x00' + next(characters, ''))
        else:
            converted.append(char)
    return ''.join(converted)
676
677
def split_escaped_whitespace(text: str) -> list[str]:
    """
    Return the list of strings obtained by splitting `text`
    at escaped whitespace (null+space or null+newline).
    """
    return [piece
            for chunk in text.split('\x00 ')
            for piece in chunk.split('\x00\n')]
687
688
def strip_combining_chars(text: str) -> str:
    """Return `text` without its combining characters."""
    kept = [char for char in text if not unicodedata.combining(char)]
    return ''.join(kept)
691
692
def find_combining_chars(text: str) -> list[int]:
    """Return the index of every combining char in Unicode string `text`.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars('A t̆ab̆lĕ')
    [3, 6, 9]

    """
    positions = []
    for position, char in enumerate(text):
        if unicodedata.combining(char):
            positions.append(position)
    return positions
702
703
def column_indices(text: str) -> list[int]:
    """Return the indices of Unicode string `text` that do not point to
    a combining character.

    >>> from docutils.utils import column_indices
    >>> column_indices('A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    combining = set(find_combining_chars(text))
    return [index for index in range(len(text)) if index not in combining]
718
719
east_asian_widths = {
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    # Ambiguous (s/b wide in East Asian context,
    # narrow otherwise, but that doesn't work)
    'A': 1,
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
729
730
def column_width(text: str) -> int:
    """Return the number of columns occupied by `text`.

    This is ``len(text)``, corrected for wide East Asian characters
    (counted twice) and combining Unicode characters (not counted).
    """
    total = 0
    for char in text:
        total += east_asian_widths[unicodedata.east_asian_width(char)]
    # combining chars have been counted above, take them out again:
    return total - len(find_combining_chars(text))
741
742
def uniq(L: list) -> list:
    """Return a list with the items of `L` minus duplicates.

    The order of first occurrences is kept.  Items are compared by
    equality, they do not need to be hashable.
    """
    unique = []
    for candidate in L:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
749
750
def normalize_language_tag(tag: str) -> list[str]:
    """Return the normalized combinations for a `BCP 47` language tag
    as list, longest combinations first, the base tag last.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    normalized = tag.lower().replace('-', '_')
    # A singleton marks the following subtag as non-standard,
    # keep the two together when splitting:
    normalized = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', normalized)
    base, *subtags = normalized.split('_')
    # base tag + all combinations of subtags
    variants = []
    for size in range(len(subtags), 0, -1):
        for combination in itertools.combinations(subtags, size):
            variants.append('-'.join((base, *combination)))
    variants.append(base)
    return variants
776
777
def xml_declaration(encoding: str|Literal['unicode']|None = None) -> str:
    """Return an XML text declaration (with trailing newline).

    The declaration contains an encoding declaration unless `encoding`
    is 'unicode' (case insensitive), '', or None.
    """
    with_encoding = bool(encoding and encoding.lower() != 'unicode')
    attribute = f' encoding="{encoding}"' if with_encoding else ''
    return f'<?xml version="1.0"{attribute}?>\n'
789
790
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self,
                 output_file: Literal['-'] | StrPath | None = None,
                 dependencies: Iterable[StrPath] = ()
                 ) -> None:
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        If output_file is None, no file output is done when calling add().
        """
        self.set_output(output_file)
        self.add(*dependencies)

    def set_output(self, output_file: Literal['-']|StrPath|None) -> None:
        """
        Set the output file and clear the list of already added
        dependencies.

        The specified file is immediately overwritten.

        If `output_file` is '-', the output will be written to stdout.
        The empty string or None stop output.
        """
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w', encoding='utf-8')
        else:
            self.file = None
        self.list = []

    def add(self, *paths: StrPath) -> None:
        """
        Append every path in `paths` to `self.list` unless it is
        already there.

        New entries are also written to `self.file` (one per line)
        unless `self.file` is `None`.
        """
        for path in paths:
            if isinstance(path, PurePath):
                path = path.as_posix()  # use '/' as separator
            if path not in self.list:
                self.list.append(path)
                if self.file is not None:
                    self.file.write(path+'\n')

    def close(self) -> None:
        """
        Close the output file.

        `sys.stdout` is never closed.  Nothing happens if there is no
        output file (output disabled or `close()` already called).
        """
        # `self.file` is None if no output file is set: nothing to close.
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self) -> str:
        try:
            output_file = self.file.name
        except AttributeError:
            # no output file set
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)