1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6Miscellaneous utilities for the documentation utilities.
7"""
8
9from __future__ import annotations
10
11__docformat__ = 'reStructuredText'
12
13import itertools
14import os
15import os.path
16import re
17import sys
18import unicodedata
19import warnings
20from pathlib import PurePath, Path
21
22from docutils import ApplicationError, DataError
23from docutils import io, nodes
24# for backwards compatibility
25from docutils.nodes import unescape # noqa: F401 (imported but unused)
26
27TYPE_CHECKING = False
28if TYPE_CHECKING:
29 from collections.abc import Callable, Sequence, Iterable
30 from typing import Any, Final, Literal, TextIO
31
32 from docutils.utils._typing import TypeAlias
33
34 from docutils.nodes import StrPath
35 from docutils.frontend import Values
36
37 _ObserverFunc: TypeAlias = Callable[[nodes.system_message], None]
38
39
class SystemMessage(ApplicationError):

    """Exception raised for a system message that halts processing.

    The exception text is the plain-text rendering of the
    ``system_message`` node; the numeric message level is kept in the
    `level` attribute.
    """

    def __init__(self, system_message: nodes.system_message, level: int,
                 ) -> None:
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
46
47
class SystemMessagePropagation(ApplicationError):

    """Application error tied to the propagation of system messages.

    Not raised by the code in this part of the module.
    NOTE(review): presumably used by callers to hand a ``system_message``
    up the call stack -- confirm against the raising code.
    """
50
51
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced if the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Reporter.get_source_and_line is patched in by ``RSTState.runtime_init``
    get_source_and_line: Callable[[int|None], tuple[StrPath|None, int|None]]

    levels: Final[Sequence[str]] = (
        'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SEVERE')
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    DEBUG_LEVEL: Final = 0
    INFO_LEVEL: Final = 1
    WARNING_LEVEL: Final = 2
    ERROR_LEVEL: Final = 3
    SEVERE_LEVEL: Final = 4

    def __init__(
        self,
        source: StrPath,
        report_level: int,
        halt_level: int,
        stream: io.ErrorOutput|TextIO|str|Literal[False]|None = None,
        debug: bool = False,
        encoding: str|None = None,
        error_handler: str = 'backslashreplace',
    ) -> None:
        """Low level instantiating. See also `new_reporter()`.

        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        # Anything that is not already an ErrorOutput gets wrapped in one.
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, encoding, error_handler)

        self.stream: io.ErrorOutput = stream
        """Where warning output is sent."""

        # An explicit `encoding` wins; otherwise fall back to the stream's
        # own ``encoding`` attribute, and to 'ascii' if it has none.
        self.encoding: str = encoding or getattr(stream, 'encoding', 'ascii')
        """The output character encoding."""

        self.observers: list[_ObserverFunc] = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level: int = -1
        """The highest level system message generated so far."""

    def attach_observer(self, observer: _ObserverFunc) -> None:
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer: _ObserverFunc) -> None:
        """
        Remove `observer` from the list of observers.

        Raise `ValueError` if it has not been attached.
        """
        self.observers.remove(observer)

    def notify_observers(self, message: nodes.system_message) -> None:
        """Call every attached observer with `message`, in attachment order."""
        for observer in self.observers:
            observer(message)

    def system_message(self,
                       level: int,
                       message: str|Exception,
                       *children,
                       **kwargs: Any
                       ) -> nodes.system_message:
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        :Parameters:
            - `level`: Numeric message level (index into `self.levels`).
            - `message`: The message text; an `Exception` instance is
              converted with `str()`.
            - `children`: Passed on to the `nodes.system_message` constructor.
            - `kwargs`: Attributes for the new node.  The special key
              "base_node" is not stored; it names a node from which
              "source" and "line" defaults are taken.

        :Exception:
            `SystemMessage` if `level` is at or above `self.halt_level`.
        """
        # `message` can be a `str` or `Exception` instance.
        if isinstance(message, Exception):
            message = str(message)

        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            # Explicitly passed "source"/"line" take precedence.
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
                # assert source is not None, "line- but no source-argument"
        if 'source' not in attributes:
            # 'line' is absolute line number
            try:
                source, line = self.get_source_and_line(attributes.get('line'))
            except AttributeError:
                # `get_source_and_line` has not been patched in (yet).
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        # assert attributes['line'] is not None, (message, kwargs)
        # assert attributes['source'] is not None, (message, kwargs)
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, *children,
                                   level=level, type=self.levels[level],
                                   **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            # Halting: observers are not notified, `max_level` is unchanged.
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs: Any) -> nodes.system_message|None:
        """
        Level-0, "DEBUG": an internal reporting issue.

        Typically, there is no effect on the processing. Level-0 system
        messages are handled separately from the others.

        Return ``None`` (and generate nothing) if the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
        return None

    def info(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-1, "INFO": a minor issue that can be ignored.

        Typically, there is no effect on processing and level-1 system
        messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-2, "WARNING": an issue that should be addressed.

        If ignored, there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-3, "ERROR": an error that should be addressed.

        If ignored, the output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-4, "SEVERE": a severe error that must be addressed.

        If ignored, the output will contain severe errors. Typically level-4
        system messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
266
267
class ExtensionOptionError(DataError):
    """Base class for errors in extension option fields."""
class BadOptionError(ExtensionOptionError):
    """Invalid extension option field (e.g. a multi-word field name)."""
class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data (e.g. a malformed field body)."""
class DuplicateOptionError(ExtensionOptionError):
    """The same extension option was given more than once."""
272
273
def extract_extension_options(field_list: nodes.field_list,
                              options_spec: dict[str, Callable[[object], Any]],
                              ) -> dict[str, Any]:
    """
    Return a dictionary mapping extension option names to converted values.

    The field list is first turned into (name, value) pairs with
    `extract_options()`; the pairs are then converted and collected with
    `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)
299
300
def extract_options(field_list: nodes.field_list
                    ) -> list[tuple[str, str|None]]:
    """
    Collect (name, value) option pairs from the fields of `field_list`.

    Option names are lower-cased.  The value is the text of the field
    body, or ``None`` for an empty field body.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = name_text.lower()
        body = field[1]
        if len(body) == 0:
            # Option given without a value.
            pairs.append((name, None))
            continue
        # A valid body is exactly one paragraph holding exactly one Text.
        single_text_paragraph = (
            len(body) == 1
            and isinstance(body[0], nodes.paragraph)
            and len(body[0]) == 1
            and isinstance(body[0][0], nodes.Text))
        if not single_text_paragraph:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, body[0][0].astext()))
    return pairs
335
336
def assemble_option_dict(option_list: list[tuple[str, str|None]],
                         options_spec: dict[str, Callable[[object], Any]],
                         ) -> dict[str, Any]:
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Exception args are not guaranteed to be strings; convert them
            # so that building the message cannot itself raise a TypeError.
            reason = ' '.join(str(arg) for arg in detail.args)
            # Re-raise the same exception class with the option name and
            # value prepended, keeping the original as explicit cause.
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, reason)) from detail
    return options
370
371
class NameValueError(DataError):
    """Invalid "name=value" input (missing name or data, bad quotes, ...)."""
373
374
def decode_path(path: str|bytes|None) -> str:
    """
    Return `path` as a `str` instance.

    ``None`` becomes the empty string and a `str` is returned unchanged.
    Other values are decoded, trying in turn the file system encoding,
    UTF-8, and finally ASCII with replacement of undecodable bytes.

    Raise `ValueError` if `path` has no ``decode`` method.

    Deprecated. Will be removed in Docutils 1.0.
    """
    if path is None:
        return ''
    if isinstance(path, str):
        return path
    try:
        return path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:
        raise ValueError('`path` value must be a String or ``None``, '
                         f'not {path!r}')
    except UnicodeDecodeError:
        pass  # fall through to the next encoding
    try:
        return path.decode('utf-8', 'strict')
    except UnicodeDecodeError:
        # Last resort: cannot fail, undecodable bytes are replaced.
        return path.decode('ascii', 'replace')
398
399
def extract_name_value(line):
    """
    Parse a line of the form "name=value ..." into (name, value) pairs.

    Names are stripped and lower-cased.  A value is either enclosed in
    single or double quotes (and must then be followed by whitespace or
    the end of the line) or runs up to the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    rest = line
    while rest:
        raw_name, equals, rest = rest.partition('=')
        if not equals:
            raise NameValueError('missing "="')
        attname = raw_name.strip()
        if not attname:
            raise NameValueError('missing attribute name before "="')
        rest = rest.lstrip()
        if not rest:
            raise NameValueError(f'missing value after "{attname}="')
        quote = rest[0]
        if quote in '\'"':
            closing = rest.find(quote, 1)
            if closing == -1:
                raise NameValueError(
                    f'attribute "{attname}" missing end quote ({quote})')
            # Empty at end of line; otherwise the char after the quote.
            follower = rest[closing + 1:closing + 2]
            if follower.strip():
                raise NameValueError(f'attribute "{attname}" end quote '
                                     f'({quote}) not followed by whitespace')
            data = rest[1:closing]
            rest = rest[closing + 1:].lstrip()
        else:
            # Unquoted value: everything up to the first space (or the end).
            data, _, rest = rest.partition(' ')
            rest = rest.lstrip()
        pairs.append((attname.lower(), data))
    return pairs
440
441
def new_reporter(source_path: StrPath, settings: Values) -> Reporter:
    """
    Create a `Reporter` configured from runtime settings.

    :Parameters:
        `source_path` : str or path-like object
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes `report_level`, `halt_level`,
            `warning_stream`, `debug`, `error_encoding`, and
            `error_encoding_error_handler` are read.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler,
    )
458
459
def new_document(source_path: StrPath, settings: Values|None = None
                 ) -> nodes.document:
    """
    Create and return an empty document object with a fresh reporter.

    :Parameters:
        `source_path` : str or pathlib.Path
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.

            For example, if parsing rST, at least provide the rst-parser
            settings, obtainable as follows:

            Defaults for parser component::

                settings = docutils.frontend.get_default_settings(
                               docutils.parsers.rst.Parser)

            Defaults and configuration file customizations::

                settings = docutils.core.Publisher(
                    parser=docutils.parsers.rst.Parser).get_settings()

    """
    # Import at top of module would lead to circular dependency!
    from docutils import frontend
    effective_settings = (frontend.get_default_settings()
                          if settings is None else settings)
    doc = nodes.document(effective_settings,
                         new_reporter(source_path, effective_settings),
                         source=source_path)
    doc.note_source(source_path, -1)
    return doc
495
496
def clean_rcs_keywords(
    paragraph: nodes.paragraph,
    keyword_substitutions: Sequence[tuple[re.Pattern[str], str]],
) -> None:
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing is done unless `paragraph` consists of a single `nodes.Text`
    child.  Otherwise the (pattern, substitution) pairs are tried in order;
    for the first pattern found in the text, the text node is replaced by a
    new `nodes.Text` holding the substituted text and the search stops.

    :Parameters:
        - `paragraph`: The paragraph node to clean.
        - `keyword_substitutions`: Sequence of pairs of a compiled regular
          expression and its replacement (as accepted by `re.Pattern.sub()`).
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return
508
509
def relative_path(source: StrPath|None, target: StrPath) -> str:
    """
    Return a path to `target`, relative to the file `source`.

    The return value is a `str` suitable to be included in `source`
    as a reference to `target`.

    :Parameters:
        `source` : path-like object or None
            Path of a file in the start directory for the relative path
            (the file does not need to exist).
            A false value (e.g. ``None``) is replaced with
            "<cwd>/dummy_file".
        `target` : path-like object
            End point of the returned relative path.

    Differences to `os.path.relpath()`:

    * Inverse argument order.
    * `source` is assumed to be a FILE in the start directory (add a "dummy"
      file name to obtain the path relative from a directory)
      while `os.path.relpath()` expects a DIRECTORY as `start` argument.
    * Always use Posix path separator ("/") for the output.
    * Use `os.sep` for parsing the input.
    * If there is no common prefix, return the absolute path to `target`.

    Differences to `pathlib.PurePath.relative_to(other)`:

    * pathlib offers an object oriented interface.
    * `source` expects path to a FILE while `other` expects a DIRECTORY.
    * `source` may be ``None`` (a file in the cwd is assumed),
      there is no default value for `other`.
    * `relative_path()` always returns a path (relative or absolute),
      while `PurePath.relative_to()` raises a ValueError
      if `target` is not a subpath of `other` (no ".." inserted).
    """
    start_file = os.path.abspath(source or type(target)('dummy_file'))
    source_parts = start_file.split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components shared by both paths.
    shared = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        shared += 1
    # One ".." per remaining source directory; the last remaining source
    # component is the file name itself and does not count.
    climb = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(climb + target_parts[shared:])
563
564
def get_stylesheet_reference(settings: Values,
                             relative_to: StrPath|None = None
                             ) -> str:
    """
    Return the stylesheet reference stored in the settings object.

    If `settings.stylesheet_path` is set, it is returned as a path relative
    to `relative_to` (default: `settings.output_path`); otherwise
    `settings.stylesheet` is returned unchanged.

    Deprecated; a `DeprecationWarning` is issued on every call (its text
    announces removal in Docutils 2.0).
    Use get_stylesheet_list() instead to enable specification of multiple
    stylesheets as a comma-separated list.
    """
    warnings.warn('utils.get_stylesheet_reference()'
                  ' is obsoleted by utils.get_stylesheet_list()'
                  ' and will be removed in Docutils 2.0.',
                  DeprecationWarning, stacklevel=2)
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    start = settings.output_path if relative_to is None else relative_to
    return relative_path(start, settings.stylesheet_path)
587
588
589# Return 'stylesheet' or 'stylesheet_path' arguments as list.
590#
591# The original settings arguments are kept unchanged: you can test
592# with e.g. ``if settings.stylesheet_path: ...``.
593#
# Differences to the deprecated `get_stylesheet_reference()`:
595# * return value is a list
596# * no re-writing of the path (and therefore no optional argument)
597# (if required, use ``utils.relative_path(source, target)``
598# in the calling script)
def get_stylesheet_list(settings: Values) -> list[str]:
    """Return the stylesheet references from the settings object as a list.

    `settings.stylesheet_path` takes precedence over `settings.stylesheet`;
    a non-list value is split at commas.  Entries coming from
    `stylesheet_path` are looked up in `settings.stylesheet_dirs`.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    stylesheets = settings.stylesheet_path or settings.stylesheet or []
    # programmatically set default may be string with comma separated list:
    if not isinstance(stylesheets, list):
        stylesheets = list(map(str.strip, stylesheets.split(',')))
    if not settings.stylesheet_path:
        return stylesheets
    # expand relative paths if found in stylesheet-dirs:
    return [find_file_in_dirs(path, settings.stylesheet_dirs)
            for path in stylesheets]
612
613
def find_file_in_dirs(path: StrPath, dirs: Iterable[StrPath]) -> str:
    """
    Look for `path` in each of the directories `dirs`, in order.

    Return the first combination "<dir>/<path>" (with "~" in the directory
    expanded) that exists.  An absolute `path`, or a `path` not found in
    any directory, is returned as is.  The result always uses "/" as
    path separator.
    """
    wanted = Path(path)
    if not wanted.is_absolute():
        for directory in dirs:
            candidate = Path(directory).expanduser() / wanted
            if candidate.exists():
                return candidate.as_posix()
    return wanted.as_posix()
628
629
def get_trim_footnote_ref_space(settings: Values) -> bool:
    """
    Tell whether space before footnote references is to be trimmed.

    An explicit (non-None) `trim_footnote_reference_space` setting is
    returned as is.  If the setting is None (it is initialized to None
    when missing), the result is True only for the footnote reference
    style 'superscript'.
    """
    explicit = settings.setdefault('trim_footnote_reference_space', None)
    if explicit is not None:
        return settings.trim_footnote_reference_space
    style = getattr(settings, 'footnote_references', None)
    return style == 'superscript'
643
644
def get_source_line(node) -> tuple[StrPath|None, int|None]:
    """
    Return the pair ("source", "line") of `node` or its closest ancestor.

    The node itself and then its parents are visited until one is found
    where at least one of the two attributes is true; both of its values
    are returned.  Return ``(None, None)`` if there is no such node.
    """
    current = node
    while current:
        if not (current.source or current.line):
            current = current.parent
            continue
        return current.source, current.line
    return None, None
655
656
def escape2null(text: str) -> str:
    """Return `text` with escape-backslashes converted to nulls.

    Every backslash that is not itself escaped is replaced by a NUL
    character (U+0000); the character following it (if any) is kept
    unchanged and never treated as an escape itself.
    """
    # The optional group consumes the escaped character (any character,
    # including a newline or another backslash) so that it is skipped.
    return re.sub(r'\\(.?)',
                  lambda match: '\x00' + match.group(1),
                  text,
                  flags=re.DOTALL)
669
670
def split_escaped_whitespace(text: str) -> list[str]:
    """
    Return the list of strings obtained by splitting `text` at each
    escaped whitespace (null+space or null+newline).
    """
    return [piece
            for chunk in text.split('\x00 ')
            for piece in chunk.split('\x00\n')]
680
681
def strip_combining_chars(text: str) -> str:
    """Return `text` without its combining characters."""
    # `unicodedata.combining()` returns 0 for non-combining characters.
    return ''.join(itertools.filterfalse(unicodedata.combining, text))
684
685
def find_combining_chars(text: str) -> list[int]:
    """Return the positions of all combining characters in `text`.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars('A t̆ab̆lĕ')
    [3, 6, 9]

    """
    positions = []
    for position, char in enumerate(text):
        if unicodedata.combining(char):
            positions.append(position)
    return positions
695
696
def column_indices(text: str) -> list[int]:
    """Return the indices of all non-combining characters in `text`.

    >>> from docutils.utils import column_indices
    >>> column_indices('A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    combining = set(find_combining_chars(text))
    return [index for index in range(len(text)) if index not in combining]
711
712
east_asian_widths = {
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    # Ambiguous (s/b wide in East Asian context,
    # narrow otherwise, but that doesn't work)
    'A': 1,
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
722
723
def column_width(text: str) -> int:
    """Return the number of columns `text` occupies.

    This is ``len(text)`` corrected for wide East Asian characters
    (two columns each, according to `east_asian_widths`) and
    combining Unicode characters (one column less each).
    """
    total = 0
    for char in text:
        total += east_asian_widths[unicodedata.east_asian_width(char)]
    # correction for combining chars:
    return total - len(find_combining_chars(text))
734
735
def uniq(L: list) -> list:
    """Return a new list with the items of `L`, dropping repetitions.

    The first occurrence of each item is kept, in the original order.
    Items are compared for equality and need not be hashable.
    """
    unique_items = []
    for candidate in L:
        if candidate in unique_items:
            continue
        unique_items.append(candidate)
    return unique_items
742
743
def normalize_language_tag(tag: str) -> list[str]:
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The tag is lower-cased and split into subtags at "-" and "_".
    The result lists the main tag combined with every selection of the
    remaining subtags (longest first), ending with the main tag alone.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    normalized = tag.lower().replace('-', '_')
    # split (except singletons, which mark the following tag as non-standard):
    normalized = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', normalized)
    main_tag, *subtags = normalized.split('_')
    # find all combinations of subtags
    variants = []
    for size in range(len(subtags), 0, -1):
        for selection in itertools.combinations(subtags, size):
            variants.append('-'.join((main_tag, *selection)))
    variants.append(main_tag)
    return variants
769
770
def xml_declaration(encoding: str|Literal['unicode']|None = None) -> str:
    """Return an XML text declaration, terminated by a newline.

    The declaration names the `encoding` unless `encoding` is
    'unicode' (in any letter case), '', or None.
    """
    declare_encoding = bool(encoding) and encoding.lower() != 'unicode'
    encoding_declaration = (f' encoding="{encoding}"'
                            if declare_encoding else '')
    return f'<?xml version="1.0"{encoding_declaration}?>\n'
782
783
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self,
                 output_file: Literal['-'] | StrPath | None = None,
                 dependencies: Iterable[StrPath] = ()
                 ) -> None:
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        If output_file is None, no file output is done when calling add().
        """
        self.set_output(output_file)
        self.add(*dependencies)

    def set_output(self, output_file: Literal['-']|StrPath|None) -> None:
        """
        Set the output file and clear the list of already added
        dependencies.

        The specified file is immediately overwritten.

        If `output_file` is '-', the output will be written to stdout.
        The empty string or None stop output.
        """
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w', encoding='utf-8')
        else:
            self.file = None
        self.list = []

    def add(self, *paths: StrPath) -> None:
        """
        Append each of `paths` to `self.list` unless it is already there.

        Newly added paths are also written to `self.file` (one per line)
        unless `self.file` is `None`.
        """
        for path in paths:
            if isinstance(path, PurePath):
                path = path.as_posix()  # use '/' as separator
            if path not in self.list:
                self.list.append(path)
                if self.file is not None:
                    self.file.write(path+'\n')

    def close(self) -> None:
        """
        Close the output file.

        `sys.stdout` is never closed.  Calling this method without an
        output file (or more than once) is a no-op.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self) -> str:
        try:
            output_file = self.file.name
        except AttributeError:
            # No output file set (or a file object without a name).
            output_file = None
        return f'{self.__class__.__name__}({output_file!r}, {self.list})'