1# $Id$ 
    2# Author: David Goodger <goodger@python.org> 
    3# Copyright: This module has been placed in the public domain. 
    4 
    5""" 
    6Miscellaneous utilities for the documentation utilities. 
    7""" 
    8 
    9__docformat__ = 'reStructuredText' 
    10 
    11import sys 
    12import os 
    13import os.path 
    14from pathlib import PurePath, Path 
    15import re 
    16import itertools 
    17import warnings 
    18import unicodedata 
    19 
    20from docutils import ApplicationError, DataError, __version_info__ 
    21from docutils import io, nodes 
    22# for backwards compatibility 
    23from docutils.nodes import unescape  # noqa: F401 
    24 
    25 
    26class SystemMessage(ApplicationError): 
    27 
    28    def __init__(self, system_message, level): 
    29        Exception.__init__(self, system_message.astext()) 
    30        self.level = level 
    31 
    32 
    33class SystemMessagePropagation(ApplicationError): 
    34    pass 
    35 
    36 
    37class Reporter: 
    38 
    39    """ 
    40    Info/warning/error reporter and ``system_message`` element generator. 
    41 
    42    Five levels of system messages are defined, along with corresponding 
    43    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`. 
    44 
    45    There is typically one Reporter object per process.  A Reporter object is 
    46    instantiated with thresholds for reporting (generating warnings) and 
    47    halting processing (raising exceptions), a switch to turn debug output on 
    48    or off, and an I/O stream for warnings.  These are stored as instance 
    49    attributes. 
    50 
    51    When a system message is generated, its level is compared to the stored 
    52    thresholds, and a warning or error is generated as appropriate.  Debug 
    53    messages are produced if the stored debug switch is on, independently of 
    54    other thresholds.  Message output is sent to the stored warning stream if 
    55    not set to ''. 
    56 
    57    The Reporter class also employs a modified form of the "Observer" pattern 
    58    [GoF95]_ to track system messages generated.  The `attach_observer` method 
    59    should be called before parsing, with a bound method or function which 
    60    accepts system messages.  The observer can be removed with 
    61    `detach_observer`, and another added in its place. 
    62 
    63    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of 
    64       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA, 
    65       1995. 
    66    """ 
    67 
    68    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split() 
    69    """List of names for system message levels, indexed by level.""" 
    70 
    71    # system message level constants: 
    72    (DEBUG_LEVEL, 
    73     INFO_LEVEL, 
    74     WARNING_LEVEL, 
    75     ERROR_LEVEL, 
    76     SEVERE_LEVEL) = range(5) 
    77 
    78    def __init__(self, source, report_level, halt_level, stream=None, 
    79                 debug=False, encoding=None, error_handler='backslashreplace'): 
    80        """ 
    81        :Parameters: 
    82            - `source`: The path to or description of the source data. 
    83            - `report_level`: The level at or above which warning output will 
    84              be sent to `stream`. 
    85            - `halt_level`: The level at or above which `SystemMessage` 
    86              exceptions will be raised, halting execution. 
    87            - `debug`: Show debug (level=0) system messages? 
    88            - `stream`: Where warning output is sent.  Can be file-like (has a 
    89              ``.write`` method), a string (file name, opened for writing), 
    90              '' (empty string) or `False` (for discarding all stream messages) 
    91              or `None` (implies `sys.stderr`; default). 
    92            - `encoding`: The output encoding. 
    93            - `error_handler`: The error handler for stderr output encoding. 
    94        """ 
    95 
    96        self.source = source 
    97        """The path to or description of the source data.""" 
    98 
    99        self.error_handler = error_handler 
    100        """The character encoding error handler.""" 
    101 
    102        self.debug_flag = debug 
    103        """Show debug (level=0) system messages?""" 
    104 
    105        self.report_level = report_level 
    106        """The level at or above which warning output will be sent 
    107        to `self.stream`.""" 
    108 
    109        self.halt_level = halt_level 
    110        """The level at or above which `SystemMessage` exceptions 
    111        will be raised, halting execution.""" 
    112 
    113        if not isinstance(stream, io.ErrorOutput): 
    114            stream = io.ErrorOutput(stream, encoding, error_handler) 
    115 
    116        self.stream = stream 
    117        """Where warning output is sent.""" 
    118 
    119        self.encoding = encoding or getattr(stream, 'encoding', 'ascii') 
    120        """The output character encoding.""" 
    121 
    122        self.observers = [] 
    123        """List of bound methods or functions to call with each system_message 
    124        created.""" 
    125 
    126        self.max_level = -1 
    127        """The highest level system message generated so far.""" 
    128 
    129    def set_conditions(self, category, report_level, halt_level, 
    130                       stream=None, debug=False): 
    131        warnings.warn('docutils.utils.Reporter.set_conditions() deprecated; ' 
    132                      'Will be removed in Docutils 0.21 or later. ' 
    133                      'Set attributes via configuration settings or directly.', 
    134                      DeprecationWarning, stacklevel=2) 
    135        self.report_level = report_level 
    136        self.halt_level = halt_level 
    137        if not isinstance(stream, io.ErrorOutput): 
    138            stream = io.ErrorOutput(stream, self.encoding, self.error_handler) 
    139        self.stream = stream 
    140        self.debug_flag = debug 
    141 
    142    def attach_observer(self, observer): 
    143        """ 
    144        The `observer` parameter is a function or bound method which takes one 
    145        argument, a `nodes.system_message` instance. 
    146        """ 
    147        self.observers.append(observer) 
    148 
    149    def detach_observer(self, observer): 
    150        self.observers.remove(observer) 
    151 
    152    def notify_observers(self, message): 
    153        for observer in self.observers: 
    154            observer(message) 
    155 
    156    def system_message(self, level, message, *children, **kwargs): 
    157        """ 
    158        Return a system_message object. 
    159 
    160        Raise an exception or generate a warning if appropriate. 
    161        """ 
    162        # `message` can be a `str` or `Exception` instance. 
    163        if isinstance(message, Exception): 
    164            message = str(message) 
    165 
    166        attributes = kwargs.copy() 
    167        if 'base_node' in kwargs: 
    168            source, line = get_source_line(kwargs['base_node']) 
    169            del attributes['base_node'] 
    170            if source is not None: 
    171                attributes.setdefault('source', source) 
    172            if line is not None: 
    173                attributes.setdefault('line', line) 
    174                # assert source is not None, "line- but no source-argument" 
    175        if 'source' not in attributes: 
    176            # 'line' is absolute line number 
    177            try: 
    178                source, line = self.get_source_and_line(attributes.get('line')) 
    179            except AttributeError: 
    180                source, line = None, None 
    181            if source is not None: 
    182                attributes['source'] = source 
    183            if line is not None: 
    184                attributes['line'] = line 
    185        # assert attributes['line'] is not None, (message, kwargs) 
    186        # assert attributes['source'] is not None, (message, kwargs) 
    187        attributes.setdefault('source', self.source) 
    188 
    189        msg = nodes.system_message(message, level=level, 
    190                                   type=self.levels[level], 
    191                                   *children, **attributes) 
    192        if self.stream and (level >= self.report_level 
    193                            or self.debug_flag and level == self.DEBUG_LEVEL 
    194                            or level >= self.halt_level): 
    195            self.stream.write(msg.astext() + '\n') 
    196        if level >= self.halt_level: 
    197            raise SystemMessage(msg, level) 
    198        if level > self.DEBUG_LEVEL or self.debug_flag: 
    199            self.notify_observers(msg) 
    200        self.max_level = max(level, self.max_level) 
    201        return msg 
    202 
    203    def debug(self, *args, **kwargs): 
    204        """ 
    205        Level-0, "DEBUG": an internal reporting issue. Typically, there is no 
    206        effect on the processing. Level-0 system messages are handled 
    207        separately from the others. 
    208        """ 
    209        if self.debug_flag: 
    210            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs) 
    211 
    212    def info(self, *args, **kwargs): 
    213        """ 
    214        Level-1, "INFO": a minor issue that can be ignored. Typically there is 
    215        no effect on processing, and level-1 system messages are not reported. 
    216        """ 
    217        return self.system_message(self.INFO_LEVEL, *args, **kwargs) 
    218 
    219    def warning(self, *args, **kwargs): 
    220        """ 
    221        Level-2, "WARNING": an issue that should be addressed. If ignored, 
    222        there may be unpredictable problems with the output. 
    223        """ 
    224        return self.system_message(self.WARNING_LEVEL, *args, **kwargs) 
    225 
    226    def error(self, *args, **kwargs): 
    227        """ 
    228        Level-3, "ERROR": an error that should be addressed. If ignored, the 
    229        output will contain errors. 
    230        """ 
    231        return self.system_message(self.ERROR_LEVEL, *args, **kwargs) 
    232 
    233    def severe(self, *args, **kwargs): 
    234        """ 
    235        Level-4, "SEVERE": a severe error that must be addressed. If ignored, 
    236        the output will contain severe errors. Typically level-4 system 
    237        messages are turned into exceptions which halt processing. 
    238        """ 
    239        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs) 
    240 
    241 
    242class ExtensionOptionError(DataError): pass 
    243class BadOptionError(ExtensionOptionError): pass 
    244class BadOptionDataError(ExtensionOptionError): pass 
    245class DuplicateOptionError(ExtensionOptionError): pass 
    246 
    247 
    248def extract_extension_options(field_list, options_spec): 
    249    """ 
    250    Return a dictionary mapping extension option names to converted values. 
    251 
    252    :Parameters: 
    253        - `field_list`: A flat field list without field arguments, where each 
    254          field body consists of a single paragraph only. 
    255        - `options_spec`: Dictionary mapping known option names to a 
    256          conversion function such as `int` or `float`. 
    257 
    258    :Exceptions: 
    259        - `KeyError` for unknown option names. 
    260        - `ValueError` for invalid option values (raised by the conversion 
    261           function). 
    262        - `TypeError` for invalid option value types (raised by conversion 
    263           function). 
    264        - `DuplicateOptionError` for duplicate options. 
    265        - `BadOptionError` for invalid fields. 
    266        - `BadOptionDataError` for invalid option data (missing name, 
    267          missing data, bad quotes, etc.). 
    268    """ 
    269    option_list = extract_options(field_list) 
    270    return assemble_option_dict(option_list, options_spec) 
    271 
    272 
    273def extract_options(field_list): 
    274    """ 
    275    Return a list of option (name, value) pairs from field names & bodies. 
    276 
    277    :Parameter: 
    278        `field_list`: A flat field list, where each field name is a single 
    279        word and each field body consists of a single paragraph only. 
    280 
    281    :Exceptions: 
    282        - `BadOptionError` for invalid fields. 
    283        - `BadOptionDataError` for invalid option data (missing name, 
    284          missing data, bad quotes, etc.). 
    285    """ 
    286    option_list = [] 
    287    for field in field_list: 
    288        if len(field[0].astext().split()) != 1: 
    289            raise BadOptionError( 
    290                'extension option field name may not contain multiple words') 
    291        name = str(field[0].astext().lower()) 
    292        body = field[1] 
    293        if len(body) == 0: 
    294            data = None 
    295        elif (len(body) > 1 
    296              or not isinstance(body[0], nodes.paragraph) 
    297              or len(body[0]) != 1 
    298              or not isinstance(body[0][0], nodes.Text)): 
    299            raise BadOptionDataError( 
    300                  'extension option field body may contain\n' 
    301                  'a single paragraph only (option "%s")' % name) 
    302        else: 
    303            data = body[0][0].astext() 
    304        option_list.append((name, data)) 
    305    return option_list 
    306 
    307 
    308def assemble_option_dict(option_list, options_spec): 
    309    """ 
    310    Return a mapping of option names to values. 
    311 
    312    :Parameters: 
    313        - `option_list`: A list of (name, value) pairs (the output of 
    314          `extract_options()`). 
    315        - `options_spec`: Dictionary mapping known option names to a 
    316          conversion function such as `int` or `float`. 
    317 
    318    :Exceptions: 
    319        - `KeyError` for unknown option names. 
    320        - `DuplicateOptionError` for duplicate options. 
    321        - `ValueError` for invalid option values (raised by conversion 
    322           function). 
    323        - `TypeError` for invalid option value types (raised by conversion 
    324           function). 
    325    """ 
    326    options = {} 
    327    for name, value in option_list: 
    328        convertor = options_spec[name]  # raises KeyError if unknown 
    329        if convertor is None: 
    330            raise KeyError(name)        # or if explicitly disabled 
    331        if name in options: 
    332            raise DuplicateOptionError('duplicate option "%s"' % name) 
    333        try: 
    334            options[name] = convertor(value) 
    335        except (ValueError, TypeError) as detail: 
    336            raise detail.__class__('(option: "%s"; value: %r)\n%s' 
    337                                   % (name, value, ' '.join(detail.args))) 
    338    return options 
    339 
    340 
    341class NameValueError(DataError): pass 
    342 
    343 
    344def decode_path(path): 
    345    """ 
    346    Ensure `path` is Unicode. Return `str` instance. 
    347 
    348    Decode file/path string in a failsafe manner if not already done. 
    349    """ 
    350    # TODO: is this still required with Python 3? 
    351    if isinstance(path, str): 
    352        return path 
    353    try: 
    354        path = path.decode(sys.getfilesystemencoding(), 'strict') 
    355    except AttributeError:  # default value None has no decode method 
    356        if not path: 
    357            return '' 
    358        raise ValueError('`path` value must be a String or ``None``, ' 
    359                         f'not {path!r}') 
    360    except UnicodeDecodeError: 
    361        try: 
    362            path = path.decode('utf-8', 'strict') 
    363        except UnicodeDecodeError: 
    364            path = path.decode('ascii', 'replace') 
    365    return path 
    366 
    367 
    368def extract_name_value(line): 
    369    """ 
    370    Return a list of (name, value) from a line of the form "name=value ...". 
    371 
    372    :Exception: 
    373        `NameValueError` for invalid input (missing name, missing data, bad 
    374        quotes, etc.). 
    375    """ 
    376    attlist = [] 
    377    while line: 
    378        equals = line.find('=') 
    379        if equals == -1: 
    380            raise NameValueError('missing "="') 
    381        attname = line[:equals].strip() 
    382        if equals == 0 or not attname: 
    383            raise NameValueError( 
    384                  'missing attribute name before "="') 
    385        line = line[equals+1:].lstrip() 
    386        if not line: 
    387            raise NameValueError( 
    388                  'missing value after "%s="' % attname) 
    389        if line[0] in '\'"': 
    390            endquote = line.find(line[0], 1) 
    391            if endquote == -1: 
    392                raise NameValueError( 
    393                      'attribute "%s" missing end quote (%s)' 
    394                      % (attname, line[0])) 
    395            if len(line) > endquote + 1 and line[endquote + 1].strip(): 
    396                raise NameValueError( 
    397                      'attribute "%s" end quote (%s) not followed by ' 
    398                      'whitespace' % (attname, line[0])) 
    399            data = line[1:endquote] 
    400            line = line[endquote+1:].lstrip() 
    401        else: 
    402            space = line.find(' ') 
    403            if space == -1: 
    404                data = line 
    405                line = '' 
    406            else: 
    407                data = line[:space] 
    408                line = line[space+1:].lstrip() 
    409        attlist.append((attname.lower(), data)) 
    410    return attlist 
    411 
    412 
    413def new_reporter(source_path, settings): 
    414    """ 
    415    Return a new Reporter object. 
    416 
    417    :Parameters: 
    418        `source` : string 
    419            The path to or description of the source text of the document. 
    420        `settings` : optparse.Values object 
    421            Runtime settings. 
    422    """ 
    423    reporter = Reporter( 
    424        source_path, settings.report_level, settings.halt_level, 
    425        stream=settings.warning_stream, debug=settings.debug, 
    426        encoding=settings.error_encoding, 
    427        error_handler=settings.error_encoding_error_handler) 
    428    return reporter 
    429 
    430 
    431def new_document(source_path, settings=None): 
    432    """ 
    433    Return a new empty document object. 
    434 
    435    :Parameters: 
    436        `source_path` : string 
    437            The path to or description of the source text of the document. 
    438        `settings` : optparse.Values object 
    439            Runtime settings.  If none are provided, a default core set will 
    440            be used.  If you will use the document object with any Docutils 
    441            components, you must provide their default settings as well. 
    442 
    443            For example, if parsing rST, at least provide the rst-parser 
    444            settings, obtainable as follows: 
    445 
    446            Defaults for parser component:: 
    447 
    448                settings = docutils.frontend.get_default_settings( 
    449                               docutils.parsers.rst.Parser) 
    450 
    451            Defaults and configuration file customizations:: 
    452 
    453                settings = docutils.core.Publisher( 
    454                    parser=docutils.parsers.rst.Parser).get_settings() 
    455 
    456    """ 
    457    # Import at top of module would lead to circular dependency! 
    458    from docutils import frontend 
    459    if settings is None: 
    460        settings = frontend.get_default_settings() 
    461    source_path = decode_path(source_path) 
    462    reporter = new_reporter(source_path, settings) 
    463    document = nodes.document(settings, reporter, source=source_path) 
    464    document.note_source(source_path, -1) 
    465    return document 
    466 
    467 
    468def clean_rcs_keywords(paragraph, keyword_substitutions): 
    469    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text): 
    470        textnode = paragraph[0] 
    471        for pattern, substitution in keyword_substitutions: 
    472            match = pattern.search(textnode) 
    473            if match: 
    474                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode)) 
    475                return 
    476 
    477 
    478def relative_path(source, target): 
    479    """ 
    480    Build and return a path to `target`, relative to `source` (both files). 
    481 
    482    The return value is a `str` suitable to be included in `source` 
    483    as a reference to `target`. 
    484 
    485    :Parameters: 
    486        `source` : path-like object or None 
    487            Path of a file in the start directory for the relative path 
    488            (the file does not need to exist). 
    489            The value ``None`` is replaced with "<cwd>/dummy_file". 
    490        `target` : path-like object 
    491            End point of the returned relative path. 
    492 
    493    Differences to `os.path.relpath()`: 
    494 
    495    * Inverse argument order. 
    496    * `source` is assumed to be a FILE in the start directory (add a "dummy" 
    497      file name to obtain the path relative from a directory) 
    498      while `os.path.relpath()` expects a DIRECTORY as `start` argument. 
    499    * Always use Posix path separator ("/") for the output. 
    500    * Use `os.sep` for parsing the input 
    501      (changing the value of `os.sep` is ignored by `os.relpath()`). 
    502    * If there is no common prefix, return the absolute path to `target`. 
    503 
    504    Differences to `pathlib.PurePath.relative_to(other)`: 
    505 
    506    * pathlib offers an object oriented interface. 
    507    * `source` expects path to a FILE while `other` expects a DIRECTORY. 
    508    * `target` defaults to the cwd, no default value for `other`. 
    509    * `relative_path()` always returns a path (relative or absolute), 
    510      while `PurePath.relative_to()` raises a ValueError 
    511      if `target` is not a subpath of `other` (no ".." inserted). 
    512    """ 
    513    source_parts = os.path.abspath(source or type(target)('dummy_file') 
    514                                   ).split(os.sep) 
    515    target_parts = os.path.abspath(target).split(os.sep) 
    516    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']: 
    517    if source_parts[:2] != target_parts[:2]: 
    518        # Nothing in common between paths. 
    519        # Return absolute path, using '/' for URLs: 
    520        return '/'.join(target_parts) 
    521    source_parts.reverse() 
    522    target_parts.reverse() 
    523    while (source_parts and target_parts 
    524           and source_parts[-1] == target_parts[-1]): 
    525        # Remove path components in common: 
    526        source_parts.pop() 
    527        target_parts.pop() 
    528    target_parts.reverse() 
    529    parts = ['..'] * (len(source_parts) - 1) + target_parts 
    530    return '/'.join(parts) 
    531 
    532 
    533def get_stylesheet_reference(settings, relative_to=None): 
    534    """ 
    535    Retrieve a stylesheet reference from the settings object. 
    536 
    537    Deprecated. Use get_stylesheet_list() instead to 
    538    enable specification of multiple stylesheets as a comma-separated 
    539    list. 
    540    """ 
    541    warnings.warn('utils.get_stylesheet_reference()' 
    542                  ' is obsoleted by utils.get_stylesheet_list()' 
    543                  ' and will be removed in Docutils 2.0.', 
    544                  DeprecationWarning, stacklevel=2) 
    545    if settings.stylesheet_path: 
    546        assert not settings.stylesheet, ( 
    547            'stylesheet and stylesheet_path are mutually exclusive.') 
    548        if relative_to is None: 
    549            relative_to = settings._destination 
    550        return relative_path(relative_to, settings.stylesheet_path) 
    551    else: 
    552        return settings.stylesheet 
    553 
    554 
    555# Return 'stylesheet' or 'stylesheet_path' arguments as list. 
    556# 
    557# The original settings arguments are kept unchanged: you can test 
    558# with e.g. ``if settings.stylesheet_path: ...``. 
    559# 
# Differences to the deprecated `get_stylesheet_reference()`:
    561# * return value is a list 
    562# * no re-writing of the path (and therefore no optional argument) 
    563#   (if required, use ``utils.relative_path(source, target)`` 
    564#   in the calling script) 
    565def get_stylesheet_list(settings): 
    566    """ 
    567    Retrieve list of stylesheet references from the settings object. 
    568    """ 
    569    assert not (settings.stylesheet and settings.stylesheet_path), ( 
    570            'stylesheet and stylesheet_path are mutually exclusive.') 
    571    stylesheets = settings.stylesheet_path or settings.stylesheet or [] 
    572    # programmatically set default may be string with comma separated list: 
    573    if not isinstance(stylesheets, list): 
    574        stylesheets = [path.strip() for path in stylesheets.split(',')] 
    575    if settings.stylesheet_path: 
    576        # expand relative paths if found in stylesheet-dirs: 
    577        stylesheets = [find_file_in_dirs(path, settings.stylesheet_dirs) 
    578                       for path in stylesheets] 
    579    return stylesheets 
    580 
    581 
    582def find_file_in_dirs(path, dirs): 
    583    """ 
    584    Search for `path` in the list of directories `dirs`. 
    585 
    586    Return the first expansion that matches an existing file. 
    587    """ 
    588    path = Path(path) 
    589    if path.is_absolute(): 
    590        return path.as_posix() 
    591    for d in dirs: 
    592        f = Path(d).expanduser() / path 
    593        if f.exists(): 
    594            return f.as_posix() 
    595    return path.as_posix() 
    596 
    597 
    598def get_trim_footnote_ref_space(settings): 
    599    """ 
    600    Return whether or not to trim footnote space. 
    601 
    602    If trim_footnote_reference_space is not None, return it. 
    603 
    604    If trim_footnote_reference_space is None, return False unless the 
    605    footnote reference style is 'superscript'. 
    606    """ 
    607    if settings.setdefault('trim_footnote_reference_space', None) is None: 
    608        return getattr(settings, 'footnote_references', None) == 'superscript' 
    609    else: 
    610        return settings.trim_footnote_reference_space 
    611 
    612 
    613def get_source_line(node): 
    614    """ 
    615    Return the "source" and "line" attributes from the `node` given or from 
    616    its closest ancestor. 
    617    """ 
    618    while node: 
    619        if node.source or node.line: 
    620            return node.source, node.line 
    621        node = node.parent 
    622    return None, None 
    623 
    624 
    625def escape2null(text): 
    626    """Return a string with escape-backslashes converted to nulls.""" 
    627    parts = [] 
    628    start = 0 
    629    while True: 
    630        found = text.find('\\', start) 
    631        if found == -1: 
    632            parts.append(text[start:]) 
    633            return ''.join(parts) 
    634        parts.append(text[start:found]) 
    635        parts.append('\x00' + text[found+1:found+2]) 
    636        start = found + 2               # skip character after escape 
    637 
    638 
    639def split_escaped_whitespace(text): 
    640    """ 
    641    Split `text` on escaped whitespace (null+space or null+newline). 
    642    Return a list of strings. 
    643    """ 
    644    strings = text.split('\x00 ') 
    645    strings = [string.split('\x00\n') for string in strings] 
    646    # flatten list of lists of strings to list of strings: 
    647    return list(itertools.chain(*strings)) 
    648 
    649 
    650def strip_combining_chars(text): 
    651    return ''.join(c for c in text if not unicodedata.combining(c)) 
    652 
    653 
    654def find_combining_chars(text): 
    655    """Return indices of all combining chars in  Unicode string `text`. 
    656 
    657    >>> from docutils.utils import find_combining_chars 
    658    >>> find_combining_chars('A t̆ab̆lĕ') 
    659    [3, 6, 9] 
    660 
    661    """ 
    662    return [i for i, c in enumerate(text) if unicodedata.combining(c)] 
    663 
    664 
    665def column_indices(text): 
    666    """Indices of Unicode string `text` when skipping combining characters. 
    667 
    668    >>> from docutils.utils import column_indices 
    669    >>> column_indices('A t̆ab̆lĕ') 
    670    [0, 1, 2, 4, 5, 7, 8] 
    671 
    672    """ 
    673    # TODO: account for asian wide chars here instead of using dummy 
    674    # replacements in the tableparser? 
    675    string_indices = list(range(len(text))) 
    676    for index in find_combining_chars(text): 
    677        string_indices[index] = None 
    678    return [i for i in string_indices if i is not None] 
    679 
    680 
    681east_asian_widths = {'W': 2,   # Wide 
    682                     'F': 2,   # Full-width (wide) 
    683                     'Na': 1,  # Narrow 
    684                     'H': 1,   # Half-width (narrow) 
    685                     'N': 1,   # Neutral (not East Asian, treated as narrow) 
    686                     'A': 1,   # Ambiguous (s/b wide in East Asian context, 
    687                     }         # narrow otherwise, but that doesn't work) 
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
    690 
    691 
    692def column_width(text): 
    693    """Return the column width of text. 
    694 
    695    Correct ``len(text)`` for wide East Asian and combining Unicode chars. 
    696    """ 
    697    width = sum(east_asian_widths[unicodedata.east_asian_width(c)] 
    698                for c in text) 
    699    # correction for combining chars: 
    700    width -= len(find_combining_chars(text)) 
    701    return width 
    702 
    703 
    704def uniq(L): 
    705    r = [] 
    706    for item in L: 
    707        if item not in r: 
    708            r.append(item) 
    709    return r 
    710 
    711 
    712def normalize_language_tag(tag): 
    713    """Return a list of normalized combinations for a `BCP 47` language tag. 
    714 
    715    Example: 
    716 
    717    >>> from docutils.utils import normalize_language_tag 
    718    >>> normalize_language_tag('de_AT-1901') 
    719    ['de-at-1901', 'de-at', 'de-1901', 'de'] 
    720    >>> normalize_language_tag('de-CH-x_altquot') 
    721    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de'] 
    722 
    723    """ 
    724    # normalize: 
    725    tag = tag.lower().replace('-', '_') 
    726    # split (except singletons, which mark the following tag as non-standard): 
    727    tag = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', tag) 
    728    subtags = [subtag for subtag in tag.split('_')] 
    729    base_tag = (subtags.pop(0),) 
    730    # find all combinations of subtags 
    731    taglist = [] 
    732    for n in range(len(subtags), 0, -1): 
    733        for tags in itertools.combinations(subtags, n): 
    734            taglist.append('-'.join(base_tag+tags)) 
    735    taglist += base_tag 
    736    return taglist 
    737 
    738 
    739def xml_declaration(encoding=None): 
    740    """Return an XML text declaration. 
    741 
    742    Include an encoding declaration, if `encoding` 
    743    is not 'unicode', '', or None. 
    744    """ 
    745    if encoding and encoding.lower() != 'unicode': 
    746        encoding_declaration = f' encoding="{encoding}"' 
    747    else: 
    748        encoding_declaration = '' 
    749    return f'<?xml version="1.0"{encoding_declaration}?>\n' 
    750 
    751 
    752class DependencyList: 
    753 
    754    """ 
    755    List of dependencies, with file recording support. 
    756 
    757    Note that the output file is not automatically closed.  You have 
    758    to explicitly call the close() method. 
    759    """ 
    760 
    761    def __init__(self, output_file=None, dependencies=()): 
    762        """ 
    763        Initialize the dependency list, automatically setting the 
    764        output file to `output_file` (see `set_output()`) and adding 
    765        all supplied dependencies. 
    766 
    767        If output_file is None, no file output is done when calling add(). 
    768        """ 
    769        self.list = [] 
    770        self.file = None 
    771        if output_file: 
    772            self.set_output(output_file) 
    773        self.add(*dependencies) 
    774 
    775    def set_output(self, output_file): 
    776        """ 
    777        Set the output file and clear the list of already added 
    778        dependencies. 
    779 
    780        `output_file` must be a string.  The specified file is 
    781        immediately overwritten. 
    782 
    783        If output_file is '-', the output will be written to stdout. 
    784        """ 
    785        if output_file: 
    786            if output_file == '-': 
    787                self.file = sys.stdout 
    788            else: 
    789                self.file = open(output_file, 'w', encoding='utf-8') 
    790 
    791    def add(self, *paths): 
    792        """ 
    793        Append `path` to `self.list` unless it is already there. 
    794 
    795        Also append to `self.file` unless it is already there 
    796        or `self.file is `None`. 
    797        """ 
    798        for path in paths: 
    799            if isinstance(path, PurePath): 
    800                path = path.as_posix()  # use '/' as separator 
    801            if path not in self.list: 
    802                self.list.append(path) 
    803                if self.file is not None: 
    804                    self.file.write(path+'\n') 
    805 
    806    def close(self): 
    807        """ 
    808        Close the output file. 
    809        """ 
    810        if self.file is not sys.stdout: 
    811            self.file.close() 
    812        self.file = None 
    813 
    814    def __repr__(self): 
    815        try: 
    816            output_file = self.file.name 
    817        except AttributeError: 
    818            output_file = None 
    819        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list) 
    820 
    821 
    822release_level_abbreviations = { 
    823    'alpha': 'a', 
    824    'beta': 'b', 
    825    'candidate': 'rc', 
    826    'final': ''} 
    827 
    828 
    829def version_identifier(version_info=None): 
    830    """ 
    831    Return a version identifier string built from `version_info`, a 
    832    `docutils.VersionInfo` namedtuple instance or compatible tuple. If 
    833    `version_info` is not provided, by default return a version identifier 
    834    string based on `docutils.__version_info__` (i.e. the current Docutils 
    835    version). 
    836    """ 
    837    if version_info is None: 
    838        version_info = __version_info__ 
    839    if version_info.micro: 
    840        micro = '.%s' % version_info.micro 
    841    else: 
    842        # 0 is omitted: 
    843        micro = '' 
    844    releaselevel = release_level_abbreviations[version_info.releaselevel] 
    845    if version_info.serial: 
    846        serial = version_info.serial 
    847    else: 
    848        # 0 is omitted: 
    849        serial = '' 
    850    if version_info.release: 
    851        dev = '' 
    852    else: 
    853        dev = '.dev' 
    854    version = '%s.%s%s%s%s%s' % ( 
    855        version_info.major, 
    856        version_info.minor, 
    857        micro, 
    858        releaselevel, 
    859        serial, 
    860        dev) 
    861    return version