1import os 
    2import re 
    3 
    4 
    5_default_seps = os.sep + str(os.altsep) * bool(os.altsep) 
    6 
    7 
    8class Translator: 
    9    """ 
    10    >>> Translator('xyz') 
    11    Traceback (most recent call last): 
    12    ... 
    13    AssertionError: Invalid separators 
    14 
    15    >>> Translator('') 
    16    Traceback (most recent call last): 
    17    ... 
    18    AssertionError: Invalid separators 
    19    """ 
    20 
    21    seps: str 
    22 
    23    def __init__(self, seps: str = _default_seps): 
    24        assert seps and set(seps) <= set(_default_seps), "Invalid separators" 
    25        self.seps = seps 
    26 
    27    def translate(self, pattern): 
    28        """ 
    29        Given a glob pattern, produce a regex that matches it. 
    30        """ 
    31        return self.extend(self.translate_core(pattern)) 
    32 
    33    def extend(self, pattern): 
    34        r""" 
    35        Extend regex for pattern-wide concerns. 
    36 
    37        Apply '(?s:)' to create a non-matching group that 
    38        matches newlines (valid on Unix). 
    39 
    40        Append '\Z' to imply fullmatch even when match is used. 
    41        """ 
    42        return rf'(?s:{pattern})\Z' 
    43 
    44    def translate_core(self, pattern): 
    45        r""" 
    46        Given a glob pattern, produce a regex that matches it. 
    47 
    48        >>> t = Translator() 
    49        >>> t.translate_core('*.txt').replace('\\\\', '') 
    50        '[^/]*\\.txt' 
    51        >>> t.translate_core('a?txt') 
    52        'a[^/]txt' 
    53        >>> t.translate_core('**/*').replace('\\\\', '') 
    54        '.*/[^/][^/]*' 
    55        """ 
    56        self.restrict_rglob(pattern) 
    57        return ''.join(map(self.replace, separate(self.star_not_empty(pattern)))) 
    58 
    59    def replace(self, match): 
    60        """ 
    61        Perform the replacements for a match from :func:`separate`. 
    62        """ 
    63        return match.group('set') or ( 
    64            re.escape(match.group(0)) 
    65            .replace('\\*\\*', r'.*') 
    66            .replace('\\*', rf'[^{re.escape(self.seps)}]*') 
    67            .replace('\\?', r'[^/]') 
    68        ) 
    69 
    70    def restrict_rglob(self, pattern): 
    71        """ 
    72        Raise ValueError if ** appears in anything but a full path segment. 
    73 
    74        >>> Translator().translate('**foo') 
    75        Traceback (most recent call last): 
    76        ... 
    77        ValueError: ** must appear alone in a path segment 
    78        """ 
    79        seps_pattern = rf'[{re.escape(self.seps)}]+' 
    80        segments = re.split(seps_pattern, pattern) 
    81        if any('**' in segment and segment != '**' for segment in segments): 
    82            raise ValueError("** must appear alone in a path segment") 
    83 
    84    def star_not_empty(self, pattern): 
    85        """ 
    86        Ensure that * will not match an empty segment. 
    87        """ 
    88 
    89        def handle_segment(match): 
    90            segment = match.group(0) 
    91            return '?*' if segment == '*' else segment 
    92 
    93        not_seps_pattern = rf'[^{re.escape(self.seps)}]+' 
    94        return re.sub(not_seps_pattern, handle_segment, pattern) 
    95 
    96 
    97def separate(pattern): 
    98    """ 
    99    Separate out character sets to avoid translating their contents. 
    100 
    101    >>> [m.group(0) for m in separate('*.txt')] 
    102    ['*.txt'] 
    103    >>> [m.group(0) for m in separate('a[?]txt')] 
    104    ['a', '[?]', 'txt'] 
    105    """ 
    106    return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)