Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 43%

1811 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

1812

1813 - ``exception_action`` - method to be called when expression fails to parse;

1814 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1815 """

1816 self.debugActions = self.DebugActions(

1817 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1818 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1819 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1820 )

1821 self.debug = True

1822 return self

1823

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Turn the display of debugging messages during pattern matching on or off.

    Pass ``flag=True`` to enable the messages, ``flag=False`` to disable them.
    Pass ``recurse=True`` to apply the flag to this expression and all of its
    sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; custom ones can be
    installed with :class:`set_debug_actions`. Before each attempt to match
    ``wd``, a ``"Match <exprname> at loc <n>(<line>,<col>)"`` message is
    shown, followed by either a ``"Matched"`` message on success or an
    ``"Exception raised"`` message on failure. Note the use of
    :class:`set_name` to give the expression a readable name: without it,
    the default name of the :class:`Word` expression would be ``"W:(A-Za-z)"``.

    Returns this expression, so calls can be chained.
    """
    if recurse:
        # Delegate to every expression reported by visit_all(), one level at a time.
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # Enabling debug installs the default actions (which also sets self.debug).
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1876

@property
def default_name(self) -> str:
    """
    Auto-generated name of this expression.

    The name is built by ``_generateDefaultName()`` on first access and then
    cached in ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1882

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the string used as this expression's ``default_name``.

    This is an abstract hook: every child class must provide its own
    implementation.
    """

1888

def set_name(self, name: typing.Optional[str]) -> "ParserElement":
    """
    Assign a name to this expression, to make debugging and exception
    messages clearer. If `__diag__.enable_debug_on_named_expressions` is
    set to True, assigning a name also enables debug for this expression.

    If `name` is None, any custom name is cleared, and the debug flag is
    cleared if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    Returns this expression, so calls can be chained.
    """
    self.customName = name
    # Rebuild the error message after the name change, since str(self) reflects it.
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self

1913

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has
    been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is not None:
        return self.customName
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    """Assigning to ``name`` is routed through ``set_name()``."""
    self.set_name(new_name)

1922

def __str__(self) -> str:
    """Return the expression's ``name`` (custom name if set, else the default name)."""
    return self.name

1925

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1928

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default
    name so that it is regenerated on next access.

    Returns this expression, so calls can be chained.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1933

def recurse(self) -> List["ParserElement"]:
    """
    Return the sub-expressions contained in this expression.

    This base implementation has none and returns a new empty list.
    """
    no_children: List["ParserElement"] = []
    return no_children

1936

def _checkRecursion(self, parseElementList):
    """
    Walk the expression tree, handing each sub-expression the list of
    expressions visited on the way down to it (a copy of
    ``parseElementList`` with this expression appended).
    """
    visited = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(visited)

1941

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure and for infinitely
    recursive definitions.

    Deprecated: a ``DeprecationWarning`` is issued on every call.
    ``validateTrace`` is accepted but not used.
    """
    deprecation_text = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate().
    warnings.warn(deprecation_text, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1952

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression against the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a
    file name. When a file name is given, the file is opened with
    ``encoding``, read in full and closed before parsing starts.

    ``parse_all`` (or its legacy spelling ``parseAll``) is passed on to
    :class:`parse_string`.
    """
    parseAll = parseAll or parse_all

    # Duck-typing: anything lacking read() is treated as a file name.
    try:
        file_or_filename = typing.cast(TextIO, file_or_filename)
        text = file_or_filename.read()
    except AttributeError:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1982

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal;
    - a string compares equal when this expression matches all of it;
    - another :class:`ParserElement` compares equal when both have equal
      instance attributes;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1991

def __hash__(self):
    """Hash by object identity."""
    return id(self)

1994

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quickly test this parser against a single string. Handy for small
    inline checks of sub-expressions while a larger parser is being built.

    Parameters:

    - ``test_string`` - the string to test for a match; it is converted
      with ``str()`` before parsing
    - ``parse_all`` - (default= ``True``) - flag to pass to
      :class:`parse_string` when running the test

    Returns ``True`` when parsing succeeds, ``False`` when a
    ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # Both spellings default to True, so setting either to False disables it.
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2018

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge each PEP8-named argument with its legacy camelCase synonym.
    # Flags defaulting to True are and-ed, so either spelling can turn them
    # off; flags defaulting to False/None are or-ed.
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # A multiline string is split into individually stripped test lines.
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # Converts a literal backslash-n in a test line into a real newline,
    # leaving quoted strings alone.
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # Comment lines, and blank lines that follow a comment, are collected
        # and emitted together with the next real test.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # A parse failure only counts as success when failures are expected.
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # The callback produced nothing: fall back to the normal
                        # dump, honoring full_dump like the other fallbacks do.
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        # The trailing empty entry makes the joined output end with a newline.
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2222

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str, Path or file-like object) - where the generated
      diagram HTML is written; a str or Path is opened as a UTF-8 text file,
      anything else only needs a ``write()`` method
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` when the diagram support module cannot be imported.
    """

    # The diagram module is imported lazily, since it needs optional packages.
    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # we were given a file name: create/overwrite that file
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2278

# Compatibility synonyms
#
# Each legacy camelCase name below is bound to its snake_case counterpart
# through replaced_by_pep8(). The first group wraps the result in
# staticmethod(); the second group binds the result directly.
# fmt: off
inlineLiteralsUsing = staticmethod(
    replaced_by_pep8("inlineLiteralsUsing", inline_literals_using)
)
setDefaultWhitespaceChars = staticmethod(
    replaced_by_pep8("setDefaultWhitespaceChars", set_default_whitespace_chars)
)
disableMemoization = staticmethod(
    replaced_by_pep8("disableMemoization", disable_memoization)
)
enableLeftRecursion = staticmethod(
    replaced_by_pep8("enableLeftRecursion", enable_left_recursion)
)
enablePackrat = staticmethod(
    replaced_by_pep8("enablePackrat", enable_packrat)
)
resetCache = staticmethod(
    replaced_by_pep8("resetCache", reset_cache)
)

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# defaultName is a plain alias of the default_name property
defaultName = default_name
# fmt: on

2313

2314

class _PendingSkip(ParserElement):
    # Internal placeholder class that holds the place where '...' was added to
    # a parser element; once another ParserElement is added, this placeholder
    # is replaced with a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        # the expression that the '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render the anchor followed by "..." by borrowing the name of
        # "anchor + Empty()" and swapping the text "Empty" for "...".
        combined_name = str(self.anchor + Empty())
        return combined_name.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # "other" is the skip target: build the SkipTo that replaces this placeholder.
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched and nothing (or only "") was skipped:
            # drop the empty skipped token and its results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: if the last skipped entry is empty,
            # report the missing anchor instead
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        anchor_then_skip = self.anchor + skipper().add_parse_action(must_skip)
        skip_only = skipper().add_parse_action(show_skip)
        return (anchor_then_skip | skip_only) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # A placeholder that was never combined with a target cannot parse anything.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2354

2355

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # By default a token is named after its own class.
        token_class = type(self)
        return token_class.__name__

2366

2367

class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises a
    ``ParseException``.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Fail unconditionally at the current location.
        raise ParseException(instring, loc, self.errmsg, self)

2381

2382

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always created as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, select a subclass with an
        # optimized parseImpl based on the length of the match string.
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        literal_text = matchString or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        # slicing (instead of indexing) keeps this safe for an empty string
        self.firstMatchChar = literal_text[:1]
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Compare the first character before the full startswith() test.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2434

2435

class Empty(Literal):
    """
    An empty token; it always matches and consumes no input.
    """

    def __init__(self, match_string="", *, matchString=""):
        # Both arguments are ignored: the literal text is always "".
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Succeed at the current location with no tokens.
        no_tokens = []
        return loc, no_tokens

2451

2452

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl only compares a single character.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2458

2459

# Register Literal as the class stored in ParserElement._literalStringClass.
ParserElement._literalStringClass = Literal

2461

2462

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` when constructed with an empty keyword string.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # Merge the PEP8-named arguments with their legacy camelCase synonyms.
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            # "from None": the IndexError is an implementation detail that
            # would only clutter the traceback shown to the caller.
            raise ValueError(
                "null string passed to Keyword; use Empty() instead"
            ) from None
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparisons are done on upper-cased text, so the
            # keyword and the identifier characters are upper-cased up front.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            # single-character keywords only need the first-character test
            (instring[loc] == self.firstMatchChar and self.matchLen == 1)
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2579

2580

class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results are always returned in the case of the
    defining match string, NOT in the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        as_given = matchString or match_string
        # The Literal machinery stores the upper-cased form for comparisons.
        super().__init__(as_given.upper())
        # Preserve the defining literal; this is what parseImpl returns.
        self.returnString = as_given
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2606

2607

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # Resolve the legacy camelCase synonyms, then delegate to Keyword
        # with caseless matching switched on.
        keyword_text = matchString or match_string
        keyword_chars = identChars or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)

2631

2632

class CloseMatch(Token):
    """A variation on :class:`Literal` which accepts "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # An explicit max_mismatches wins over the legacy maxMismatches keyword.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.caseless = caseless
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end = begin + len(self.match_string)

        # Only attempt a comparison when enough input remains.
        if end <= len(instring):
            target = self.match_string
            # NOTE(review): for an empty match_string the loop below never
            # runs, so idx stays 0 and one input character is consumed.
            idx = 0
            mismatches = []
            limit = self.maxMismatches
            ignore_case = self.caseless

            for idx, (src, mat) in enumerate(zip(instring[loc:end], target)):
                if ignore_case:
                    src, mat = src.lower(), mat.lower()
                if src == mat:
                    continue

                mismatches.append(idx)
                if len(mismatches) > limit:
                    # too many differences: give up and fall through to the raise
                    break
            else:
                # every position was compared without exceeding the limit
                loc = begin + idx + 1
                results = ParseResults([instring[begin:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2718

2719

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1);
      values less than 1 raise ``ValueError``
    - ``max`` - maximum number of characters to match (default=0,
      meaning no explicit maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning not specified); when > 0 it overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # Build the character sets and length limits, then try to precompile
        # an equivalent regular expression for the fast matching path.
        # Raises ValueError for an empty init_chars, min < 1, or min > max.

        # camelCase keyword arguments take precedence over the snake_case ones
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # remove the excluded characters from both the initial and body sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no distinct body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before `exact` is applied, so it reflects only an explicit max
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both limits; the locals are updated too because
            # the regex construction below reads `min` and `max` directly
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            # pattern fragment for the first character
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class covers the whole word, so the repeat
                # count applies to the full length
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # separate leading and body classes: the body repeat counts
                # are one less than min/max, since the leading fragment
                # already accounts for the first character
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                # keyword matching: require regex word boundaries on both sides
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # pattern could not be compiled; keep the non-regex parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                # compiled successfully: route this instance's parsing through the regex
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        # Default name has the form "W:(init)" or "W:(init, body)", followed
        # by a "{min,max}"-style suffix when length limits are in effect.
        def charsAsStr(s):
            # collapse the characters to range notation, truncated to at most
            # 16 characters with a trailing "..."
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # exactly one character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Character-by-character matcher, used when no regex was installed
        # by __init__.  Returns (end_loc, matched_text) or raises ParseException.
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too few characters matched
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: a body character directly before or after the match
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Regex-based matcher that __init__ installs as parseImpl when the
        # pattern compiles.  Returns (end_loc, matched_text) or raises
        # ParseException.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

2969

2970

class Char(Word):
    """Convenience subclass equivalent to :class:`Word` ``(characters, exact=1)``:
    matches exactly one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # The camelCase keyword forms win whenever they are truthy.
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_mode,
            exclude_chars=excluded,
        )

2991

2992

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may be a non-empty string or an already compiled
        pattern object (anything exposing ``pattern`` and ``match``).
        ``as_group_list`` makes a match return the tuple of regex groups;
        ``as_match`` makes it return the match object itself (and wins if
        both are set).

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        for any other kind of ``pattern`` argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the `re` cached property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # pre-compiled pattern object: use it as-is
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the per-instance parse implementation matching the result style
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self) -> _RePattern:
        """Compiled pattern object; compiled on first access when the
        expression was built from a string.

        Raises ``ValueError`` (chained to the underlying ``re.error``) if
        the pattern string does not compile.
        """
        if self._re:
            return self._re

        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self) -> Callable[[str], Any]:
        """The ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self) -> bool:
        """``True`` if the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        # show the pattern with doubled backslashes collapsed to single ones
        unescaped = self.pattern.replace("\\\\", "\\")
        return f"Re:({unescaped!r})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return the matched text as
        :class:`ParseResults`, with every named regex group added as a
        named result. Raises ``ParseException`` when there is no match.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` returning ``result.groups()`` as the tokens."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` returning the match object itself as the token."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` if this expression was created with
        ``as_group_list=True``, or with ``as_match=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object, which can expand the template itself
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; re-run the pattern over it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3148

3149

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # maps a two-character escape as written in the input (backslash + letter)
    # to the actual whitespace character it stands for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        # Normalize the arguments, then build and compile the regular
        # expression matching a complete quoted string.  Raises ValueError
        # for an empty quote/end-quote string or an uncompilable pattern.
        super().__init__()
        # merge camelCase and snake_case arguments: value arguments take the
        # camelCase form when truthy; the boolean flags (default True) are
        # and-ed, so passing False through either spelling disables them
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        # True whenever an esc_char was passed at all, even an empty string
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: List[str] = []

        # alternative 1: the escaped-quote sequence
        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        # alternative 2: the escape character followed by any one character
        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        # alternative 3 (multi-character end quote only): a proper prefix of
        # the end quote that is not followed by the rest of the end quote
        if len(self.end_quote_char) > 1:
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        # final alternative: any character other than the first end-quote
        # character and the escape character; single-line strings also
        # exclude \n and \r
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        # full pattern: opening quote, zero or more inner alternatives, closing quote
        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            # scanner used by parseImpl to walk the unquoted body; the group
            # numbering here must stay in step with the lookups in parseImpl
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        # shorter wording when the opening and closing quotes are the same string
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Match a quoted string at loc.  Returns (end_loc, text), where text
        # is the raw match including quotes, or the unquoted and unescaped
        # body when unquote_results is set.  Raises ParseException on no match.

        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped characters
                        else match.group(2)[-1] if match.group(2)
                        # match group 3 matches any character
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3370

3371

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a
    given set. Whitespace is matched like any other character unless it
    is listed in the exclusion set (see example). Construct with a string
    of the disallowed characters plus optional ``min``, ``max`` and/or
    ``exact`` lengths. ``min`` defaults to 1 (values < 1 are rejected);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        # An exact length, when given, overrides both bounds.
        if exact > 0:
            shortest = longest = exact
        else:
            shortest = min
            longest = max if max > 0 else _MAX_INT
        self.minLen = shortest
        self.maxLen = longest

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # The length test uses the range-collapsed form, while the text shown
        # is taken from the raw exclusion string.
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # stop at maxLen characters or the end of the input, whichever is first
        limit = min(begin + self.maxLen, len(instring))
        end = begin + 1
        while end < limit and instring[end] not in excluded:
            end += 1

        if end - begin < self.minLen:
            raise ParseException(instring, end, self.errmsg, self)

        return end, instring[begin:end]

3450

3451

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used to build the default name of a White expression
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        """Create a matcher for runs of the characters in ``ws``.

        ``min`` is the minimum run length; ``max`` > 0 caps the run length
        (0 means no cap); ``exact`` > 0 overrides both.
        """
        super().__init__()
        self.matchWhite = ws
        # Every known whitespace character that this expression does NOT
        # match is still treated as skippable leading whitespace.
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Concatenate the display label of every character being matched.

        Characters without an entry in ``whiteStrs`` get a generic
        ``<U+XXXX>`` label; a plain dictionary lookup would raise
        ``KeyError`` while the constructor builds the error message.
        """
        labels = White.whiteStrs
        return "".join(
            labels.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a run of ``matchWhite`` characters starting at ``loc``.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` if
        the first character is not matchable or the run is shorter than
        ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3527

3528

class PositionToken(Token):
    """Shared base of the position-oriented tokens in this module
    (``GoToColumn``, ``LineStart``, ``LineEnd``, ``StringStart``, ...);
    flags the expression as one that may return an empty match and does
    not need index-error protection.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3534

3535

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        # Skip ignorable expressions and whitespace until the target column
        # is reached, a non-space character is hit, or the input ends.
        target = self.col
        if col(loc, instring) != target:
            end = len(instring)
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)
            while (
                loc < end
                and instring[loc].isspace()
                and col(loc, instring) != target
            ):
                loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Number of characters still to consume to land on the target column;
        # negative means we are already past it.
        remaining = self.col - col(loc, instring)
        if remaining < 0:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + remaining
        return stop, instring[loc:stop]

3568

3569

class LineStart(PositionToken):
    r"""Succeeds only when the current parse position is the first
    column of a line of the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # Remember the full whitespace set before newline is removed from it.
        self.orig_whiteChars = set() | self.whiteChars
        self.whiteChars.discard("\n")
        # Helper expression that skips whitespace but never crosses a newline.
        blank_skipper = Empty()
        self.skipper = blank_skipper.set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        # Newline was originally skippable: step over each newline and the
        # non-newline whitespace that follows it.
        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3617

3618

class LineEnd(PositionToken):
    """Succeeds when the current parse position is at the end of a line
    of the parse string
    """

    def __init__(self):
        super().__init__()
        # Newline must not be skipped as whitespace, since it is what we match.
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)

        # End of input counts as end of line and yields no tokens.
        if loc == end:
            return loc + 1, []

        # Inside the input, only a newline character qualifies.
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)

3640

3641

class StringStart(PositionToken):
    """Succeeds when the current parse position is at the start of the
    parse string
    """

    def __init__(self):
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Position 0 always qualifies; so does the position reached by
        # skipping only whitespace and ignorables from the very beginning.
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

3657

3658

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` when input remains.

        At exactly the end of the input the returned location is
        ``loc + 1``; beyond the end it is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case, so no fall-through raise is needed
        return loc, []

3677

3678

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    # NOTE: the docstring is a raw string so that ``\b`` is kept literally
    # instead of being interpreted as a backspace escape.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # ``wordChars`` is the pre-PEP8 keyword; it takes precedence only
        # when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is.

        Raises ``ParseException`` otherwise.  Indexing ``instring[loc]`` at
        the end of input raises ``IndexError``, which is left to the caller
        to handle.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3703

3704

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # NOTE: the docstring is a raw string so that ``\b`` is kept literally
    # instead of being interpreted as a backspace escape.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # ``wordChars`` is the pre-PEP8 keyword; it takes precedence only
        # when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at (or past) the end of ``instring``, or
        where the previous character is a word character and the current
        one is not.

        Raises ``ParseException`` otherwise, including at position 0 of a
        non-empty string, where no word can have ended yet.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # there is no previous character at position 0; without this
                # guard ``instring[loc - 1]`` would wrap around to the LAST
                # character and make the result depend on unrelated text
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3730

3731

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True):
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.leave_whitespace()
        # the tag itself is stored by a parse action run on the results
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value in ``tokens`` under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3771

3772

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    The contained expressions are kept in the ``exprs`` list.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the ``self.exprs`` list.

        A single string or a single ``ParserElement`` becomes a one-element
        list; strings inside an iterable are wrapped with
        ``self._literalStringClass``; a non-iterable object is stored as the
        sole element.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any plain strings with the literal string class
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes this expression
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the change does not leak into other
            # expressions holding the same sub-expressions
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this expression
        and on every contained expression.

        A ``Suppress`` that is already in ``self.ignoreExprs`` is not
        registered again.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # propagate the entry as stored by the base class
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its contained expressions, and
        flatten a directly nested expression of the same class.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates every
        contained expression and then runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are copies as well."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the corresponding
        diagnostic is enabled and not suppressed) if a contained expression
        already carries a results name.
        """
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning per call is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

3942

3943

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once passed during parsing, later failures in
        the enclosing ``And`` are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` placeholder with a
        ``SkipTo`` targeting the expression that follows it.

        Raises ``Exception`` if ``...`` is the last item of the sequence.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            last_index = len(exprs) - 1
            expanded: List[ParserElement] = []
            for idx, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                elif idx < last_index:
                    # adding to Empty() converts a plain string follower
                    # into a parser element before it is handed to SkipTo
                    target: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[idx + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(target)("_skipped*"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            exprs[:] = expanded
        super().__init__(exprs, savelist)

        # all() over an empty sequence is True, matching the empty-And case
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        if self.exprs:
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # whitespace handling follows that of the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips with their following expression, run the base
        streamlining, link any ``IndentedBlock`` to the expression before
        it, and recompute ``mayReturnEmpty``.
        """

        def _ends_with_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        leading = self.exprs[:-1]
        if self.exprs and any(map(_ends_with_pending_skip, leading)):
            marker = NoMatch()
            for idx, item in enumerate(leading):
                if item is marker:
                    continue
                if _ends_with_pending_skip(item):
                    item.exprs[-1] = item.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = marker
            self.exprs = [e for e in self.exprs if e is not marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        anchor: ParserElement
        follower: ParserElement
        for anchor, follower in zip(self.exprs, self.exprs[1:]):
            # walk follower and each first embedded expression looking for an
            # IndentedBlock (tracking ids to survive recursive grammars)
            visited = set()
            node = follower
            while id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    anchor.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                node = next(iter(node.recurse()), None)
                if node is None:
                    break

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order and return the final
        location with the accumulated tokens.

        After an ``_ErrorStop`` marker, failures are re-raised as
        ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        committed = False
        for expr in self.exprs[1:]:
            if type(expr) is And._ErrorStop:
                committed = True
                continue
            if not committed:
                loc, tokens = expr._parse(instring, loc, do_actions)
            else:
                try:
                    loc, tokens = expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += tokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` in place; plain strings are wrapped first."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            # later expressions are only reachable at the same position
            # when this one can match empty
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(map(str, self.exprs))
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4104

4105

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining, then refresh the flags derived from
        the alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Raises the fatal exception with the greatest location if any
        alternative raised one and nothing matched, otherwise the
        ``ParseException`` that got furthest.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: List[Tuple[int, ParserElement]] = []
        fatals: List[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative would get
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error discards any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, do_actions)

            best = -1, None
            for expected_end, alt in candidates:
                if expected_end <= best[0]:
                    # already have a longer match than this one will deliver, we are done
                    return best

                try:
                    actual_end, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    if actual_end > best[0]:
                        best = actual_end, toks

            if best != (-1, None):
                return best

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: order by length of the element's string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if furthest_loc == loc:
                furthest_exc.msg = self.errmsg
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Append ``other`` in place; plain strings are wrapped first."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " ^ ".join(map(str, self.exprs))
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4259

4260

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining once, then refresh the flags derived
        from the alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` is re-raised immediately; if every
        alternative fails, the ``ParseException`` that got furthest is
        raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Append ``other`` in place; plain strings are wrapped first."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(map(str, self.exprs))
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4366

4367

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        # all() over an empty sequence is True, matching the empty-Each case
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are built lazily on the first parse
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Append ``other`` in place; plain strings are wrapped first."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        super().streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def _build_expr_groups(self) -> None:
        """Sort the contained expressions into the optional, multi-optional
        and required groups used by ``parseImpl``.
        """
        opts = [e for e in self.exprs if isinstance(e, Opt)]
        # maps the id of each Opt's inner expression back to the Opt itself
        self.opt1map = {id(opt.expr): opt for opt in opts}
        inner_opts = [opt.expr for opt in opts]
        empty_capable = [
            e
            for e in self.exprs
            if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
        ]
        self.optionals = inner_opts + empty_capable
        self.multioptionals = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, _MultipleMatch)
        ]
        self.multirequired = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, OneOrMore)
        ]
        self.required = [
            e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
        ]
        self.required += self.multirequired

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Determine an order in which the contained expressions match,
        then parse them in that order and return the combined results.

        Raises the collected ``ParseFatalException`` with the greatest
        location, or a ``ParseException`` naming the required expressions
        that were never matched.
        """
        if self.initExprGroups:
            self._build_expr_groups()
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        match_order: List[ParserElement] = []

        # keep sweeping over the remaining expressions until a complete
        # sweep matches nothing
        while True:
            attempt = pending_required + pending_optional + multis
            failures = 0
            fatals: List[ParseFatalException] = []
            for expr in attempt:
                try:
                    probe_loc = expr.try_parse(instring, probe_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failures += 1
                except ParseException:
                    failures += 1
                else:
                    # record the enclosing Opt when expr is an Opt's inner expression
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            if failures == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(map(str, pending_required))
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for expr in match_order:
            loc, results = expr._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(map(str, self.exprs))
        return "{" + joined + "}"

4540

4541

4542class ParseElementEnhance(ParserElement):

4543 """Abstract subclass of :class:`ParserElement`, for combining and

4544 post-processing parsed tokens.

4545 """

4546

4547 def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):

4548 super().__init__(savelist)

4549 if isinstance(expr, str_type):

4550 expr_str = typing.cast(str, expr)

4551 if issubclass(self._literalStringClass, Token):

4552 expr = self._literalStringClass(expr_str) # type: ignore[call-arg]

4553 elif issubclass(type(self), self._literalStringClass):

4554 expr = Literal(expr_str)

4555 else:

4556 expr = self._literalStringClass(Literal(expr_str)) # type: ignore[assignment, call-arg]

4557 expr = typing.cast(ParserElement, expr)

4558 self.expr = expr

4559 if expr is not None:

4560 self.mayIndexError = expr.mayIndexError

4561 self.mayReturnEmpty = expr.mayReturnEmpty

4562 self.set_whitespace_chars(

4563 expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars

4564 )

4565 self.skipWhitespace = expr.skipWhitespace

4566 self.saveAsList = expr.saveAsList

4567 self.callPreparse = expr.callPreparse

4568 self.ignoreExprs.extend(expr.ignoreExprs)

4569

4570 def recurse(self) -> List[ParserElement]:

4571 return [self.expr] if self.expr is not None else []

4572

4573 def parseImpl(self, instring, loc, do_actions=True):

4574 if self.expr is None:

4575 raise ParseException(instring, loc, "No expression defined", self)

4576

4577 try:

4578 return self.expr._parse(instring, loc, do_actions, callPreParse=False)

4579 except ParseSyntaxException:

4580 raise

4581 except ParseBaseException as pbe:

4582 if not isinstance(self, Forward) or self.customName is not None:

4583 if self.errmsg:

4584 pbe.msg = self.errmsg

4585 raise

4586

4587 def leave_whitespace(self, recursive: bool = True) -> ParserElement:

4588 super().leave_whitespace(recursive)

4589

4590 if recursive:

4591 if self.expr is not None:

4592 self.expr = self.expr.copy()

4593 self.expr.leave_whitespace(recursive)

4594 return self

4595

4596 def ignore_whitespace(self, recursive: bool = True) -> ParserElement:

4597 super().ignore_whitespace(recursive)

4598

4599 if recursive:

4600 if self.expr is not None:

4601 self.expr = self.expr.copy()

4602 self.expr.ignore_whitespace(recursive)

4603 return self

4604

def ignore(self, other) -> ParserElement:
    """Register ``other`` as an ignorable expression here and on the
    wrapped expression.

    A ``Suppress`` that is already present in ``self.ignoreExprs`` is not
    registered a second time.  Returns ``self``.
    """
    already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
    if already_ignored:
        return self

    super().ignore(other)
    if self.expr is not None:
        # forward the entry that was just appended to our own ignore list
        self.expr.ignore(self.ignoreExprs[-1])
    return self

4612

def streamline(self) -> ParserElement:
    """Streamline this element, then the wrapped expression if there is one.

    Returns ``self``.
    """
    super().streamline()
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self

4618

def _checkRecursion(self, parseElementList):
    """Detect this element recurring in the chain of elements visited so far.

    Raises ``RecursiveGrammarException`` if ``self`` is already in
    ``parseElementList``; otherwise continues the check in the wrapped
    expression with ``self`` appended to a new list.
    """
    trail = parseElementList + [self]
    if self in parseElementList:
        raise RecursiveGrammarException(trail)
    if self.expr is not None:
        self.expr._checkRecursion(trail)

4625

def validate(self, validateTrace=None) -> None:
    """Deprecated grammar check.

    Always emits a ``DeprecationWarning``, then validates the wrapped
    expression (passing along the trace extended with ``self``) and finally
    runs ``_checkRecursion`` starting from an empty list.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = ([] if validateTrace is None else validateTrace[:]) + [self]
    if self.expr is not None:
        self.expr.validate(trace)
    self._checkRecursion([])

4638

def _generateDefaultName(self) -> str:
    """Build the default name: the class name followed by the wrapped
    expression in parentheses."""
    default_name = f"{type(self).__name__}:({self.expr})"
    return default_name

4641

# Compatibility synonyms
# The camelCase names below are bound to the snake_case methods defined
# above by way of replaced_by_pep8().
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on

4647

4648

class IndentedBlock(ParseElementEnhance):
    """
    Match one or more repetitions of an expression that all start at the
    same indentation column.

    Intended for text whose structure is implied by indentation (Python
    source code, for instance).

    Parameters:

    - ``expr`` - expression matched at each line of the block
    - ``recursive`` - (default= ``False``) if ``True``, each match may be
      followed by a nested block at a greater indentation
    - ``grouped`` - (default= ``True``) if ``True``, the tokens of the block
      are wrapped in a :class:`Group`
    """

    class _Indent(Empty):
        """Zero-width match that only succeeds at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that only succeeds to the right of ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column a trailing un-indent is compared against; a nested block
        # has this overwritten by the block that creates it
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # skip ahead to the first non-whitespace position using a throwaway
        # Empty(); its column is the one all following lines must share
        anchor_loc = Empty().preParse(instring, loc)

        # fail fast: when self.expr cannot match here this raises, and none
        # of the expressions below need to be built
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        line_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            deeper_indent = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_indent + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4711

4712

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the wrapped expression, but only at location 0.

        Raises ``ParseException`` for any other location.
        """
        if loc != 0:
            # pass ``self`` so the exception records which element failed,
            # as the other ParseException raises in this module do
            raise ParseException(instring, loc, "not found at string start", self)
        return super().parseImpl(instring, loc, do_actions)

4732

4733

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the wrapped expression, but only when ``loc`` is in
        column 1.

        Raises ``ParseException`` for any other column.
        """
        if col(loc, instring) != 1:
            # pass ``self`` so the exception records which element failed,
            # as the other ParseException raises in this module do
            raise ParseException(instring, loc, "not found at line start", self)
        return super().parseImpl(instring, loc, do_actions)

4765

4766

class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    ``FollowedBy`` does *not* advance the parsing position within the
    input string; it only verifies that the given expression matches at
    the current position. The returned token list is always empty, but
    any results names defined in the lookahead expression *are* kept and
    can be accessed by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Parse with the wrapped expression, then empty the returned token
        # list in place: the list contents go away while named results
        # defined inside the lookahead stay on the ParseResults object.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # hand back the incoming location, not the one the lookahead reached
        return loc, lookahead

4801

4802

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # work on a copy of the wrapped expression that does not skip whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is known up front, so a
        # single parse attempt at ``loc - retreat`` is made
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # always return an empty token list (named results are kept)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the wrapped expression matches the text ending at ``loc``.

        Returns ``loc`` unchanged together with the lookbehind's results.
        Raises a parse exception when no lookbehind match is found.
        """
        if self.exact:
            if loc < self.retreat:
                # not enough text before ``loc`` for the lookbehind to fit
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr = ParseException(instring, loc, self.errmsg, self)

        # Try every start position inside the window, nearest to ``loc``
        # first.  The offsets are bounded by the window length so that the
        # start index never goes below 0: a start of -1 would index from
        # the end of the slice and could produce a false match.
        for offset in range(1, len(instring_slice) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no start position matched (or the window was empty)
            raise last_expr

        return loc, ret

4884

4885

class Located(ParseElementEnhance):
    """
    Wraps the tokens of an expression together with the locations in the
    input string where the match starts and ends.

    The following results names are added:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located

4926

4927

class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.

    ``NotAny`` does *not* advance the parsing position within the input
    string; it only verifies that the given expression does *not* match
    at the current position. ``NotAny`` also does *not* skip over leading
    whitespace, and it always returns a null token list. May be
    constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace():
        # that call would propagate the setting to the contained exprs
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if not self.expr.can_parse_next(instring, loc, do_actions=do_actions):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        default_name = f"~{{{self.expr}}}"
        return default_name

4969

4970

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional sentinel expression that ends the repetition."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # the stopOn() method converts a string sentinel itself
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or, with ``None``, clear) the sentinel that ends repetition.

        Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # bound check that raises when the sentinel matches, or None
        guard = self.not_ender.try_parse if self.not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if guard is not None:
            guard(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if guard is not None:
                    guard(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, start, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition (or a matched sentinel) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warning_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if warning_enabled:
            # warn once, for the first contained expression carrying its own
            # results name
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement) or not e.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

5045

5046

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        default_name = f"{{{self.expr}}}..."
        return default_name

5077

5078

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        ender = stopOn or stop_on
        super().__init__(expr, stopOn=ender)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero matches: succeed at the incoming location with no tokens
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        default_name = f"[{self.expr}]..."
        return default_name

5111

5112

class DelimitedList(ParseElementEnhance):
    """Expression matching a list of items separated by a delimiter."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','.

        By default, list elements and delimiters may have intervening
        whitespace and comments; pass ``combine=True`` to override this.
        With ``combine=True`` the matching tokens are returned as a single
        token string that includes the delimiters; otherwise they are
        returned as a list of tokens with the delimiters suppressed.

        ``min`` and ``max``, when given, bound the number of list elements;
        a ``ValueError`` is raised if ``min`` is less than 1 or if ``max``
        is less than ``min``.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are only kept in the output when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, then (delim + element) repeated within bounds
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        default_name = f"{content_expr} [{self.raw_delim} {content_expr}]..."
        return default_name

5176

5177

class _NullToken:
    """Placeholder object that is falsy and whose string form is empty."""

    def __bool__(self) -> bool:
        return False

    def __str__(self) -> str:
        return ""

5184

5185

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may or may not be present
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: stay at the incoming location and return the default
            # value (if one was given), else nothing
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return f"[{inner}]"

# ``Optional`` is a second name bound to the very same class as ``Opt``
Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose only job is to carry the ignore expressions
        # used while scanning forward
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, as the inherited
        ``ignore`` does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` to the first position where the target
        expression matches.

        Returns the new location and a ``ParseResults`` holding the skipped
        text (followed by the target's tokens when ``include`` was set).
        Raises ``ParseException`` if the end of the input is passed without
        finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5413

5414

5415class Forward(ParseElementEnhance):

5416 """

5417 Forward declaration of an expression to be defined later -

5418 used for recursive grammars, such as algebraic infix notation.

5419 When the expression is known, it is assigned to the ``Forward``

5420 variable using the ``'<<'`` operator.

5421

5422 Note: take care when assigning to ``Forward`` not to overlook

5423 precedence of operators.

5424

5425 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

5426

5427 fwd_expr << a | b | c

5428

5429 will actually be evaluated as::

5430

5431 (fwd_expr << a) | b | c

5432

5433 thereby leaving b and c out as parseable alternatives. It is recommended that you

5434 explicitly group the values inserted into the ``Forward``::

5435

5436 fwd_expr << (a | b | c)

5437

5438 Converting to use the ``'<<='`` operator instead will avoid this problem.

5439

5440 See :class:`ParseResults.pprint` for an example of a recursive

5441 parser created using ``Forward``.

5442 """

5443

def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
    """Create a forward declaration, optionally wrapping ``other`` right away."""
    # first entry of a 2-deep stack extract taken from inside this method;
    # __del__ reads its filename and lineno when issuing its warning
    self.caller_frame = traceback.extract_stack(limit=2)[0]
    super().__init__(other, savelist=False)  # type: ignore[arg-type]
    # set by __lshift__, compared against the caller's line in __or__
    self.lshift_line = None

5448

def __lshift__(self, other) -> "Forward":
    """Attach ``other`` as the expression this ``Forward`` stands for.

    A string is first converted with ``_literalStringClass``.  Returns
    ``self``, or ``NotImplemented`` if ``other`` is not a parser element.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)

    if not isinstance(other, ParserElement):
        # reject before touching any state, so that caller_frame is still
        # there for __del__ to read when no expression ever gets attached
        return NotImplemented

    if hasattr(self, "caller_frame"):
        del self.caller_frame

    self.expr = other
    # take over the parsing attributes of the attached expression
    self.streamlined = other.streamlined
    self.mayIndexError = self.expr.mayIndexError
    self.mayReturnEmpty = self.expr.mayReturnEmpty
    self.set_whitespace_chars(
        self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = self.expr.skipWhitespace
    self.saveAsList = self.expr.saveAsList
    self.ignoreExprs.extend(self.expr.ignoreExprs)
    # remember the calling line; __or__ compares against it to detect
    # "fwd << a | b" written on a single line
    self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
    return self

5470

def __ilshift__(self, other) -> "Forward":
    """In-place form of ``<<``: delegates to ``self << other`` for parser
    elements and returns ``NotImplemented`` for anything else."""
    if isinstance(other, ParserElement):
        return self << other

    return NotImplemented

5476

def __or__(self, other) -> "ParserElement":
    """Combine with ``other`` using ``|``.

    When the corresponding diagnostic is enabled and not suppressed, a
    warning is issued if this ``|`` comes from the same source line that
    last used ``<<`` on this object.
    """
    caller_line = traceback.extract_stack(limit=2)[-2]
    same_line_as_lshift = (
        __diag__.warn_on_match_first_with_lshift_operator
        and caller_line == self.lshift_line
        and Diagnostics.warn_on_match_first_with_lshift_operator
        not in self.suppress_warnings_
    )
    if same_line_as_lshift:
        warnings.warn(
            "using '<<' operator with '|' is probably an error, use '<<='",
            stacklevel=2,
        )
    return super().__or__(other)

5491

def __del__(self):
    """Warn when this ``Forward`` is dropped without ever having had an
    expression attached (subject to the matching diagnostic setting)."""
    # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
    if self.expr is not None:
        return
    if not __diag__.warn_on_assignment_to_Forward:
        return
    if Diagnostics.warn_on_assignment_to_Forward in self.suppress_warnings_:
        return

    # report against the location recorded in caller_frame
    warnings.warn_explicit(
        "Forward defined here but no expression attached later using '<<=' or '<<'",
        UserWarning,
        filename=self.caller_frame.filename,
        lineno=self.caller_frame.lineno,
    )

5505

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Parse at ``loc`` through the attached expression, optionally with left recursion.

    Steps performed here:

    1. If no expression is attached and the
       ``warn_on_parse_using_empty_Forward`` diagnostic is enabled and not
       suppressed on this element, emit a warning. Its ``stacklevel`` is
       chosen by scanning the current stack (at most 200 frames) for a frame
       named ``parse_string``, ``scan_string``, ``search_string`` or
       ``transform_string``; it falls back to 2 when none is found.
    2. If ``ParserElement._left_recursion_enabled`` is false, delegate
       directly to the parent class's ``parseImpl``.
    3. Otherwise run the bounded-recursion algorithm described in the
       comments below, using the shared ``ParserElement.recursion_memos``
       table while holding ``ParserElement.recursion_lock``.

    Returns a ``(location, results)`` pair; results taken from the memo table
    are returned as copies. A ``ParseException`` from the parent
    ``parseImpl`` propagates when no earlier match exists, and an exception
    stored in the memo table is re-raised.
    """
    if (
        self.expr is None
        and __diag__.warn_on_parse_using_empty_Forward
        and Diagnostics.warn_on_parse_using_empty_Forward
        not in self.suppress_warnings_
    ):
        # walk stack until parse_string, scan_string, search_string, or transform_string is found
        parse_fns = (
            "parse_string",
            "scan_string",
            "search_string",
            "transform_string",
        )
        tb = traceback.extract_stack(limit=200)
        # innermost frame first; ``i`` counts frames starting from this one
        for i, frm in enumerate(reversed(tb), start=1):
            if frm.name in parse_fns:
                stacklevel = i + 1
                break
        else:
            # loop finished without a break: no parse entry point on the stack
            stacklevel = 2
        warnings.warn(
            "Forward expression was never assigned a value, will not parse any input",
            stacklevel=stacklevel,
        )
    if not ParserElement._left_recursion_enabled:
        return super().parseImpl(instring, loc, do_actions)
    # ## Bounded Recursion algorithm ##
    # Recursion only needs to be processed at ``Forward`` elements, since they are
    # the only ones that can actually refer to themselves. The general idea is
    # to handle recursion stepwise: We start at no recursion, then recurse once,
    # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
    #
    # The "trick" here is that each ``Forward`` gets evaluated in two contexts
    # - to *match* a specific recursion level, and
    # - to *search* the bounded recursion level
    # and the two run concurrently. The *search* must *match* each recursion level
    # to find the best possible match. This is handled by a memo table, which
    # provides the previous match to the next level match attempt.
    #
    # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
    #
    # There is a complication since we not only *parse* but also *transform* via
    # actions: We do not want to run the actions too often while expanding. Thus,
    # we expand using `do_actions=False` and only run `do_actions=True` if the next
    # recursion level is acceptable.
    with ParserElement.recursion_lock:
        memo = ParserElement.recursion_memos
        try:
            # we are parsing at a specific recursion expansion - use it as-is
            prev_loc, prev_result = memo[loc, self, do_actions]
            if isinstance(prev_result, Exception):
                raise prev_result
            return prev_loc, prev_result.copy()
        except KeyError:
            # memo keys are (location, element, do_actions) triples
            act_key = (loc, self, True)
            peek_key = (loc, self, False)
            # we are searching for the best recursion expansion - keep on improving
            # both `do_actions` cases must be tracked separately here!
            # seed entry: a failure placed one position before ``loc``
            # (presumably so that any real match compares as an improvement
            # in the ``new_loc <= prev_loc`` test below)
            prev_loc, prev_peek = memo[peek_key] = (
                loc - 1,
                ParseException(
                    instring, loc, "Forward recursion without base case", self
                ),
            )
            if do_actions:
                memo[act_key] = memo[peek_key]
            while True:
                try:
                    new_loc, new_peek = super().parseImpl(instring, loc, False)
                except ParseException:
                    # we failed before getting any match – do not hide the error
                    if isinstance(prev_peek, Exception):
                        raise
                    # otherwise fall back to the previous (successful) expansion
                    new_loc, new_peek = prev_loc, prev_peek
                # the match did not get better: we are done
                if new_loc <= prev_loc:
                    if do_actions:
                        # replace the match for do_actions=False as well,
                        # in case the action did backtrack
                        prev_loc, prev_result = memo[peek_key] = memo[act_key]
                        # search finished: drop both memo entries for this position
                        del memo[peek_key], memo[act_key]
                        return prev_loc, prev_result.copy()
                    del memo[peek_key]
                    return prev_loc, prev_peek.copy()
                # the match did get better: see if we can improve further
                if do_actions:
                    try:
                        memo[act_key] = super().parseImpl(instring, loc, True)
                    except ParseException as e:
                        # record the failure under both keys before propagating it
                        memo[peek_key] = memo[act_key] = (new_loc, e)
                        raise
                prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

5599

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """Disable whitespace skipping on this element and return ``self``.

    ``recursive`` is accepted but not used by this implementation: only
    this element's own ``skipWhitespace`` flag is changed.
    """
    self.skipWhitespace = False
    return self

5603

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """Enable whitespace skipping on this element and return ``self``.

    ``recursive`` is accepted but not used by this implementation: only
    this element's own ``skipWhitespace`` flag is changed.
    """
    self.skipWhitespace = True
    return self

5607

def streamline(self) -> ParserElement:
    """Streamline the attached expression once, then return ``self``.

    Calls after the first are no-ops, because ``self.streamlined`` is
    already set.
    """
    if self.streamlined:
        return self

    # set the flag before descending: a re-entrant call on this element
    # then returns immediately instead of recursing again
    self.streamlined = True
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self

5614

def validate(self, validateTrace=None) -> None:
    """Deprecated recursion check; always emits a ``DeprecationWarning``.

    ``validateTrace`` is the list of elements already visited (a new empty
    list when omitted). Unless this element is already in it, the attached
    expression, if any, is validated with the trace extended by ``self``.
    ``self._checkRecursion([])`` is called in every case.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        # extend a copy so the caller's list is never modified
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
    self._checkRecursion([])

5629

def _generateDefaultName(self) -> str:
    """Return ``"<ClassName>: <expr text>"`` as the default display name.

    The expression text is ``str(self.expr)`` truncated to 1000 characters,
    ``"None"`` when no expression is attached, or ``"..."`` when building
    the text raises an ordinary exception (for example a
    ``RecursionError`` from a deeply nested grammar).

    Only ``Exception`` subclasses are absorbed. The previous
    ``return``-inside-``finally`` form silently discarded *every* exception,
    including ``KeyboardInterrupt`` and ``SystemExit``; those now propagate.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    self._defaultName = ": ..."

    # Use the string representation of main expression.
    try:
        ret_string = "None" if self.expr is None else str(self.expr)[:1000]
    except Exception:
        # best effort: fall back to a placeholder instead of failing
        ret_string = "..."
    return f"{type(self).__name__}: {ret_string}"

5643

def copy(self) -> ParserElement:
    """Return a copy of this element.

    With an expression attached, the parent class's ``copy()`` is used.
    With none attached yet, a new ``Forward`` is created and this instance
    is attached to it with ``<<=``.
    """
    if self.expr is None:
        # nothing attached yet: wrap this instance in a fresh Forward
        placeholder = Forward()
        placeholder <<= self
        return placeholder
    return super().copy()

5651

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """Set the results name via the parent class, warning if no expression is attached.

    The warning is issued only when the ``warn_name_set_on_empty_Forward``
    diagnostic is enabled, not suppressed on this element, and
    ``self.expr`` is ``None``. The parent class's ``_setResultsName``
    result is returned in every case.
    """
    # fmt: off
    naming_empty_forward = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if naming_empty_forward:
        cls_name = type(self).__name__
        # warn from this frame so that stacklevel=3 keeps its meaning
        warnings.warn(
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {cls_name} expression"
            " that has no contained expression",
            stacklevel=3,
        )
    # fmt: on

    return super()._setResultsName(name, list_all_matches)

5668

# Compatibility synonyms
# (camelCase names for the two whitespace methods, built with replaced_by_pep8)
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on

5674

5675

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but neither forwarded nor used here;
        # ``saveAsList`` is always initialised to False
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5684

5685

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        """Wrap ``expr``; ``joinString`` (legacy keyword) overrides ``join_string`` when given."""
        super().__init__(expr)
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable and return ``self``.

        With ``adjacent`` set, ``ParserElement.ignore`` is called directly
        on this element; otherwise the parent class's ``ignore`` is used.
        """
        if not self.adjacent:
            super().ignore(other)
            return self
        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one string joined from their string forms."""
        # start from a copy of the incoming results, emptied of its list items
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        needs_wrapping = self.resultsName and combined.haskeys()
        return [combined] if needs_wrapping else combined

5741

5742

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        """Wrap ``expr``; ``aslist`` selects a plain Python list as the group value."""
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        if not self._asPythonList:
            return [tokenlist]

        # aslist=True: convert to plain Python data before wrapping
        if isinstance(tokenlist, ParseResults):
            plain_tokens = tokenlist.asList()
        else:
            plain_tokens = list(tokenlist)
        return ParseResults.List(plain_tokens)

5778

5779

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        """Wrap ``expr``; ``asdict`` selects a plain Python dict as the result."""
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to ``tokenlist`` for each non-empty item, keyed by its first token."""
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if entry_len == 1:
                # key only, no value
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                tokenlist[key] = _ParseResultsWithOffset(entry[1], position)
                continue

            try:
                remainder = entry.copy()  # ParseResults(i)
            except Exception:
                raise TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                ) from None

            # drop the key token; what is left is the value
            del remainder[0]

            keep_whole = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        outcome = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [outcome] if self.resultsName else outcome

5865

5866

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr``; an ``...`` argument becomes a pending skip. ``savelist`` is not used."""
        if expr is ...:
            # Suppress(...): placeholder resolved by the following + or - operator
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        """``self + other``; a pending skip becomes a suppressed ``SkipTo(other)`` first."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        """``self - other``; a pending skip becomes a suppressed ``SkipTo(other)`` first."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self``: this element is already suppressed."""
        return self

5920

5921

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will write
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.
    When the parse action completes, the decorator will write
    ``"<<leaving method-name"`` followed by the returned value, or by the
    exception that the parse action raised (which is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*call_args):
        label = f.__name__
        # the last three arguments are (input string, location, tokens)
        instring, loc, toks = call_args[-3:]
        if len(call_args) > 3:
            # an extra leading argument: prefix the label with its type name
            label = f"{type(call_args[0]).__name__}.{label}"
        sys.stderr.write(
            f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {toks!r})\n"
        )
        try:
            outcome = f(*call_args)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {outcome!r})\n")
            return outcome

    z.__name__ = f.__name__
    return z

5967

5968

# convenience constants for positional expressions
# (one shared, named instance of each argument-less expression class)
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

5975

# Mini-grammar for regex-style "[...]" character sets; srange() parses its
# argument with ``_reBracketExpr``.

# backslash + one punctuation character (or a space): yields the bare character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape (\x##, \X## or \0x##): convert the digits after the x/X marker.
# str.lstrip(r"\0x") must not be used for this: it strips a *set* of
# characters, so it also consumed leading zero digits ("\x00" became "" and
# int() raised ValueError) and never removed an upper-case "X".
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape (\0##): everything after the backslash is read as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: any escape form above, else any single char except \ and ]
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as a [first, last] pair (the dash is dropped)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# whole bracket expression: "[" optional "^" members "]", with the members
# available under the results name "body"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5995

5996

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed, or any other exception is raised while
    expanding it, an empty string is returned.
    """

    def _expand(part):
        # single characters pass through; a grouped pair is an inclusive range
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return "".join(chr(code) for code in range(first, last + 1))

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(_expand(member) for member in members)
    except Exception:
        # deliberate best effort: any failure yields the empty set
        return ""

6032

6033

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its ``__name__``, or
    the name of its class when it has none).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        # apply func to each token, passing the extra args after it
        return [func(token, *args) for token in t]

    # callables without __name__ (e.g. instances) fall back to their class name
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6078

6079

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every :class:`ParserElement` found among the calling frame's local
    variables that has no custom name yet is named after the variable it is
    bound to. Does nothing when there is no calling frame.
    """
    # must be looked up in this frame: f_back is our direct caller
    caller = sys._getframe().f_back
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)

    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        if value.customName:
            # already named (possibly earlier in this same loop)
            continue
        value.set_name(var_name)

6092

6093

# Double-quoted string on a single line. Inside the quotes the regex allows
# any character except a quote, newline, carriage return or backslash, plus
# a doubled quote ("") and backslash escapes (\c, or \x followed by hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string ('' is the doubled quote).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either quoting style; the two regexes repeat the definitions above.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms (either quote character)
# and single-line forms. Unlike the expressions above, the single-line
# regexes here accept a backslash-escaped quote (\" or \') rather than a
# doubled quote. The four alternatives are combined with ``^``.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted_string (copied, so the original is left as is) prefixed with "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets expanded by srange() from \0x## hex ranges within 0xa1-0xff.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6131

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound in this namespace at this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# (camelCase names kept alongside the snake_case ones: plain aliases for the
# expression objects, replaced_by_pep8 results for the functions)
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on