Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%

1855 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

1856

1857 - ``exception_action`` - method to be called when expression fails to parse;

1858 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1859 """

1860 self.debugActions = self.DebugActions(

1861 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1862 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1863 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1864 )

1865 self.debug = True

1866 return self

1867

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Switch the display of debugging messages during pattern matching on or off.

    Pass ``flag=True`` to enable the messages, ``flag=False`` to disable them.
    Pass ``recurse=True`` to apply ``flag`` to every expression yielded by
    ``visit_all()`` instead of to this expression alone.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; custom actions can be
    installed with :class:`set_debug_actions`. Before each attempt to match
    ``wd`` the message ``"Match <exprname> at loc <n>(<line>,<col>)"`` is
    shown, followed by either a ``"Matched"`` message on success or an
    ``"Exception raised"`` message on failure. Note the use of
    :class:`set_name` to give the expression a human-readable name, which
    makes debugging and exception messages easier to follow - the default
    name of the :class:`Word` expression without ``set_name`` would be
    ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # each visited expression applies the flag to itself only; the
        # traversal is driven from here
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug (re)installs the three default debug actions
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1923

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The value is produced on first access by ``_generateDefaultName()`` and
    cached in ``_defaultName``; later accesses return the cached value.
    """
    if self._defaultName is not None:
        return self._defaultName
    self._defaultName = self._generateDefaultName()
    return self._defaultName

1929

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this hook when no name has been cached
    yet (``_defaultName`` is ``None``) and stores the returned string.
    """

1935

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, which makes debugging and
    exception messages clearer. If ``__diag__.enable_debug_on_named_expressions``
    is set to True, naming the expression also enables debug for it.

    If ``name`` is None, any custom name is cleared, and the debug flag is
    cleared as well if ``__diag__.enable_debug_on_named_expressions`` is set.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # the error message is rebuilt after the name changes, since str(self)
    # reflects the custom name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self

1963

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has been
    set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    return self.default_name if custom is None else custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` is routed through set_name()
    self.set_name(new_name)

1972

def __str__(self) -> str:
    """Return the expression's ``name`` (custom name if set, else the default name)."""
    return self.name

1975

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1978

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name,
    so that ``default_name`` regenerates it on next access.

    Returns ``self`` to allow call chaining.
    """
    self.streamlined, self._defaultName = True, None
    return self

1983

def recurse(self) -> list[ParserElement]:
    """
    Return the expressions that ``_checkRecursion`` should descend into.

    This implementation has none, so a new empty list is returned.
    """
    return list()

1986

def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a new
    list made of ``parseElementList`` followed by this expression.

    The caller's list is not modified.
    """
    trail = parseElementList + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(trail)

1991

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a ``DeprecationWarning`` and then runs ``_checkRecursion`` starting
    from an empty list. The ``validateTrace`` argument is not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2006

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method or a
    filename. When a filename is given, the file is opened with ``encoding``,
    read in full, and closed before parsing begins.

    ``parse_all`` (or its synonym ``parseAll``) is forwarded to
    :class:`parse_string`.
    """
    must_parse_all = parseAll or parse_all

    try:
        # first assume a file-like object ...
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # ... and fall back to treating the argument as a filename
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as handle:
            text = handle.read()

    try:
        return self.parse_string(text, must_parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2036

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal;
    - a string compares equal when this expression matches all of it;
    - another :class:`ParserElement` compares equal when both have equal
      instance attributes (``vars()``);
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2045

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)

2048

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this parser accepts a test string. Handy for small
    inline microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if a parse exception is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # either spelling of the flag can switch full-string matching off
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

2072

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs, one per test that was run, in which ``result`` is
    the :class:`ParseResults` of a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the pre-PEP8 keyword synonyms with their snake_case counterparts
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a multiline string is split into one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a backslash-n written literally in a test string (outside
    # quoted strings) so it can be turned into a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines that follow collected comments, are
        # accumulated and printed ahead of the next real test
        if (comment_specified and comment.matches(t, False)) or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the parse-action call site is the parse
                    # action itself; guard against the traceback ending here
                    # so StopIteration cannot escape from the handler
                    next_entry = next(it, None)
                    if next_entry is not None:
                        next_f = next_entry[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # honor full_dump here too, as the other branches
                        # that dump the parse result do
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial)
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2287

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    # the diagram support is optional, so it is imported only on demand
    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite the file and write the HTML to it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2349

# Compatibility synonyms
# Each camelCase name below is bound to the result of
# replaced_by_pep8(<old name>, <snake_case callable>); the first group is
# additionally wrapped in staticmethod(). ``defaultName`` is a plain alias
# of the ``default_name`` property.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
defaultName = default_name
# fmt: on

2384

2385

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render the anchor followed by a stand-in Empty, then show the
        # stand-in as '...'
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # the expression added after the placeholder becomes the SkipTo
        # target; skipped text is collected under the "_skipped" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched with nothing (or only "") skipped: drop the
            # first token and the "_skipped" entry
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor absent and last skipped entry is "": replace the entry
            # with a note naming the missing anchor
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression
        # cannot be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2425

2426

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name of a token is simply its class name
        return self.__class__.__name__

2437

2438

class NoMatch(Token):
    """
    A token that never matches; every parse attempt raises
    :class:`ParseException`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2452

2453

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: when Literal itself is instantiated, pick a
        # subclass with an optimized parseImpl; subclasses are created as-is
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral
        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        # first character is kept separately for a cheap pre-check in parseImpl
        self.firstMatchChar = text[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before the full startswith() test
        found = instring[loc] == self.firstMatchChar and instring.startswith(
            self.match, loc
        )
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2505

2506

class Empty(Literal):
    """
    An empty token that always matches, consuming no input and returning no
    tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # the arguments are accepted for signature compatibility with
        # Literal but ignored; the match string is always ""
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at the current location without advancing
        return loc, []

2522

2523

class _SingleCharLiteral(Literal):
    # Literal variant selected by Literal.__new__ for one-character match
    # strings; a single character comparison replaces startswith()
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2529

2530

# Register Literal on ParserElement now that the class has been defined.
# NOTE(review): presumably this is the class used to wrap plain strings that
# are combined with expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2532

2533

class Keyword(Token):
    """
    Token that matches a specified string only when it stands alone as a
    keyword, that is, when it is immediately preceded and followed by
    whitespace or non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        super().__init__()
        keyword_chars = identChars or ident_chars
        if keyword_chars is None:
            keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = text.upper()
            keyword_chars = keyword_chars.upper()
        self.identChars = set(keyword_chars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        errmsg = self.errmsg or ""
        errloc = loc
        caseless = self.caseless
        match_len = self.matchLen
        ident_chars = self.identChars

        def is_ident_char(ch):
            # in caseless mode the identifier characters are stored upper-cased
            return (ch.upper() if caseless else ch) in ident_chars

        # the two modes differ in how the text is compared and in the wording
        # of the "followed by" error message
        if caseless:
            text_found = instring[loc : loc + match_len].upper() == self.caselessmatch
            followed_msg = ", was immediately followed by keyword character"
        else:
            text_found = (
                instring[loc] == self.firstMatchChar
                and match_len == 1
                or instring.startswith(self.match, loc)
            )
            followed_msg = ", keyword was immediately followed by keyword character"

        if text_found:
            end = loc + match_len
            if loc != 0 and is_ident_char(instring[loc - 1]):
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            elif loc >= len(instring) - match_len or not is_ident_char(instring[end]):
                return end, self.match
            else:
                # followed by keyword char
                errmsg += followed_msg
                errloc = end
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2649

2650

class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        defining_text = matchString or match_string
        # the upper-cased form is what gets compared against the input
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2676

2677

class CaselessKeyword(Keyword):
    """
    Case-insensitive version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # the camelCase synonyms take precedence when given
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2701

2702

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # an explicit max_mismatches wins over the camelCase synonym
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, recording the positions that differ.

        Returns ``(end_loc, results)`` when the number of mismatches does not
        exceed ``maxMismatches``; raises :class:`ParseException` at ``loc``
        when it does, or when fewer than ``len(match_string)`` characters
        remain in the input.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only compare when the input has enough characters left
        if maxloc <= len(instring):
            caseless = self.caseless
            maxMismatches = self.maxMismatches
            mismatches = []

            for pos, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop compared every character without exceeding the
                # mismatch limit, so the match ends exactly at maxloc. Using
                # maxloc (rather than last index + 1) keeps an empty
                # match_string from consuming a character or returning a
                # location past the end of the input.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2788

2789

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Set up the character sets, the length limits and, when
        possible, a compiled regular expression used as a fast path.

        Raises :exc:`ValueError` when no initial characters are given,
        when ``min`` is below 1, or when ``max`` is given and ``min``
        exceeds it.
        """
        # a truthy camelCase keyword overrides its snake_case counterpart
        lead_spec = initChars or init_chars
        body_spec = bodyChars or body_chars
        keyword_mode = asKeyword or as_keyword
        banned_spec = excludeChars or exclude_chars

        super().__init__()
        if not lead_spec:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        lead_set = set(lead_spec)
        if banned_spec:
            banned = set(banned_spec)
            lead_set -= banned
            if body_spec:
                body_spec = "".join(set(body_spec) - banned)

        self.initChars = lead_set
        self.initCharsOrig = "".join(sorted(lead_set))

        if body_spec:
            self.bodyChars = set(body_spec)
            self.bodyCharsOrig = "".join(sorted(body_spec))
        else:
            # no separate body set: reuse the very same set object
            self.bodyChars = lead_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` is applied below
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        if exact > 0:
            # ``exact`` overrides both bounds, including the local copies
            # consulted while building the regex below
            min = max = exact
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = keyword_mode
        if keyword_mode:
            self.errmsg += " as a keyword"

        # no regex is built when either character set contains a space
        if " " in (self.initChars | self.bodyChars):
            return

        if len(self.initChars) == 1:
            lead_re = re.escape(self.initCharsOrig)
        else:
            lead_re = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # one character class, repeated
            body_re = ""
            if max == 0 and self.minLen == 1:
                repeat = "+"
            elif max == 1:
                repeat = ""
            elif self.minLen != self.maxLen:
                upper = "" if self.maxLen == _MAX_INT else self.maxLen
                repeat = f"{{{self.minLen},{upper}}}"
            else:
                repeat = f"{{{self.minLen}}}"
        elif max == 1:
            # a single character can only come from the leading set
            body_re = repeat = ""
        else:
            # leading class followed by a repeated body class; the counts
            # are one lower because the leading character is already matched
            body_re = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
            lower = min - 1 if min > 0 else ""
            upper = max - 1 if max > 0 else ""
            if max == 0 and self.minLen == 1:
                repeat = "*"
            elif max == 2:
                repeat = "?" if min <= 1 else ""
            elif min != max:
                repeat = f"{{{lower},{upper}}}"
            else:
                repeat = f"{{{lower}}}"

        pattern = f"{lead_re}{body_re}{repeat}"
        if self.asKeyword:
            pattern = rf"\b{pattern}\b"
        self.reString = pattern

        try:
            compiled = re.compile(pattern)
        except re.error:
            # keep the character-by-character parseImpl
            self.re = None  # type: ignore[assignment]
            return

        self.re = compiled
        self.re_match = compiled.match
        self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        """Build the ``W:(...)`` default name, with a length suffix when
        the length limits differ from the defaults."""

        def abbreviate(chars):
            # collapsed range text, cut to 16 characters including "..."
            text = _collapse_string_to_ranges(chars, re_escape=False)
            return text if len(text) <= 16 else text[:13] + "..."

        lead_text = abbreviate(self.initChars)
        if self.initChars == self.bodyChars:
            label = f"W:({lead_text})"
        else:
            label = f"W:({lead_text}, {abbreviate(self.bodyChars)})"

        shortest, longest = self.minLen, self.maxLen

        # default limits: no length specification is appended
        if shortest <= 1 and longest == _MAX_INT:
            return label

        if shortest == longest:
            if shortest == 1:
                # drop the leading "W:" for a single character
                return label[2:]
            return label + f"{{{shortest}}}"

        if longest == _MAX_INT:
            return label + f"{{{shortest},...}}"

        return label + f"{{{shortest},{longest}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a word character by character (used when no regex was
        installed by ``__init__``).

        Raises :class:`ParseException` when the first character is not an
        initial character, when the word is shorter than ``minLen``, when
        a maximum was specified and another body character follows, or
        when keyword mode is on and the word is adjacent to a body
        character.
        """
        begin = loc
        if instring[begin] not in self.initChars:
            raise ParseException(instring, begin, self.errmsg, self)

        allowed = self.bodyChars
        total = len(instring)
        stop = min(begin + self.maxLen, total)

        loc = begin + 1
        while loc < stop and instring[loc] in allowed:
            loc += 1

        # is the character right after the word another body character?
        more_follows = loc < total and instring[loc] in allowed

        rejected = (
            loc - begin < self.minLen
            or (self.maxSpecified and more_follows)
            or (
                self.asKeyword
                and ((begin > 0 and instring[begin - 1] in allowed) or more_follows)
            )
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a word using the regular expression compiled in ``__init__``."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()

        raise ParseException(instring, loc, self.errmsg, self)

3045

3046

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    for the case where a single character out of a given string of
    characters is to be matched.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Delegate to :class:`Word` with ``exact=1``.

        A truthy ``asKeyword`` / ``excludeChars`` overrides the
        corresponding snake_case argument.
        """
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3067

3068

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """Record the pattern; compilation is deferred to the ``re`` property.

        ``pattern`` may be a non-empty string, an object exposing
        ``pattern`` and ``match`` attributes (a compiled RE), or a
        callable that is invoked later to produce either of those.

        :raises ValueError: if ``pattern`` is an empty string
        :raises TypeError: if ``pattern`` is none of the accepted kinds
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already compiled (stdlib ``re`` or a compatible module)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the parseImpl variant; as_match wins if both are requested
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, built on first access.

        Besides compiling, this makes sure ``_may_return_empty`` is
        populated.  Previously the flag was only computed when the
        pattern was compiled here from a string; a precompiled RE (passed
        directly or returned by a callable) left it at ``None``.

        :raises ValueError: if a string pattern fails to compile
        """
        compiled = self._re

        if not compiled:
            if callable(self.pattern):
                # replace self.pattern with whatever the deferred factory returns
                self.pattern = cast(Callable[[], str], self.pattern)()

                # the factory may hand back a compiled RE instead of a str
                if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                    compiled = cast(re.Pattern[str], self.pattern)
                    self.pattern = self.reString = compiled.pattern

            if not compiled:
                try:
                    compiled = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                # as before, a freshly compiled pattern always refreshes the flag
                self._may_return_empty = compiled.match("") is not None

            self._re = compiled

        # precompiled patterns reach this point with the flag still unset
        if self._may_return_empty is None:
            self._may_return_empty = compiled.match("") is not None

        return compiled

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string (computed lazily)."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Return ``Re:(<repr of pattern>)`` with doubled backslashes collapsed."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return the matched text as
        :class:`ParseResults`, with any named groups as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for group_name, group_text in result.groupdict().items():
            ret[group_name] = group_text

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the tuple from ``Match.groups()``."""
        # same past-end-of-string guard as parseImpl
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the match object itself."""
        # same past-end-of-string guard as parseImpl
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"

        :raises TypeError: if this expression was built with
            ``as_group_list=True``, or with ``as_match=True`` and
            ``repl`` is callable
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # the token is a match object: expand the template directly
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # the token is the matched text: run the substitution on it
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3265

3266

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None``  => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace spellings and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        """Build the regular expressions used to match and unquote the string.

        :raises ValueError: if ``quote_char`` or ``end_quote_char`` is
            empty after stripping whitespace, or if the generated
            pattern does not compile
        """
        super().__init__()
        # merge the camelCase spellings with the snake_case arguments
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of the end quote that is not followed
            # by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # The numeric-escape alternative is deliberately a plain raw
                # string, NOT an f-string: inside an f-string ``{3}``, ``{2}``
                # and ``{4}`` are replacement fields that render as the bare
                # digits, which silently turned the repetition counts into
                # literal characters.
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the quoting characters for use in messages."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns the new location and the matched text; when
        ``unquote_results`` is set the delimiters are removed and escape
        sequences are processed.  Raises :class:`ParseException` when no
        quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        def convert_escaped_numerics(s: str) -> str:
            # ``s`` is the escape sequence without its leading backslash
            if s == "0":
                return "\0"
            if s.isdigit() and len(s) == 3:
                return chr(int(s, base=8))
            elif s.startswith(("u", "x")):
                return chr(int(s[1:], base=16))
            else:
                return s

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3504

3505

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        """Record the excluded characters and the length limits.

        :raises ValueError: if ``min`` is less than 1
        """
        super().__init__()
        self.skipWhitespace = False

        # the snake_case argument wins when it is non-empty
        excluded = not_chars or notChars
        self.notChars = excluded
        self.notCharsSet = set(excluded)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return ``!W:(<excluded characters>)``, shortening the list when
        its collapsed range form is longer than 16 characters."""
        shown = self.notChars
        if len(_collapse_string_to_ranges(shown)) > 16:
            shown = f"{shown[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded character,
        the maximum length, or the end of the input is reached.

        Raises :class:`ParseException` when the first character is
        excluded or fewer than ``minLen`` characters were consumed.
        """
        excluded = self.notCharsSet
        begin = loc
        if instring[begin] in excluded:
            raise ParseException(instring, begin, self.errmsg, self)

        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - begin >= self.minLen:
            return loc, instring[begin:loc]

        raise ParseException(instring, loc, self.errmsg, self)

3584

3585

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """Record the whitespace characters to match and the length limits."""
        super().__init__()
        self.matchWhite = ws

        # hand every known whitespace character that is NOT being matched
        # to set_whitespace_chars
        unmatched = "".join(c for c in self.whiteStrs if c not in ws)
        self.set_whitespace_chars(unmatched, copy_defaults=True)

        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        """Concatenate the display names of the matched whitespace characters."""
        return "".join(map(White.whiteStrs.__getitem__, self.matchWhite))

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of the configured whitespace characters at ``loc``.

        Raises :class:`ParseException` when the first character is not
        one of them or the run is shorter than ``minLen``.
        """
        wanted = self.matchWhite
        begin = loc
        if instring[begin] not in wanted:
            raise ParseException(instring, begin, self.errmsg, self)

        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - begin >= self.minLen:
            return loc, instring[begin:loc]

        raise ParseException(instring, loc, self.errmsg, self)

3663

3664

class PositionToken(Token):
    """Base class for the position-related tokens that derive from it
    (such as :class:`GoToColumn` and :class:`LineStart`)."""

    def __init__(self) -> None:
        """Initialize the token and set its two parsing flags."""
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3670

3671

class GoToColumn(PositionToken):
    """Token that advances the parse position to a given column of the
    input text; handy when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Advance ``loc`` over ignorables and whitespace, stopping as soon as
        the target column is reached or a non-space character is found.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1
        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ParseException when ``loc`` is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        stop = loc + self.col - current
        return stop, instring[loc:stop]

3704

3705

class LineStart(PositionToken):
    r"""Matches only when the current position is at the start of a line
    of the string being parsed.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper element used by preParse to skip the remaining whitespace chars
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip whitespace via ``self.skipper``; when "\n" was originally a
        whitespace character, also step over consecutive newlines.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # slicing (rather than indexing) stays safe at the end of the input
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1, else raise."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3753

3754

class LineEnd(PositionToken):
    """Matches only when the current position is at the end of a line of
    the string being parsed.
    """

    def __init__(self) -> None:
        super().__init__()
        # "\n" must not be treated as skippable whitespace by this element
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a newline at ``loc`` (returning it as the token), or succeed
        with no tokens exactly at the end of the input; otherwise raise.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3776

3777

class StringStart(PositionToken):
    """Matches only when the current position is at the start of the
    string being parsed.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at ``loc == 0``, or at the position that
        ``preParse`` reaches from 0 (i.e. everything before ``loc`` was
        skipped by pre-parsing); otherwise raise ParseException.
        """
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3793

3794

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ParseException when input remains.

        Exactly at the end, the returned location is advanced by one; when
        ``loc`` is already past the end it is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # loc >= end here, so the two remaining cases are exhaustive (the
        # original trailing ``raise`` after them could never execute)
        if loc == end:
            return loc + 1, []
        return loc, []

3813

3814

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    # NOTE: the docstring is a raw string so that ``\b`` is kept as a
    # backslash followed by "b" instead of being read as a backspace escape.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # ``wordChars`` is the keyword-only alternate spelling; it takes
        # precedence whenever it differs from the default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at ``loc == 0``, or when the previous
        character is not a word character and the current one is;
        otherwise raise ParseException.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3841

3842

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # NOTE: the docstring is a raw string so that ``\b`` is kept as a
    # backslash followed by "b" instead of being read as a backspace escape.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # ``wordChars`` is the keyword-only alternate spelling; it takes
        # precedence whenever it differs from the default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at (or past) the end of ``instring``, or
        when the previous character is a word character and the current one
        is not; otherwise raise ParseException.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # At loc 0 nothing precedes the position, so no word can end
                # here. Without this guard ``instring[loc - 1]`` would be
                # ``instring[-1]`` and the result would depend on the *last*
                # character of the input.
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3870

3871

class Tag(Token):
    """
    A meta-element that adds a named result to the parsed tokens, so
    that it can be inspected later in a parse action or when processing
    the parsed results. An optional tag value may be given; it defaults
    to ``True``.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        # the tag itself is written by a parse action attached to this element
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value in ``tokens`` under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3915

3916

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a single string, a single ParserElement, a
        generator, or any other iterable; strings are wrapped using
        ``self._literalStringClass``. Anything else is tried as ``list(exprs)``
        and, failing that, stored as a one-element list.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings with the literal class, keep the rest as given
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the change does not affect the original
            # sub-expression objects
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the change does not affect the original
            # sub-expression objects
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable on this expression and on every
        contained expression; a ``Suppress`` that is already registered is
        not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # propagate the entry that the base class just appended
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten directly nested
        expressions of the same class; does nothing if already streamlined.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(other) -> bool:
            # nested expression may be merged only if doing so loses nothing:
            # same class, no parse actions, no results name, no debug flag
            return (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if can_absorb(other):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if can_absorb(other):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a DeprecationWarning, validates each contained
        expression, then runs ``_checkRecursion``.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions are
        themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is enabled
        and not suppressed) if a contained expression already has a results
        name of its own.
        """
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4088

4089

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl has passed one of these, failures of
        # the remaining expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``, wrapping strings with
        ``_literalStringClass`` and replacing each ``...`` with a ``SkipTo``
        of the following expression.

        Raises ``Exception`` if the last element is ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair each element with its successor; the final element is never
            # an Ellipsis (checked above) and is left in place by the slice
            # assignment below
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first expression, unless
            # that expression is itself a White
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips with the expression that follows them,
        streamline via the base class, attach IndentedBlock anchors, and
        recompute ``_may_return_empty``.
        """
        # NOTE(review): unlike ParseExpression.streamline and
        # MatchFirst.streamline there is no ``if self.streamlined: return``
        # here, so the IndentedBlock loop below runs on every call and would
        # add another parse action to ``prev`` each time - confirm whether
        # repeated calls are expected.

        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            deleted_expr_marker = NoMatch()
            # NOTE(review): the loop iterates a slice copy of self.exprs, while
            # the marker is written into self.exprs itself, so the
            # ``e is deleted_expr_marker`` test below looks like it can never
            # be true - verify the intended handling of adjacent pending skips.
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # fold the following expression into the pending skip and
                    # mark its old slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # ``cur_=cur`` binds the current block at definition time;
                    # the action records the column where ``prev`` matched
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` has been seen, a failure of any later
        expression is raised as ParseSyntaxException.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type test (not isinstance): only the marker class itself
            # switches on error-stop mode
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending to this And in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot return empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the stepped slice picks exactly the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4256

4257

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may return empty if any alternative may; skips whitespace only
            # if every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute the flags that are
        derived from the alternatives.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the longest match.

        Alternatives are first probed with ``try_parse``; the candidates are
        then re-parsed from longest to shortest. If nothing matches, the
        furthest-located fatal exception is raised if there is one, otherwise
        the furthest-located ParseException.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # once a fatal has been recorded, non-fatal bookkeeping is
                # reset (and no longer updated by the ParseException branch)
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # (end location, tokens) of the best re-parse so far
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie for the lead, break it by
                # the longer string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending to this Or in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4414

4415

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. When
    several alternatives could match, the one listed first wins.
    May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the
        alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()

        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ParseFatalException is re-raised immediately. If every alternative
        fails, the exception located furthest into the input is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as exc:
                if exc.loc > furthest_loc:
                    furthest_exc, furthest_loc = exc, exc.loc
            except IndexError:
                # treat running off the input as a failure at its end
                end = len(instring)
                if end > furthest_loc:
                    furthest_exc = ParseException(instring, end, alt.errmsg, self)
                    furthest_loc = end

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # when the furthest failure is at the parse start position, report this
        # expression's collective message rather than a single alternative's
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other`` by appending to this MatchFirst in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(alt, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in alt.suppress_warnings_
                for alt in self.exprs
            )
        ):
            cls_name = type(self).__name__
            message = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {cls_name} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(message, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4524

4525

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        self.skipWhitespace = True
        # the required/optional groupings are built lazily on first parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``expr &= other`` by appending to this Each in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute ``_may_return_empty``."""
        super().streamline()
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all contained expressions in whatever order they occur.

        First probes with ``try_parse`` to discover the order in which the
        expressions match, then parses for real in that order. Raises the
        furthest-located ParseFatalException if any occurred, or a
        ParseException naming the required expressions that were not found.
        """
        if self.initExprGroups:
            # map the inner expression of each Opt back to the Opt itself, so
            # the Opt is what gets recorded in the match order
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder: list[ParserElement] = []

        keepMatching = True
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        # keep sweeping over the remaining expressions until a full pass
        # matches none of them
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required and optional expressions match at most once;
                    # the multi-match expressions stay available
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie for the lead, break it by
                # the longer string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join(str(e) for e in tmpReqd)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"

4700

4701

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single contained expression, stored as ``self.expr``.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a plain string to a parser element
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # inherit parsing attributes from the contained expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression as a one-element list (empty if unset)."""
        return [] if self.expr is None else [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the contained expression.

        Raises ``ParseException`` if no expression has been defined. Missing
        details on a propagating parse exception are filled in from this call.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            # only fill in fields that are still unset/falsy on the exception
            if not pbe.pstr:
                pbe.pstr = instring
            if not pbe.loc:
                pbe.loc = loc
            if not pbe.parser_element:
                pbe.parser_element = self
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace``; when ``recursive``, also apply it to a
        copy of the contained expression."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace``; when ``recursive``, also apply it to a
        copy of the contained expression."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the contained expression.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and its contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element already appears
        in ``parseElementList``; otherwise continue into the contained expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList + [self])

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` before validating."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return "{}:({})".format(type(self).__name__, self.expr)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4810

4811

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Parameters:

    - ``expr`` - expression to be matched on each line of the block
    - ``recursive`` - (default= ``False``) if ``True``, each matched line may be
      followed by a nested ``IndentedBlock`` at a greater indentation column
    - ``grouped`` - (default= ``True``) if ``True``, the block's tokens are
      wrapped in a :class:`Group`
    """

    class _Indent(Empty):
        """Empty match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; nested blocks overwrite this
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)

        # every line of the block must start at the anchor's column
        inner_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            deeper_indent = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_indent + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        block_expr = Group(block) if self._grouped else block
        return (block_expr + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4874

4875

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # only location 0 qualifies as the start of the string
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4895

4896

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # column 1 is the first column of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4928

4929

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse with self.expr._parse, then delete the list contents of the
        # returned ParseResults in place; this keeps any named results that
        # were defined in the FollowedBy expression
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # return the original location: a lookahead never advances the parse
        return loc, lookahead

4964

4965

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # derive the lookbehind distance from the expression where possible;
        # "exact" means the match is attempted at a single, known offset
        exact = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            exact = False
        self.exact = exact
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # always return an empty token list (named results are retained)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)
        window_len = len(window)

        # NOTE(review): when loc > self.retreat the final offset is
        # self.retreat + 1, one more than len(window) - confirm this is intended
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = test_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                return loc, ret

        # no offset in the window produced a match
        raise last_expr

5045

5046

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        for key, value in (
            ("locn_start", begin),
            ("value", tokens),
            ("locn_end", end),
        ):
            located[key] = value
        # must return as a list when named, so that the name will be attached
        # to the complete group
        return end, ([located] if self.resultsName else located)

5087

5088

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # so the setting is not propagated to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

5130

5131

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition expressions; matches the
    contained expression one or more times, optionally stopping when a
    sentinel ("stop on") expression is seen.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # the legacy camelCase keyword takes precedence over stop_on
        sentinel = stopOn or stop_on
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel expression."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        parse_item = self.expr._parse
        ender_guard = (
            self.not_ender.try_parse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if ender_guard is not None:
            ender_guard(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)
        try:
            skip_ignorables = self._skipIgnorables if self.ignoreExprs else None
            while True:
                if ender_guard is not None:
                    ender_guard(instring, loc)
                if skip_ignorables is not None:
                    preloc = skip_ignorables(instring, loc)
                else:
                    preloc = loc
                loc, tmptokens = parse_item(instring, preloc, do_actions)
                tokens += tmptokens
        except (ParseException, IndexError):
            # the first failed repetition (or sentinel hit) ends the match
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warn_code = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warn_code not in self.suppress_warnings_
        ):
            # warn once, on the first contained expression with its own results name
            for e in [self.expr] + self.expr.recurse():
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                if warn_code in e.suppress_warnings_:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

5207

5208

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # the contained expression in braces, followed by an ellipsis
        return "{" + str(self.expr) + "}..."

5239

5240

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        # zero repetitions is a valid match
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # not even one repetition matched: succeed with empty results
            matched = loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        return "[" + str(self.expr) + "]..."

5273

5274

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    ``min`` and ``max``, when given, bound the number of list elements;
    a ``ValueError`` is raised if ``min`` is less than 1 or ``max`` is
    less than ``min``.

    Example::

        DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the repetition bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one element, then (delim + element) repeated within the bounds
        upper_bound = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            upper_bound,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        return "{0} [{1} {0}]...".format(content_expr, self.raw_delim)

5341

5342

5343class _NullToken:

5344 def __bool__(self):

5345 return False

5346

5347 def __str__(self):

5348 return ""

5349

5350

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may be matched at most once
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel marking "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            loc, tokens = inner._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            # no match: fall back to the default value, if one was given
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the legacy camelCase keyword takes precedence over fail_on
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression used only to step over ignorable text while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, as
        ``ParseElementEnhance.ignore`` does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the skipped text (plus the target's tokens when
        ``include`` was set). Raises ``ParseException`` if the end of the
        input is reached, or a ``fail_on`` expression matches, before the
        target is found.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5580

5581

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created; __del__ reports this
        # location if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' assignment, compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        A string operand is converted with ``_literalStringClass``; any other
        non-``ParserElement`` operand yields ``NotImplemented``.
        """
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        # mirror the parsing-related settings of the assigned expression
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the caller's stack entry so __or__ can detect "fwd << a | b"
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; delegates to ``__lshift__`` for ``ParserElement`` operands."""
        # For any other operand type NotImplemented is returned; Python's
        # augmented-assignment protocol then falls back to the binary '<<'.
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build ``self | other``, warning when it shares a stack entry with a ``<<`` assignment."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # point the warning at the location captured in __init__
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression at ``loc``.

        Warns (when enabled) if no expression has been assigned. With left
        recursion disabled this simply defers to the base class; otherwise
        the bounded recursion algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the entry points found in the inspected frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                # memo keys are (location, element, do_actions)
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure located one position before ``loc``
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # store the failure under both keys before re-raising
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn off whitespace skipping on this element; ``recursive`` is not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn on whitespace skipping on this element; ``recursive`` is not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once and return ``self``."""
        if not self.streamlined:
            # set the flag before descending into the contained expression
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            # extend a copy of the trace so the caller's list is left untouched
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<class name>: <contained expression>"`` as the default name."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                # only the first 1000 characters of the representation are kept
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """Copy this element.

        When an expression is attached, the base-class copy is returned;
        otherwise a new ``Forward`` whose expression is this ``Forward``
        is returned.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning (when enabled) if no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5849

5850

class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses whose job is
    to convert the results produced by a wrapped expression.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is part of the signature but is not passed on to the
        # base class; converters start out with list-saving switched off.
        super().__init__(expr)
        self.saveAsList = False

5859

5860

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)

        # the legacy camelCase keyword wins whenever it is supplied
        if joinString is None:
            joinString = join_string

        # Whitespace skipping is switched off for the contained expressions
        # when adjacency is required, then switched back on for the Combine
        # element itself.
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True

        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Add an ignore expression and return ``self``.

        With ``adjacent`` set, ``ParserElement.ignore`` is invoked directly
        on this element; otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
            return self

        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated token."""
        # start from a copy of the incoming results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]

        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        needs_wrapping = self.resultsName and combined.haskeys()
        if needs_wrapping:
            return [combined]
        return combined

5916

5917

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    Passing ``aslist=True`` returns the grouped tokens as a Python list
    rather than as a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list (or a ``ParseResults.List``)."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5953

5954

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, at the
    same time, as a dictionary: each element can be looked up using the
    first token of that element as its key. Useful for tabular report
    scraping when the first column can serve as an item key.

    Passing ``asdict=True`` returns the parsed tokens as a Python dict
    rather than as a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first token."""
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # integer keys are stored under their string form
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            # key only: store an empty string as the value
            if entry_len == 1:
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            # key plus one plain value: store that value directly
            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                tokenlist[key] = _ParseResultsWithOffset(entry[1], position)
                continue

            # anything else: the value is everything after the key
            try:
                remainder = entry.copy()
            except Exception:
                raise TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                ) from None

            del remainder[0]

            # a lone, unnamed value is unwrapped; otherwise keep the collection
            keep_collection = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            stored = remainder if keep_collection else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(stored, position)

        outcome = tokenlist.as_dict() if self._asPythonDict else tokenlist
        if self.resultsName:
            return [outcome]
        return outcome

6040

6041

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` is stood in for by a pending skip until the
        # expression to skip to becomes known in __add__ / __sub__
        is_placeholder = expr is ...
        super().__init__(_PendingSkip(NoMatch()) if is_placeholder else expr)

    def __add__(self, other) -> ParserElement:
        """``self + other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """``self - other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self`` unchanged - this element already suppresses its output."""
        return self

6095

6096

6097# XXX: Example needs to be re-done for updated output

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the decorated parse action is invoked, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``. When the action finishes, a ``"<<leaving"``
    line follows, showing either the returned value or the type and message
    of the exception that was raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)
    emit = sys.stderr.write

    def z(*paArgs):
        # the trailing three arguments are always (string, location, tokens)
        text, position, tokens = paArgs[-3:]

        label = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument: prefix the label with its type name
            owner = type(paArgs[0]).__name__
            label = f"{owner}.{label}"

        emit(f">>entering {label}(line: {line(position, text)!r}, {position}, {tokens!r})\n")
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        emit(f"<<leaving {label} (ret: {outcome!r})\n")
        return outcome

    z.__name__ = f.__name__
    return z

6146

6147

6148# convenience constants for positional expressions

empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- mini-grammar for the bracket expressions accepted by srange() ---

# backslash followed by one punctuation character -> that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape (``\x##``, ``\X##``, ``\0x##`` or ``\0X##``) -> the character with
# that code point.  The digits are taken as everything after the first
# 'x'/'X'.  The former ``lstrip(r"\0x")`` stripped a *set* of characters, so
# it reduced an all-zero value such as ``\x00`` to an empty string and left
# an uppercase 'X' in place; both made ``int(..., 16)`` raise.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape (``\0`` followed by octal digits) -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one of the escapes above, or a single character other than '\' and ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "<char>-<char>", grouped so srange() can tell a range from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[", optional "^" (results name "negate"), one or more ranges/characters
# (results name "body"), "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6174

6175

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If anything goes wrong while parsing or expanding ``s``, an empty
    string is returned.
    """

    def _expand(item):
        # a grouped pair is a range: produce every character from first to last
        if not isinstance(item, ParseResults):
            return item
        first, last = ord(item[0]), ord(item[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join([_expand(item) for item in body])
    except Exception:
        return ""

6215

6216

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after the class
    of ``func`` when it has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # callables without a __name__ fall back to their class name
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6261

6262

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable of the calling frame that holds a
    :class:`ParserElement` without a custom name is named after the
    variable it is bound to.
    """

    # sys._getframe may be missing in the current Python; do nothing then
    frame_getter = getattr(sys, "_getframe", None)
    caller = frame_getter(1) if frame_getter is not None else None
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        # leave elements that already have a custom name alone
        if value.customName:
            continue
        value.set_name(var_name)

6281

6282

# Text enclosed in double quotes, returned as one token. The body may contain
# doubled quotes ("") and backslash escapes, but no raw CR/LF characters.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same as above, using single quotes ('' stands for an embedded quote).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above; the double-quoted alternative is listed first.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Four alternatives combined with '^': triple-double-quoted, triple-single-quoted,
# double-quoted and single-quoted strings. In the single-line forms an embedded
# quote is written as a backslash-escaped quote rather than a doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string (copied, so quoted_string itself is untouched) prefixed with "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built with srange() from hex-escaped code point ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement bound to a module-level name at this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# fmt: off
# plain aliases: each camelCase name is bound to the same object as its snake_case name
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# function aliases produced by replaced_by_pep8 from the snake_case functions
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on