Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%

1855 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

1856

1857 - ``exception_action`` - method to be called when expression fails to parse;

1858 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1859 """

1860 self.debugActions = self.DebugActions(

1861 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1862 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1863 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1864 )

1865 self.debug = True

1866 return self

1867

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Switch the display of debugging messages during pattern matching on or off.

    Parameters:

    - ``flag`` - (default= ``True``) ``True`` enables debugging, ``False`` disables it
    - ``recurse`` - (default= ``False``) when ``True``, set the debug flag on this
      expression and all sub-expressions

    Returns ``self`` so that calls can be chained.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # Delegate to each visited expression individually; recursion is switched
        # off for those calls so every expression is handled exactly once.
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
    elif not flag:
        self.debug = False
    else:
        # Enabling always installs the default debug actions.
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    return self

1923

@property
def default_name(self) -> str:
    """
    Auto-generated name of this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is
    needed and stored in ``_defaultName`` for later accesses.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1929

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this hook when no default name has been
    stored yet, and keeps the returned string in ``_defaultName``.
    """

1935

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a name to this expression, which makes debugging and exception messages
    clearer. If `__diag__.enable_debug_on_named_expressions` is set to True, setting
    a name will also enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Returns ``self`` so that calls can be chained.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # The error message is rebuilt after the name changes so that it reports the new name.
    self.errmsg = "Expected " + str(self)

    debug_follows_name = __diag__.enable_debug_on_named_expressions
    if debug_follows_name:
        # Named -> debug on; name cleared -> debug off.
        self.set_debug(name is not None)

    return self

1963

@property
def name(self) -> str:
    """Name of this expression: the custom name if one was set, else the generated default."""
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # Assigning to ``name`` goes through ``set_name`` so its side effects still happen.
    self.set_name(new_name)

1972

def __str__(self) -> str:
    """Return the expression's ``name`` (custom name if set, otherwise the default name)."""
    return self.name

1975

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1978

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard its stored default name.

    Clearing ``_defaultName`` makes the ``default_name`` property regenerate the
    name on its next access. Returns ``self`` so that calls can be chained.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1983

def recurse(self) -> list[ParserElement]:
    """
    Return the expressions that ``_checkRecursion`` descends into.

    This implementation returns an empty list.
    """
    return []

1986

def _checkRecursion(self, parseElementList):
    # Walk the expressions returned by ``recurse()``, handing each one a copy of the
    # list of elements visited so far with this element appended. A new list is
    # built so the caller's ``parseElementList`` is not modified.
    subRecCheckList = parseElementList[:] + [self]
    for e in self.recurse():
        e._checkRecursion(subRecCheckList)

1991

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Always emits a ``DeprecationWarning``; the ``validateTrace`` argument is accepted
    but not used.
    """
    deprecation_text = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_text, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2006

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:

    - ``file_or_filename`` - an object with a ``read()`` method, or a path to open
    - ``encoding`` - (default= ``"utf-8"``) encoding used when a path is opened
    - ``parse_all`` / ``parseAll`` - (default= ``False``) flag passed to
      :class:`parse_string`; the parse is "all" if either one is true
    """
    want_all = parseAll or parse_all

    # Duck-typing: try to read from the argument first, and only treat it as a
    # path when it has no ``read`` attribute.
    try:
        stream = typing.cast(TextIO, file_or_filename)
        file_contents = stream.read()
    except AttributeError:
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source_file:
            file_contents = source_file.read()

    try:
        return self.parse_string(file_contents, want_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2036

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal
    - a string compares equal if this expression matches all of it
    - another ``ParserElement`` compares equal if both have equal instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2045

def __hash__(self):
    # Hash by object identity.
    return id(self)

2048

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match; it is
      converted with ``str()`` before parsing
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if a ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # Both spellings default to True, so passing False to either one disables parse-all.
    require_all = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_all)
    except ParseBaseException:
        return False
    else:
        return True

2072

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per test, where ``result`` is the
    ``ParseResults`` of a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge the snake_case arguments with their legacy camelCase synonyms.
    # Flags that default to True are and-ed (either spelling can switch them off);
    # flags/values that default to False/None are or-ed (either spelling can switch them on).
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n in a test string (outside quoted strings) and
    # replaces it with a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines that follow a comment, are collected and
        # emitted as a heading for the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a parse failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # no replacement text from the callback: fall back to the
                        # normal dump, honoring full_dump like the other branches
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial), so
                    # fall back to its repr instead of raising inside this handler
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2287

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    # Render the HTML before touching the output target: opening a path with "w"
    # truncates it immediately, so rendering first means a rendering error cannot
    # leave an empty or truncated file behind.
    diagram_html = railroad_to_html(railroad, embed=embed, **kwargs)

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(diagram_html)
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(diagram_html)

2349

# Compatibility synonyms
# Each legacy camelCase name below is bound to the result of replaced_by_pep8(),
# called with the legacy name and the snake_case method it stands for.
# fmt: off
# synonyms wrapped in staticmethod()
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# synonyms for instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# the default_name property is aliased directly, without a replaced_by_pep8 wrapper
defaultName = default_name
# fmt: on

2384

2385

class _PendingSkip(ParserElement):
    # Internal placeholder class that marks the place where '...' was added to a
    # parser element. Once another ParserElement is added to it, this placeholder
    # is replaced with a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render "anchor + Empty()" and show the Empty part as an ellipsis.
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # Resolve the pending '...' now that the expression to skip to is known.
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the first token
            # and the "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the last skipped entry is empty: record the anchor as missing instead
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # A placeholder that was never combined with a following expression cannot parse.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2425

2426

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        # tokens always initialize the base class with savelist=False
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name of a token is the name of its concrete class
        return type(self).__name__

2437

2438

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2452

2453

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl
        # (only when Literal itself is being instantiated, not one of its subclasses)
        if cls is Literal:
            match_string = matchString or match_string
            if not match_string:
                # an empty match string produces an Empty instance
                return super().__new__(Empty)
            if len(match_string) == 1:
                # a one-character match string produces a _SingleCharLiteral instance
                return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        # the legacy keyword argument, when given, takes precedence
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        # slicing (rather than indexing) yields "" for an empty match string
        self.firstMatchChar = match_string[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before calling startswith for the full comparison
        if instring[loc] == self.firstMatchChar and instring.startswith(
            self.match, loc
        ):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

2505

2506

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # the arguments are accepted but ignored; the match string is always ""
        super().__init__("")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed without consuming input and without producing tokens
        return loc, []

2522

2523

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only the single character at loc.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] == self.firstMatchChar:
            return loc + 1, self.match
        raise ParseException(instring, loc, self.errmsg, self)

2529

2530

# Register Literal as ParserElement's literal-string class; the assignment is made
# here, after the Literal class has been defined.
ParserElement._literalStringClass = Literal

2532

2533

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    # identifier characters used when a Keyword is created without ident_chars
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        super().__init__()
        # the legacy camelCase keyword arguments, when given, take precedence
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        # slicing yields "" for an empty match string, which is rejected just below
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case, so the match string and
            # the identifier characters are upper-cased once here
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # errmsg/errloc start as the plain "no match" error at loc; they are refined
        # below when the text matches but a keyword boundary check fails
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                # the character before the match must not be an identifier character
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    # the match must end the input, or the character after it must
                    # not be an identifier character
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    # NOTE(review): this message lacks the leading "keyword" that the
                    # case-sensitive branch below uses - confirm whether that is intended
                    errmsg += ", was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            # same boundary checks as above, without case folding
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2649

2650

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        as_given = matchString or match_string
        # The base class stores the upper-cased text, which parseImpl compares against.
        super().__init__(as_given.upper())
        # Preserve the defining literal; it is what a successful match returns.
        self.returnString = as_given
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2676

2677

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # The legacy camelCase arguments, when given, take precedence; everything
        # else is handled by Keyword with caseless matching switched on.
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2701

2702

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # An explicit max_mismatches (including 0) overrides the legacy keyword.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        target = self.match_string
        window_end = loc + len(target)

        # Not enough input left to hold the whole match string.
        if window_end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        allowed = self.maxMismatches
        ignore_case = self.caseless
        mismatches = []
        last_index = 0

        candidates = zip(instring[loc:window_end], target)
        for last_index, (src, mat) in enumerate(candidates):
            if ignore_case:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(last_index)
                # Give up as soon as the mismatch budget is exceeded.
                if len(mismatches) > allowed:
                    raise ParseException(instring, loc, self.errmsg, self)

        # The new location is one past the last compared character.
        new_loc = loc + last_index + 1
        results = ParseResults([instring[loc:new_loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return new_loc, results

2788

2789

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
                        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Build the character sets, length limits and (when possible) an
        equivalent compiled regex for this word.

        The camelCase keyword arguments are legacy spellings; a truthy
        legacy value takes precedence over its snake_case counterpart.

        :raises ValueError: if the initial character string is empty,
            if ``min < 1``, or if ``max`` is given and ``min > max``.
        """
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body set is the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` is applied, so ``exact`` alone does not
        # count as an explicitly specified maximum
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # ``exact`` overrides both limits
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class, repeated
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # leading class followed by a repeated body class; the body
                # counts are one less because the leading char is consumed
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                self.re = None  # type: ignore[assignment]
            else:
                # regex is usable: route parsing through the regex matcher
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def copy(self) -> Word:
        """Return a copy of this expression whose parse method is bound to
        the copy rather than to the original.

        ``parseImpl`` is only overridden per-instance when ``__init__``
        managed to compile a regex (which is also when ``re_match`` is
        set).  Rebinding unconditionally would point a regex-less copy at
        ``parseImpl_regex``, which then fails on the missing ``re_match``.
        """
        ret: Word = cast(Word, super().copy())
        # presumably super().copy() carries instance attributes over - verify
        if hasattr(ret, "re_match"):
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        """Return a short ``W:(...)`` description of the character sets,
        with a ``{min,max}`` suffix when length limits apply."""

        def charsAsStr(s):
            # abbreviate long character sets to keep names readable
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-scanning matcher used when no regex is available.

        Returns ``(new_loc, matched_text)``; raises :class:`ParseException`
        if the word does not start at ``loc``, is too short, runs past an
        explicitly specified maximum, or (for keywords) is adjacent to
        another body character.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # more word characters follow the allowed maximum
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when a pattern
        could be compiled; returns ``(match_end, matched_text)``."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3050

3051

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character taken from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Delegate to :class:`Word` with ``exact=1``.

        A truthy legacy ``asKeyword`` / ``excludeChars`` argument takes
        precedence over ``as_keyword`` / ``exclude_chars``.
        """
        keyword_flag = asKeyword if asKeyword else as_keyword
        excluded = excludeChars if excludeChars else exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_flag,
            exclude_chars=excluded,
        )

3072

3073

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """Record the pattern; compilation is deferred until first use.

        ``pattern`` may be a non-empty string, an object that looks like a
        compiled RE (has ``pattern`` and ``match`` attributes), or a
        zero-argument callable producing either of those.

        :raises ValueError: if ``pattern`` is an empty string.
        :raises TypeError: if ``pattern`` is none of the accepted kinds.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match wins if both result modes are requested
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self):
        """Return a copy whose per-instance ``parseImpl`` override (if any)
        is rebound to the copy instead of the original."""
        ret: Regex = cast(Regex, super().copy())
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, resolved on first access.

        Calls a deferred pattern callable, compiles string patterns with
        ``self.flags``, and makes sure ``_may_return_empty`` is populated
        for every kind of pattern (not only for strings compiled here).

        :raises ValueError: if a string pattern fails to compile.
        """
        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the value returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                # we got a compiled RE back instead of a str - use it as-is
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
            else:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error:
                    raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")
                # Use the freshly compiled object directly; going through
                # ``self.re`` here would re-enter this property while it is
                # still being computed.
                self._may_return_empty = self._re.match("", 0) is not None

        if self._may_return_empty is None:
            # pre-compiled / callable-supplied patterns: derive the flag too,
            # unless a value was already assigned explicitly
            self._may_return_empty = self._re.match("", 0) is not None

        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Return ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return ``(match_end, ParseResults)``, with
        named groups attached as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return ``(match_end, match.groups())``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return ``(match_end, match_object)``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"

        :raises TypeError: if this expression was built with
            ``as_group_list=True``, or with ``as_match=True`` and a
            callable ``repl``.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object itself in as_match mode
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3278

3279

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace source text -> actual whitespace character
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        """Build the matching regex and the unquoting scanner.

        camelCase keyword arguments are legacy spellings; truthy legacy
        values win for the string options, while the boolean options are
        only ``True`` if both spellings are ``True``.

        :raises ValueError: if ``quote_char`` or an explicit
            ``end_quote_char`` is empty after stripping whitespace, or if
            the generated pattern fails to compile.
        """
        super().__init__()
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of the end quote that is not followed
            # by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally stop at line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # NOTE: deliberately a plain raw string, not an f-string.
                    # In an f-string "{3}", "{2}" and "{4}" are replacement
                    # fields and would be rendered as "3", "2" and "4",
                    # destroying the repeat counts.
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its delimiter(s)."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(match_end, text)``.  When ``unquote_results`` is set the
        delimiters are stripped and escapes are resolved; otherwise the
        raw matched text (including quotes) is returned.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        def convert_escaped_numerics(s: str) -> str:
            # ``s`` is the escape sequence without its leading backslash
            if s == "0":
                return "\0"
            if s.isdigit() and len(s) == 3:
                return chr(int(s, base=8))
            elif s.startswith(("u", "x")):
                return chr(int(s[1:], base=16))
            else:
                return s

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3517

3518

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        """Record the excluded characters and the length limits.

        ``not_chars`` is used when truthy, otherwise the legacy
        ``notChars`` keyword.  A positive ``exact`` overrides both
        ``min`` and ``max``.

        :raises ValueError: if ``min`` is less than 1.
        """
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars if not_chars else notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return ``!W:(...)``, abbreviating the excluded characters when
        their collapsed range form is longer than 16 characters."""
        width_limit = 16
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= width_limit:
            shown = f"{self.notChars}"
        else:
            shown = f"{self.notChars[: width_limit - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input is reached.

        Returns ``(new_loc, matched_text)``; raises
        :class:`ParseException` if the first character is excluded or
        fewer than ``minLen`` characters were consumed.
        """
        excluded = self.notCharsSet
        begin = loc
        if instring[begin] in excluded:
            raise ParseException(instring, begin, self.errmsg, self)

        # first character is already accepted; scan forward from the next
        stop = min(begin + self.maxLen, len(instring))
        cursor = begin + 1
        while cursor < stop:
            if instring[cursor] in excluded:
                break
            cursor += 1

        if cursor - begin < self.minLen:
            raise ParseException(instring, cursor, self.errmsg, self)

        return cursor, instring[begin:cursor]

3597

3598

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """Configure which whitespace characters are matched and how many.

        Every character of ``whiteStrs`` that is *not* in ``ws`` is passed
        to ``set_whitespace_chars``.  A positive ``exact`` overrides both
        ``min`` and ``max``.
        """
        super().__init__()
        self.matchWhite = ws
        skippable = [c for c in self.whiteStrs if c not in self.matchWhite]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        """Concatenate the display names of the matched whitespace characters."""
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of matching whitespace starting at ``loc``.

        Returns ``(new_loc, matched_text)``; raises
        :class:`ParseException` if the first character is not matching
        whitespace or the run is shorter than ``minLen``.
        """
        begin = loc
        if instring[begin] not in self.matchWhite:
            raise ParseException(instring, begin, self.errmsg, self)

        # first character is already accepted; scan forward from the next
        stop = min(begin + self.maxLen, len(instring))
        cursor = begin + 1
        while cursor < stop:
            if instring[cursor] not in self.matchWhite:
                break
            cursor += 1

        if cursor - begin < self.minLen:
            raise ParseException(instring, cursor, self.errmsg, self)

        return cursor, instring[begin:cursor]

3676

3677

class PositionToken(Token):
    """Common base for the position-testing tokens defined below.

    Marks the element as possibly returning an empty match and clears
    ``mayIndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3683

3684

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Advance over ignorables and whitespace until the target column.

        Returns ``loc`` unchanged when it is already at the target column;
        otherwise stops at the first non-space character, at the target
        column, or at the end of the input, whichever comes first.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1
        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume the text between ``loc`` and the target column.

        Raises ``ParseException`` when ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        newloc = loc + (self.col - current)
        return newloc, instring[loc:newloc]

3717

3718

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used to skip the remaining (non-newline) whitespace
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip whitespace (and, if originally skippable, newlines) after ``loc``."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        ret = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # step over each newline, then skip the whitespace that follows it
            while instring.startswith("\n", ret):
                ret = skip(instring, ret + 1)

        return ret

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1, else raise."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3766

3767

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc``, or an empty result exactly at end of input.

        Raises ``ParseException`` for any other character, or when ``loc``
        is already beyond the end of the input.
        """
        end = len(instring)

        if loc < end:
            if instring[loc] == "\n":
                return loc + 1, "\n"
            raise ParseException(instring, loc, self.errmsg, self)

        if loc == end:
            return loc + 1, []

        raise ParseException(instring, loc, self.errmsg, self)

3789

3790

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or at the position that
        pre-parsing from 0 would reach; otherwise raise ``ParseException``.
        """
        # see if entire string up to here is just whitespace and ignoreables
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

3806

3807

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or beyond the end of ``instring``.

        Exactly at the end, the returned location is advanced by one;
        beyond the end it is returned unchanged.  Before the end a
        ``ParseException`` is raised.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        next_loc = loc + 1 if loc == end else loc
        return next_loc, []

3826

3827

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the pre-PEP8 keyword wins only when it was given a non-default value
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where a word character
        follows a non-word character; otherwise raise ``ParseException``.
        """
        if loc != 0:
            word_chars = self.wordChars
            preceded_by_word = instring[loc - 1] in word_chars
            if preceded_by_word or instring[loc] not in word_chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3854

3855

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the pre-PEP8 keyword wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at the end of a word.

        Inside a non-empty string, the character at ``loc`` must not be a
        word character and the character before ``loc`` must be one;
        otherwise ``ParseException`` is raised.  At or past the end of the
        string (or for an empty string) the match always succeeds.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # at loc == 0 there is no preceding character; without this
                # check instring[loc - 1] would wrap around to the LAST
                # character of the string and could produce a bogus match
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3883

3884

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()
        # the tag is inserted into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value in ``tokens`` under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"

3928

3929

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # consume generators up front so the contents can be inspected below
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # if sequence of strings provided, wrap with Literal
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single contained item
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the default name."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the original contained expressions are not altered
            copies = [e.copy() for e in self.exprs]
            self.exprs = copies
            for sub_expr in copies:
                sub_expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if recursive:
            # work on copies so the original contained expressions are not altered
            copies = [e.copy() for e in self.exprs]
            self.exprs = copies
            for sub_expr in copies:
                sub_expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on all contained expressions.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            # propagate whatever the base class just recorded
            for sub_expr in self.exprs:
                sub_expr.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()

        for sub_expr in self.exprs:
            sub_expr.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:

            def can_absorb(candidate) -> bool:
                return (
                    isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug
                )

            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the tail, since the list may just have been rebuilt
            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for sub_expr in self.exprs:
            sub_expr.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions are also copied."""
        ret = typing.cast(ParseExpression, super().copy())
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, optionally warning about named contained expressions."""
        diagnostic = Diagnostics.warn_ungrouped_named_tokens_in_collection
        warn_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diagnostic not in self.suppress_warnings_
        )

        if warn_enabled:
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and diagnostic not in e.suppress_warnings_
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, stacklevel=3)
                    # one warning per call is enough
                    break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4101

4102

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; ``And.parseImpl`` switches to raising
        ``ParseSyntaxException`` for the expressions that follow it."""

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # each Ellipsis skips up to the expression that follows it
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)

        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling of the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        def has_trailing_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(
            has_trailing_pending_skip(e) for e in self.exprs[:-1]
        ):
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if has_trailing_pending_skip(e):
                    # merge the following expression into the pending skip
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # cur_ is bound as a default so each action keeps its own block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                next_first = next(iter(cur.recurse()), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            # exact type test, deliberately not isinstance
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            else:
                # after an error stop, failures become ParseSyntaxExceptions
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop after the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"{{{inner}}}"

4269

4270

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Raises the furthest-located fatal exception if any alternative
        raised one, else the furthest-located ``ParseException``, else a
        ``ParseException`` stating that there are no alternatives.
        """
        furthest_loc = -1
        furthest_exc = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []

        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative would match
        for e in self.exprs:
            try:
                end_loc = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error supersedes any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((end_loc, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for expected_loc, candidate in matches:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = candidate._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_loc >= expected_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    elif actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # break ties using the length of the element's string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " ^ ".join(str(e) for e in self.exprs)
        return f"{{{alternatives}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning if any alternative is an ``And``."""
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4427

4428

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
        else:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        Fatal exceptions propagate immediately.  If every alternative
        fails, the furthest-located ``ParseException`` is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for e in self.exprs:
            try:
                return e._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any individual error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " | ".join(str(e) for e in self.exprs)
        return f"{{{alternatives}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning if any alternative is an ``And``."""
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4537

4538

4539class Each(ParseExpression):

4540 """Requires all given :class:`ParserElement` s to be found, but in

4541 any order. Expressions may be separated by whitespace.

4542

4543 May be constructed using the ``'&'`` operator.

4544

4545 Example::

4546

4547 color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")

4548 shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")

4549 integer = Word(nums)

4550 shape_attr = "shape:" + shape_type("shape")

4551 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")

4552 color_attr = "color:" + color("color")

4553 size_attr = "size:" + integer("size")

4554

4555 # use Each (using operator '&') to accept attributes in any order

4556 # (shape and posn are required, color and size are optional)

4557 shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

4558

4559 shape_spec.run_tests('''

4560 shape: SQUARE color: BLACK posn: 100, 120

4561 shape: CIRCLE size: 50 color: BLUE posn: 50,80

4562 color:GREEN size:20 shape:TRIANGLE posn:20,40

4563 '''

4564 )

4565

4566 prints::

4567

4568 shape: SQUARE color: BLACK posn: 100, 120

4569 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]

4570 - color: BLACK

4571 - posn: ['100', ',', '120']

4572 - x: 100

4573 - y: 120

4574 - shape: SQUARE

4575

4576

4577 shape: CIRCLE size: 50 color: BLUE posn: 50,80

4578 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]

4579 - color: BLUE

4580 - posn: ['50', ',', '80']

4581 - x: 50

4582 - y: 80

4583 - shape: CIRCLE

4584 - size: 50

4585

4586

4587 color: GREEN size: 20 shape: TRIANGLE posn: 20,40

4588 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]

4589 - color: GREEN

4590 - posn: ['20', ',', '40']

4591 - x: 20

4592 - y: 40

4593 - shape: TRIANGLE

4594 - size: 20

4595 """

4596

def __init__(
    self, exprs: typing.Iterable[ParserElement], savelist: bool = True
) -> None:
    """Store the contained expressions and initialize matching state."""
    super().__init__(exprs, savelist)
    # with no contained expressions the element may trivially match empty
    self._may_return_empty = not self.exprs or all(
        e.mayReturnEmpty for e in self.exprs
    )
    self.skipWhitespace = True
    # expression groups are built on the first call to parseImpl
    self.initExprGroups = True
    self.saveAsList = True

4608

def __iand__(self, other):
    """Implement ``&=``: append ``other`` (strings are wrapped as literals)."""
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self.append(other)  # Each([self, other])
    return NotImplemented

4615

def streamline(self) -> ParserElement:
    """Streamline via the base class, then recompute ``_may_return_empty``."""
    super().streamline()
    # with no contained expressions the element may trivially match empty
    self._may_return_empty = not self.exprs or all(
        e.mayReturnEmpty for e in self.exprs
    )
    return self

4623

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Match the sub-expressions in whatever order they occur in the input.

    A first pass uses ``try_parse`` to discover the order in which the
    sub-expressions match; a second pass re-parses them in that order
    with ``_parse`` to build the results.

    Raises the :class:`ParseFatalException` with the greatest ``loc`` if
    any was seen in the last matching round, or a :class:`ParseException`
    if required expressions are left unmatched.
    """
    if self.initExprGroups:
        # one-time classification of the sub-expressions
        self.opt1map = {
            id(sub.expr): sub for sub in self.exprs if isinstance(sub, Opt)
        }
        wrapped_opts = [sub.expr for sub in self.exprs if isinstance(sub, Opt)]
        empty_capable = [
            sub
            for sub in self.exprs
            if sub.mayReturnEmpty and not isinstance(sub, (Opt, Regex, ZeroOrMore))
        ]
        self.optionals = wrapped_opts + empty_capable
        self.multioptionals = [
            sub.expr.set_results_name(sub.resultsName, list_all_matches=True)
            for sub in self.exprs
            if isinstance(sub, _MultipleMatch)
        ]
        self.multirequired = [
            sub.expr.set_results_name(sub.resultsName, list_all_matches=True)
            for sub in self.exprs
            if isinstance(sub, OneOrMore)
        ]
        self.required = [
            sub
            for sub in self.exprs
            if not isinstance(sub, (Opt, ZeroOrMore, OneOrMore))
        ]
        self.required += self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    repeatables = self.multioptionals[:]
    match_order: list[ParserElement] = []

    failed: list[ParserElement] = []
    fatals: list[ParseFatalException] = []
    while True:
        candidates = pending_required + pending_optional + repeatables
        failed.clear()
        fatals.clear()
        for candidate in candidates:
            try:
                scan_loc = candidate.try_parse(instring, scan_loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = candidate
                fatals.append(pfe)
                failed.append(candidate)
            except ParseException:
                failed.append(candidate)
            else:
                # record the original Opt wrapper when the inner expr matched
                match_order.append(self.opt1map.get(id(candidate), candidate))
                if candidate in pending_required:
                    pending_required.remove(candidate)
                elif candidate in pending_optional:
                    pending_optional.remove(candidate)
        # stop once a full round matches nothing
        if len(failed) == len(candidates):
            break

    # look for any ParseFatalExceptions
    if fatals:
        if len(fatals) > 1:
            fatals.sort(key=lambda exc: -exc.loc)
            if fatals[0].loc == fatals[1].loc:
                # tie on location: prefer the longer parser element string
                fatals.sort(
                    key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                )
        raise fatals[0]

    if pending_required:
        missing = ", ".join(str(sub) for sub in pending_required)
        raise ParseException(
            instring,
            loc,
            f"Missing one or more required elements ({missing})",
        )

    # add any unmatched Opts, in case they have default values defined
    match_order += [
        sub
        for sub in self.exprs
        if isinstance(sub, Opt) and sub.expr in pending_optional
    ]

    total_results = ParseResults([])
    for matched_expr in match_order:
        loc, piece = matched_expr._parse(instring, loc, do_actions)
        total_results += piece

    return loc, total_results

4710

def _generateDefaultName(self) -> str:
    """Return the sub-expression strings joined by `` & `` inside braces."""
    joined = " & ".join(map(str, self.exprs))
    return "{" + joined + "}"

4713

4714

class ParseElementEnhance(ParserElement):
    """Abstract :class:`ParserElement` subclass that wraps a single
    expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """
        Wrap ``expr``, converting a string into a literal expression first,
        and copy the wrapped expression's parsing settings onto this one.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's settings
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Delegate parsing to the wrapped expression.

        Raises :class:`ParseException` if no expression is defined. Other
        parse exceptions from the wrapped expression are re-raised after
        filling in any missing ``pstr``, ``loc`` and ``parser_element``.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named, non-Forward wrapper reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Apply ``leave_whitespace`` to this expression and, when ``recursive``
        is set, to a copy of the wrapped expression that replaces it.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Apply ``ignore_whitespace`` to this expression and, when ``recursive``
        is set, to a copy of the wrapped expression that replaces it.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Register ``other`` as an ignorable expression here and on the wrapped
        expression; a :class:`Suppress` already registered is not added again.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if already_ignored:
            return self

        super().ignore(other)
        if self.expr is not None:
            # pass along the entry that the base class just appended
            self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this expression and the wrapped expression, if any."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """
        Raise :class:`RecursiveGrammarException` if this expression is already
        in ``parseElementList``; otherwise continue into the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        visited = parseElementList + [self]
        if self.expr is not None:
            self.expr._checkRecursion(visited)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` before validating."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``ClassName:(wrapped expression)``."""
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4823

4824

class IndentedBlock(ParseElementEnhance):
    """
    Matches one or more repetitions of an expression that all begin at the
    same indentation column.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, locn, t: col(locn, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, locn, t: col(locn, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """
        Parameters:

        - ``expr`` - expression to match on each line of the block
        - ``recursive`` - (default= ``False``) also match nested,
          more deeply indented blocks of ``expr``
        - ``grouped`` - (default= ``True``) wrap the matched block in a
          :class:`Group`
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Build a block expression anchored at the column of the first
        non-whitespace position, and parse with it.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_level + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4887

4888

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only when the parse location is the
    very start of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only at location 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4908

4909

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only when the parse location is in
    column 1 of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only in column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4941

4942

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` verifies that the given expression matches at the
    current position, but does *not* advance the parsing position
    within the input string. ``FollowedBy`` always returns a null
    token list. If any results names are defined in the lookahead
    expression, those *will* be returned for access by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead expression and return the unchanged ``loc``."""
        # parse with self.expr, then empty the token list in place so that
        # any named results defined in the lookahead expression are kept
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        return loc, lookahead

4977

4978

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` verifies that the given expression matches just
    before the current position, without advancing the parsing position
    within the input string. ``PrecededBy`` always returns a null token
    list, but if a results name is defined on the given expression, it
    is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # work out whether the lookbehind distance is known up front
        fixed_distance = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            fixed_distance = False
        self.exact = fixed_distance

        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # empty the token list in place so that only named results survive
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """
        Check that the lookbehind expression matches before ``loc``.

        Returns the unchanged ``loc`` with the lookbehind results; raises a
        parse exception when nothing matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            lookbehind_start = loc - self.retreat
            _, found = self.expr._parse(instring, lookbehind_start)
            return loc, found

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, found = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                return loc, found

        raise last_failure

5058

5059

class Located(ParseElementEnhance):
    """
    Wraps the tokens of a match together with the locations in the
    input string where the match started and ended.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and return ``[start, tokens, end]``."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        for key, value in (
            ("locn_start", begin),
            ("value", tokens),
            ("locn_end", end),
        ):
            located[key] = value
        # must return as a list, so that the name will be attached to the complete group
        return end, ([located] if self.resultsName else located)

5100

5101

class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.
    ``NotAny`` verifies that the given expression does *not* match at
    the current position, and does *not* advance the parsing position
    within the input string. Also, ``NotAny`` does *not* skip over
    leading whitespace. ``NotAny`` always returns a null token list.
    May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Fail if the wrapped expression can parse at ``loc``."""
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Return the wrapped expression in braces, prefixed with ``~``."""
        return "~{" + str(self.expr) + "}"

5143

5144

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition of an expression, with an
    optional ``stop_on`` sentinel that ends the repetition.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        # the camelCase keyword takes precedence when both are given
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """
        Set the sentinel expression that stops the repetition; ``None``
        removes it. Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the wrapped expression once, then repeatedly until it fails
        or the sentinel is found.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = (
            self.not_ender.try_parse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failure after at least one match ends the repetition
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """
        Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already
        carries a results name.
        """
        collision_warning = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and collision_warning not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if not contained.resultsName:
                    continue
                if collision_warning in contained.suppress_warnings_:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {contained.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

5220

5221

class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Return the wrapped expression in braces, followed by ``...``."""
        return "{" + str(self.expr) + "}..."

5252

5253

class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression any number of times in succession,
    including not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match as the base class does, but turn a failure to match even
        once into an empty result at the original location.
        """
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Return the wrapped expression in brackets, followed by ``...``."""
        return "[" + str(self.expr) + "]..."

5286

5287

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    ``min`` and ``max``, when given, bound the number of list elements;
    ``min`` must be at least 1 and ``max`` must not be less than ``min``.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one element, followed by (delimiter + element) repeated within bounds
        extra_upper = None if self.max is None else self.max - 1
        list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            extra_upper,
        )
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)

        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return ``content [delim content]...`` using the streamlined content."""
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."

5354

5355

class _NullToken:
    """Placeholder object that is falsy and renders as an empty string."""

    def __bool__(self) -> bool:
        return False

    def __str__(self) -> str:
        return str()

5362

5363

class Opt(ParseElementEnhance):
    """
    Matches the given expression if it is present, and succeeds without
    consuming anything if it is not.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel marking "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse the wrapped expression; if it fails, return the default value
        (when one was supplied) or no tokens, at the unchanged location.
        """
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # attach the default under the wrapped expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return the wrapped expression's string inside square brackets."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"

# ``Optional`` is a second name bound to the same class object as ``Opt``.
Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the camelCase keyword takes precedence when both are given
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression whose only job is to carry the ignorable expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's ignore
        expressions plus the ``ignore`` argument given at construction.
        """
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, as the inherited
        ``ignore`` does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Scan forward from ``loc`` for the first position where the target
        expression matches, and return the text skipped to get there.

        When ``include`` was set, the target expression is parsed as well and
        its tokens are added to the result. Raises :class:`ParseException` if
        the end of the input is passed without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_can_parse_next is not None:
                # break if failOn expression matches
                if self_failOn_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5593

5594

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        """Create the placeholder, optionally already wrapping ``other``."""
        # remember where this Forward was created; __del__ reports this
        # location if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call, compared against in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """
        Attach ``other`` as the wrapped expression (``fwd << expr``).

        A string is converted using ``_literalStringClass``; any other
        non-``ParserElement`` operand yields ``NotImplemented``.  Returns ``self``.
        """
        # an expression is being attached, so the creation site is no longer needed
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # take over the wrapped expression's parsing characteristics
        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the calling line so __or__ can detect 'fwd << a | b' written on one line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """Attach ``other`` in place (``fwd <<= expr``) by delegating to ``<<``."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """
        Build the ``|`` alternative via the base class, first warning (when the
        diagnostic is enabled and not suppressed) if this ``|`` is applied on the
        same source line as the preceding ``<<``.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        """Warn at destruction time if no expression was ever attached."""
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # attribute the warning to the line that created this Forward
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse the wrapped expression at ``loc``.

        When left recursion support is disabled this simply delegates to the
        base class; otherwise the bounded recursion algorithm described in the
        comments below is used, backed by ``ParserElement.recursion_memos``.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    # one level beyond the entry point found in the stack
                    stacklevel = i + 1
                    break
            else:
                # no known entry point in the inspected frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                # a memoized failure is stored as the exception itself
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                # memo keys are (location, expression, do_actions)
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure located before ``loc`` so that any
                # real match counts as an improvement
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # record the failure under both keys before propagating it
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn off whitespace skipping on this element only; ``recursive`` is not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn on whitespace skipping on this element only; ``recursive`` is not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the wrapped expression once; returns ``self``."""
        if not self.streamlined:
            # set the flag before descending, so a recursive reference back to
            # this element does not descend again
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the recursion checks."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        # only descend if this element has not already been visited on this path
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<class name>: <wrapped expression text>"``, tolerating self-reference."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                # cap the embedded text at 1000 characters
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Return a copy of this element.

        With an attached expression this is the base class copy; without one,
        a new ``Forward`` that wraps this (still empty) instance is returned.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name via the base class, warning first if no expression is attached."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5862

5863

class TokenConverter(ParseElementEnhance):
    """
    Abstract :class:`ParseElementEnhance` subclass that serves as the common
    base for expressions which convert parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but is neither forwarded nor stored
        super().__init__(expr)
        self.saveAsList = False

5872

5873

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the pieces
    must also sit directly next to each other in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)

        # the legacy keyword wins whenever it was supplied
        if joinString is None:
            joinString = join_string

        # for adjacent matching, stop the contained expressions from skipping
        # whitespace ...
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        # ... while the Combine itself still skips leading whitespace
        self.skipWhitespace = True

        self.callPreparse = True
        self.joinString = joinString

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable and return ``self``."""
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode: use the ParserElement implementation directly
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string."""
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]

        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        # wrap in a list when this expression is named and the results carry keys
        wrap = self.resultsName and combined.haskeys()
        return [combined] if wrap else combined

5929

5930

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for keeping the tokens of :class:`ZeroOrMore` and
    :class:`OneOrMore` expressions together.

    Pass ``aslist=True`` to receive the group as a plain Python list
    rather than as a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest ``tokenlist`` one level deeper, optionally as a plain list."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

5966

5967

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, at the
    same time, as a dictionary. Every element can additionally be looked up
    using the first token of that element as its key. Useful for tabular
    report scraping when the first column can be used as an item key.

    Pass ``asdict=True`` to receive the parsed tokens as a Python dict
    rather than as a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to ``tokenlist`` for every non-empty element, keyed by its first token."""
        for index, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            # the first token is the key; integer keys are stored as strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                # key only, no value
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                # everything after the key makes up the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6053

6054

class Suppress(TokenConverter):
    """Converter that parses an expression but drops its results.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for "skip to whatever gets added next";
        # wrap a placeholder that __add__/__sub__ recognize later
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # placeholder resolved: suppress everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # placeholder resolved: suppress everything up to ``other``
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressing - return ``self`` unchanged."""
        return self

6108

6109

6110# XXX: Example needs to be re-done for updated output

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    On entry to the parse action the decorator writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``.
    On exit it writes ``"<<leaving method-name"`` together with either the
    returned value or the type and text of the exception that was raised
    (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is present: prefix with its type name
            label = f"{type(paArgs[0]).__name__}.{label}"

        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    # present the wrapper under the wrapped action's name
    z.__name__ = f.__name__
    return z

6159

6160

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- grammar used by srange() to parse a regex-style "[...]" character set ---

# backslash followed by one of the listed punctuation characters (or a space);
# the parse action keeps only the escaped character itself
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape: backslash, optional "0", "x" or "X", then hex digits.
# The digits are taken as everything after the x/X marker.  (Stripping the
# prefix with str.lstrip(r"\0x") is wrong here: lstrip removes a *set* of
# characters, so it also ate leading zero digits - turning "\x00" into an
# empty string - and it never removed an uppercase "X".)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape: backslash, "0", then octal digits; everything after the
# backslash is read as a base-8 number
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one character of the set: any of the escapes above, or any single
# character other than a backslash or "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression: "[", optional "^" (named "negate"),
# one or more ranges/characters (named "body"), "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6187

6188

def srange(s: str) -> str:
    r"""Helper for spelling out character sets for :class:`Word`
    construction, using the syntax of regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded set of characters joined into one string. Inside the []'s
    the following are accepted:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be processed, an empty string is returned.
    """
    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            if isinstance(part, ParseResults):
                # a grouped [first, last] pair: expand to every character between them
                first_code, last_code = ord(part[0]), ord(part[1])
                pieces.extend(chr(code) for code in range(first_code, last_code + 1))
            else:
                pieces.append(part)
        return "".join(pieces)
    except Exception:
        # any failure yields an empty character set
        return ""

6228

6229

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action which applies a function to every
    element of a :class:`ParseResults` list. Any additional args are passed
    on to the function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # name the action after the mapped callable; callables without a
    # __name__ fall back to the name of their class
    class_name = getattr(func, "__class__").__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6274

6275

def autoname_elements() -> None:
    """
    Utility for naming many parser elements at once: every
    :class:`ParserElement` held in a local variable of the caller that has
    no custom name yet is named after that variable. Intended for
    generating railroad diagrams with named subdiagrams.
    """
    # sys._getframe may be missing in the current Python; fall back to a
    # stand-in that reports "no frame"
    frame_getter = getattr(sys, "_getframe", lambda _: None)
    caller = frame_getter(1)
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        if candidate.customName:
            # leave explicitly assigned names alone
            continue
        candidate.set_name(var_name)

6294

6295

# Double-quoted string.  The Regex consumes the opening quote and the body;
# each body item is either a character other than '"', newline, carriage
# return or backslash, a doubled quote (""), or a backslash followed by
# either one non-"x" character or "x" plus hex digits.  The closing quote is
# matched by the separate '"' literal.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string (same pattern with ' in place of ").
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above, using the same regex patterns; the
# double-quoted alternative is listed first.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted forms (whose bodies may contain
# newlines and lone or paired quote characters) and single-line forms.  In the
# single-line patterns here the quote character is escaped with a backslash
# (\" or \') rather than by doubling it as in quoted_string above.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted_string (copied, so the shared instance is left untouched) prefixed with "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built with srange() from hex-escaped ranges:
# 0xC0-0xFF, leaving out 0xD7 and 0xF7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# 0xA1-0xBF plus the two code points left out above (0xD7 and 0xF7)
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6335

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement instance bound to a module-level name at this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# fmt: off
# camelCase names bound to the very same expression objects as the snake_case names
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# camelCase function names, produced by replaced_by_pep8 (defined elsewhere in
# this file) from the legacy name and the snake_case function
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on