Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 43%

1844 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

1845

1846 - ``exception_action`` - method to be called when expression fails to parse;

1847 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1848 """

1849 self.debugActions = self.DebugActions(

1850 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1851 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1852 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1853 )

1854 self.debug = True

1855 return self

1856

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Switch debug tracing for this expression on or off.

    Pass ``flag=True`` to enable tracing or ``flag=False`` to disable it.
    With ``recurse=True`` the flag is applied to every expression returned
    by ``visit_all()`` (this expression and all sub-expressions).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; use
    :class:`set_debug_actions` to install custom ones. Before each match
    attempt a ``"Match <exprname> at loc <n>(<line>,<col>)"`` message is
    shown, followed by either a ``"Matched"`` or an ``"Exception raised"``
    message. Giving the expression a readable name with :class:`set_name`
    makes these messages easier to follow - without it, the default name
    of the :class:`Word` above would be ``"W:(A-Za-z)"``.

    Returns ``self`` so calls can be chained.
    """
    if recurse:
        # each visited expression is toggled individually, without recursing again
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling always (re)installs the default debug actions
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1909

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is produced by ``_generateDefaultName()`` on first access and
    cached in ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1915

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Produce the value used for ``default_name``.

    Abstract hook: every child class must provide its own implementation.
    """

1921

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression so that debugging and exception
    messages are clearer. When `__diag__.enable_debug_on_named_expressions`
    is True, naming an expression also turns on debug for it.

    Passing ``None`` as `name` removes any custom name, and turns debug off
    again if `__diag__.enable_debug_on_named_expressions` is set.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    Returns ``self`` so calls can be chained.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the expression's current string form
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        debug_wanted = name is not None
        self.set_debug(debug_wanted)

    return self

1946

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined ``customName`` when
    one has been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is not None:
        return self.customName
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    """Assigning to ``name`` is routed through ``set_name``."""
    self.set_name(new_name)

1955

def __str__(self) -> str:
    """String form of an expression is its ``name``."""
    return self.name

1958

def __repr__(self) -> str:
    """The repr is the same text as ``str(self)``."""
    return str(self)

1961

def streamline(self) -> ParserElement:
    """
    Flag this expression as streamlined and discard the cached default name
    (``_defaultName`` is reset to ``None``). Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1966

def recurse(self) -> list[ParserElement]:
    """Return the contained sub-expressions; this implementation has none."""
    return list()

1969

def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a copy
    of ``parseElementList`` extended with this expression.
    """
    # work on a copy so the caller's list is left untouched
    trail = parseElementList[:]
    trail += [self]
    for child in self.recurse():
        child._checkRecursion(trail)

1974

def validate(self, validateTrace=None) -> None:
    """
    Deprecated structural check of the defined expressions.

    Emits a ``DeprecationWarning`` attributed to the caller, then runs
    ``_checkRecursion`` starting from an empty list. ``validateTrace`` is
    accepted but not used.
    """
    deprecation_text = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_text, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1985

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method or a
    filename; a filename is opened with ``encoding``, read completely and
    closed before parsing starts. ``parse_all`` / ``parseAll`` are OR-ed
    together and forwarded to ``parse_string``.
    """
    parseAll = parseAll or parse_all
    try:
        # try file-like first; anything lacking .read() is treated as a filename
        file_or_filename = typing.cast(TextIO, file_or_filename)
        text = file_or_filename.read()
    except AttributeError:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2015

def __eq__(self, other):
    """
    Equality for parser expressions.

    - the very same object is always equal;
    - a string is equal when this expression matches it completely
      (``matches(other, parse_all=True)``);
    - another ``ParserElement`` is equal when both have equal ``vars()``;
    - anything else is unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2024

def __hash__(self):
    """Hash on object identity (``id(self)``)."""
    return id(self)

2027

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick yes/no check of this parser against a test string. Handy for
    small inline tests of sub-expressions while building a larger parser.

    Parameters:

    - ``test_string`` - text to test against this expression (converted with ``str()``)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds and ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # either spelling set to False turns off parse-all
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2051

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output; if it
      returns None, the parsed results are dumped instead (honoring ``full_dump``)
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments are the legacy spellings of the same options.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of
    ``(test string, ParseResults or Exception)`` pairs, one per test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the snake_case arguments with their legacy camelCase twins
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        # a multiline string is split into individually stripped test lines
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n marker, except inside quoted strings
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"

    for t in tests:
        # comment lines, and blank lines following a comment, are collected
        # and emitted ahead of the next real test
        is_comment_line = comment_specified and comment.matches(t, False)
        if is_comment_line or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue

        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame right after the call site is the parse action itself
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # no post-parse output: fall back to the parsed results,
                        # honoring full_dump like every other dump of `result`
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2266

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """
    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path: create/overwrite the file and write the HTML into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # anything else is treated as a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2325

# Compatibility synonyms
# Each legacy camelCase name below is bound to the result of
# replaced_by_pep8(<legacy name>, <snake_case callable>).
# fmt: off
# Wrapped in staticmethod(); NOTE(review): presumably the snake_case
# originals are static/class-level functions - confirm against their definitions.
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# Synonyms bound without a staticmethod() wrapper
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: ``defaultName`` is the same property object as ``default_name``
defaultName = default_name
# fmt: on

2360

2361

class _PendingSkip(ParserElement):
    """
    Internal placeholder that holds the spot where ``...`` was added to a
    parser element. Once another ParserElement is added to it, the
    placeholder is replaced by an expression built around a SkipTo.
    """

    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        """Remember the expression that ``...`` was attached to and the ``must_skip`` flag."""
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        """Name is that of ``anchor + Empty()`` with ``"Empty"`` shown as ``"..."``."""
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        """
        Resolve the placeholder: build a SkipTo targeting ``other`` (results
        name ``_skipped*``) and return the composed expression.
        """
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the
            # first token and the "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # last skipped entry is empty: replace the results name with a
            # note that the anchor was missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        """The repr is always the default (generated) name."""
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        """A placeholder cannot parse; it must be resolved by adding a target expression first."""
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2401

2402

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self) -> None:
        """Initialize the base class with ``savelist=False``."""
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        """By default a token is named after its concrete class."""
        token_class = type(self)
        return token_class.__name__

2413

2414

class NoMatch(Token):
    """
    A token that never matches anything.
    """

    def __init__(self) -> None:
        """Set up the fixed error message and parser flags."""
        super().__init__()
        self.errmsg = "Unmatchable token"
        self._may_return_empty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Always fail with a ``ParseException`` at ``loc``."""
        raise ParseException(instring, loc, self.errmsg, self)

2428

2429

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        """
        Performance tuning: when ``Literal`` itself is instantiated, pick a
        subclass with an optimized ``parseImpl`` (``Empty`` for an empty
        string, ``_SingleCharLiteral`` for one character).
        """
        if cls is not Literal:
            # subclasses are created as requested
            return super().__new__(cls)

        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        """Arguments passed to ``__new__`` when the object is re-created."""
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        """Store the string to match, its length and its first character."""
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # the name is read only after ``match`` is set, since the default name is built from it
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Default name is the repr of the match string."""
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the literal at ``loc``; the first character is compared before calling ``startswith``."""
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2481

2482

class Empty(Literal):
    """
    An empty token; it always matches.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        """Initialize as a literal for the empty string; both arguments are ignored."""
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Fixed default name."""
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed without consuming input, returning no tokens."""
        no_tokens = []
        return loc, no_tokens

2498

2499

class _SingleCharLiteral(Literal):
    """Literal variant whose ``parseImpl`` compares a single character."""

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match by comparing only the character at ``loc``."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2505

2506

# Install Literal as ParserElement's ``_literalStringClass``.
# NOTE(review): presumably this is the class used to wrap plain strings
# into expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2508

2509

class Keyword(Token):
    """
    Token that matches a specified string exactly, as a keyword: it
    must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        """
        Store the keyword text and the set of identifier characters.

        Raises ``ValueError`` when the keyword string is empty.
        """
        super().__init__()
        chars = identChars or ident_chars
        if chars is None:
            chars = Keyword.DEFAULT_KEYWORD_CHARS
        keyword = matchString or match_string

        self.match = keyword
        self.matchLen = len(keyword)
        self.firstMatchChar = keyword[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = keyword.upper()
            chars = chars.upper()
        self.identChars = set(chars)

    def _generateDefaultName(self) -> str:
        """Default name is the repr of the keyword string."""
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that the characters just
        before and just after it are not identifier characters.

        On a boundary failure the exception message says which side was
        violated, and the error location points at the offending character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        width = self.matchLen
        ident = self.identChars

        if self.caseless:
            if instring[loc : loc + width].upper() == self.caselessmatch:
                if loc != 0 and instring[loc - 1].upper() in ident:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    loc < len(instring) - width
                    and instring[loc + width].upper() in ident
                ):
                    # followed by keyword char
                    errmsg += ", was immediately followed by keyword character"
                    errloc = loc + width
                else:
                    return loc + width, self.match
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar and width == 1
        ) or instring.startswith(self.match, loc):
            if loc != 0 and instring[loc - 1] in ident:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            elif loc < len(instring) - width and instring[loc + width] in ident:
                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + width
            else:
                return loc + width, self.match
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2625

2626

class CaselessLiteral(Literal):
    """
    Token that matches a specified string while ignoring letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        """Initialize with the upper-cased string for matching, keeping the original for results."""
        defining_text = matchString or match_string
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the upper-cased input slice with the stored match string."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2652

2653

class CaselessKeyword(Keyword):
    """
    Case-insensitive version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        """Resolve the legacy argument spellings and initialize :class:`Keyword` with ``caseless=True``."""
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2677

2678

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        """
        Store the reference string, the mismatch limit and the caseless flag.

        ``max_mismatches`` takes precedence over the legacy ``maxMismatches``
        whenever it is not ``None``.
        """
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        """Default name is ``<class name>:<repr of match_string>``."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, recording the positions that differ.

        Returns ``(new_loc, results)`` where ``results`` holds the matched
        input text plus the ``original`` and ``mismatches`` named results.
        Raises ``ParseException`` if fewer than ``len(match_string)``
        characters remain or more than ``maxMismatches`` positions differ.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The whole match_string was compared within the limit. Using
                # maxloc (rather than last index + 1) keeps the end position
                # correct even when match_string is empty and the loop never ran.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2764

2765

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # merge the legacy camelCase keywords with their snake_case equivalents;
        # a truthy camelCase value takes precedence
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # remove the excluded characters from both the initial and body sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: the body set is the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` is applied below, so it reflects only an
        # explicitly passed ``max``
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # ``exact`` overrides both bounds; the locals min/max are rebound
            # too because the regex construction below reads them
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # a single character class, repeated
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # leading class followed by a repeated body class; the
                # repeat counts are reduced by one because the leading
                # fragment already accounts for the first character
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # the generated pattern did not compile; keep the
                # character-by-character parseImpl defined on the class
                self.re = None  # type: ignore[assignment]
            else:
                # install the regex-based implementation on this instance
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            # collapse the set to range notation and truncate long results
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # exactly one character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # character-by-character implementation (replaced per instance by
        # parseImpl_regex when __init__ manages to compile a regex)
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too few characters matched
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: the match must not be adjacent to body characters
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # regex-based implementation, using the pattern compiled in __init__
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3015

3016

class Char(Word):
    """Convenience subclass equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character out of a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # a truthy legacy camelCase keyword takes precedence over the
        # snake_case argument
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_mode,
            exclude_chars=excluded,
        )

3037

3038

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may be a non-empty string, an already compiled pattern
        object (anything with ``pattern`` and ``match`` attributes), or a
        zero-argument callable returning either of those. Raises
        ``ValueError`` for an empty string and ``TypeError`` for any other
        kind of argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the ``re`` property
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already compiled (possibly by a third-party regex module)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the per-instance parse implementation; as_match wins if
        # both options are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, built on first access.

        Raises ``ValueError`` if the pattern string does not compile.
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")
        else:
            # probe the freshly compiled pattern directly instead of
            # re-entering this property while it is still being computed
            self._may_return_empty = self._re.match("") is not None
            return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string (computed lazily)."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            compiled = self.re
            if self._may_return_empty is None:
                # precompiled patterns (and callables returning one) skip
                # the compile step in ``re`` that sets the flag, so probe
                # the pattern here
                self._may_return_empty = compiled.match("") is not None
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return the matched text as a
        :class:`ParseResults`, with any named groups as named results.
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the tuple of the match's groups."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or with
        a callable ``repl`` together with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object produced by parseImplAsMatch
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3235

3236

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        super().__init__()
        # merge legacy camelCase keywords with the snake_case arguments; the
        # boolean options default to True, so either spelling can turn them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as
            # long as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # This fragment must NOT be an f-string: its braces are
                    # regex repetition counts, and an f-string would
                    # interpolate them as the integers 3, 2 and 4.
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Convert the text following a backslash in a numeric escape
        (``0``, three octal digits, ``xHH`` or ``uHHHH``) to its character;
        any other text is returned unchanged.
        """
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        elif s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        else:
            return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``; when ``unquote_results`` is set the
        delimiters are stripped and escapes are converted. Raises
        :class:`ParseException` if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    convert_escaped_numerics = self._convert_escaped_numerics
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3471

3472

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a
    given set. Whitespace is part of the match unless it is listed in
    the exclusion set (see the example). Construct with a string of the
    disallowed characters, plus optional minimum, maximum and/or exact
    lengths. ``min`` defaults to 1 (values < 1 are rejected); ``max``
    and ``exact`` default to 0, meaning no maximum or exact length
    restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        self.skipWhitespace = False
        # the snake_case argument takes precedence over the legacy keyword
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        # resolve the length bounds; ``exact`` overrides both when given
        lower = min
        upper = max if max > 0 else _MAX_INT
        if exact > 0:
            lower = upper = exact
        self.minLen = lower
        self.maxLen = upper

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the length test uses the collapsed form, the text shown does not
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # the first character is already accepted; extend the run from there
        loc = begin + 1
        while loc < limit:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3551

3552

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws

        # every known whitespace character that is NOT being matched is
        # registered as this expression's whitespace characters
        unmatched = [c for c in self.whiteStrs if c not in self.matchWhite]
        self.set_whitespace_chars("".join(unmatched), copy_defaults=True)

        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        # resolve the length bounds; ``exact`` overrides both when given
        lower = min
        upper = max if max > 0 else _MAX_INT
        if exact > 0:
            lower = upper = exact
        self.minLen = lower
        self.maxLen = upper

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # the first character is already accepted; extend the run from there
        loc = begin + 1
        while loc < limit:
            if instring[loc] not in self.matchWhite:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3630

3631

class PositionToken(Token):
    """Base class of the position-oriented tokens defined after it
    (:class:`GoToColumn`, :class:`LineStart`, :class:`LineEnd`); sets the
    ``_may_return_empty`` flag and clears ``mayIndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3637

3638

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        # already at the requested column: nothing to skip
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        # step over whitespace until the column is reached, a
        # non-whitespace character is found, or the input ends
        while loc < end:
            if not instring[loc].isspace():
                break
            if col(loc, instring) == self.col:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        # consume the text between the current and the requested column
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3671

3672

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # keep a copy of the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace via ``self.skipper``; when ``"\n"`` was in
        the original whitespace set, also step over newlines.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # slicing (rather than indexing) is safe at the end of the string
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1, else raise
        ``ParseException``.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3720

3721

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # "\n" is the thing being matched, so it must not be skipped as whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` (returning ``"\\n"``) or the exact end
        of the string (returning no tokens); otherwise raise
        ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3743

3744

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is 0, or when ``loc`` equals
        the position ``preParse`` reaches starting from 0.
        """
        # a non-zero loc still counts as the start if everything before it
        # is just whitespace and ignorables
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

3760

3761

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` when input remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already past the end: stay put
        next_loc = loc + 1 if loc == end else loc
        return next_loc, []

3780

3781

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the camelCase keyword wins only when it was given a non-default value
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where the previous
        character is outside ``wordChars`` and the current one is inside.
        """
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc - 1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3808

3809

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the camelCase keyword wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or past the end of
        ``instring``, or when the previous character is in ``wordChars``
        and the current one is not.

        Raises ``ParseException`` otherwise, including at position 0 of a
        non-empty string, where no preceding character exists.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            chars = self.wordChars
            if (
                # at loc 0, instring[loc - 1] would wrap around to the LAST
                # character of the string, so reject position 0 explicitly
                loc == 0
                or instring[loc] in chars
                or instring[loc - 1] not in chars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3837

3838

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        # the tag is injected by a parse action on this element
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store ``tag_value`` in ``tokens`` under ``tag_name``."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>:<tag_name>=<repr(tag_value)>"``."""
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3880

3881

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # a generator can only be consumed once, so capture its items first
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any plain strings in the sequence are wrapped in the literal class
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the cached
        default name; returns ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self

        # work on copies so the originals keep their own whitespace settings
        copies = [sub.copy() for sub in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if not recursive:
            return self

        # work on copies so the originals keep their own whitespace settings
        copies = [sub.copy() for sub in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable on this expression and on each
        contained expression.  A ``Suppress`` that is already registered is
        not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its contents, flattening a nested
        expression of the same class where that is safe; returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:

            def can_absorb(candidate) -> bool:
                return (
                    isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug
                )

            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates each
        contained expression, then runs ``_checkRecursion``.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace[:]
        trace = trace + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Delegate to the base class, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already has a
        results name.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        warn_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        )

        if warn_enabled:
            # only the first offending sub-expression is reported
            offender = next(
                (
                    sub
                    for sub in self.exprs
                    if isinstance(sub, ParserElement)
                    and sub.resultsName
                    and diag not in sub.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4053

4054

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` passes one, later failures
        are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # each "..." becomes a SkipTo targeting the element that follows it
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base streamlining, and attach a
        parse action to the expression preceding any ``IndentedBlock``.
        """

        def ends_in_pending_skip(candidate) -> bool:
            return (
                isinstance(candidate, ParseExpression)
                and bool(candidate.exprs)
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for idx, sub in enumerate(self.exprs[:-1]):
                # NOTE(review): the loop walks a snapshot taken before any
                # marker is stored, so this guard looks unreachable - confirm
                if sub is deleted_expr_marker:
                    continue
                if ends_in_pending_skip(sub):
                    sub.exprs[-1] = sub.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                next_first = next(iter(cur.recurse()), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse each contained expression in order and return the final
        location with the accumulated results.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for sub in self.exprs[1:]:
            # exact type test rather than isinstance
            if type(sub) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = sub._parse(instring, loc, do_actions)
            else:
                # after an error stop, failures become ParseSyntaxException
                try:
                    loc, exprtokens = sub._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on leading sub-expressions, stopping after
        the first one that cannot return empty.
        """
        subRecCheckList = parseElementList[:] + [self]
        for sub in self.exprs:
            sub._checkRecursion(subRecCheckList)
            if not sub.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4221

4222

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining, then recompute the flags derived from
        the alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the result of the one
        that matches furthest.  If none matches, raise the furthest-reaching
        fatal exception, else the furthest-reaching ``ParseException``.
        """
        furthest_err_loc = -1
        furthest_err = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative gets
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error discards any ordinary failure seen so far
                furthest_err = None
                furthest_err_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_err_loc:
                        furthest_err = err
                        furthest_err_loc = err.loc
            except IndexError:
                if len(instring) > furthest_err_loc:
                    furthest_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_err_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = candidates[0][1]
                return best_expr._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for expected_loc, alt in candidates:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_err_loc:
                        furthest_err = err
                        furthest_err_loc = err.loc
                else:
                    if actual_loc >= expected_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    elif actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if furthest_err is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_err_loc == parse_start_loc:
                furthest_err.msg = self.errmsg or ""
            raise furthest_err

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Delegate to the base class, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        warn_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        )
        if warn_enabled and any(
            isinstance(e, And) and diag not in e.suppress_warnings_
            for e in self.exprs
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4379

4380

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining (once), then recompute the flags derived
        from the alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` propagates immediately; if every
        alternative fails, the furthest-reaching ``ParseException`` is raised.
        """
        furthest_err_loc = -1
        furthest_err = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_err_loc:
                    furthest_err = err
                    furthest_err_loc = err.loc
            except IndexError:
                if len(instring) > furthest_err_loc:
                    furthest_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_err_loc = len(instring)

        if furthest_err is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any individual error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_err_loc == parse_start_loc:
                furthest_err.msg = self.errmsg or ""
            raise furthest_err

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Delegate to the base class, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        warn_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        )
        if warn_enabled and any(
            isinstance(e, And) and diag not in e.suppress_warnings_
            for e in self.exprs
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4489

4490

4491class Each(ParseExpression):

4492 """Requires all given :class:`ParserElement` s to be found, but in

4493 any order. Expressions may be separated by whitespace.

4494

4495 May be constructed using the ``'&'`` operator.

4496

4497 Example::

4498

4499 color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")

4500 shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")

4501 integer = Word(nums)

4502 shape_attr = "shape:" + shape_type("shape")

4503 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")

4504 color_attr = "color:" + color("color")

4505 size_attr = "size:" + integer("size")

4506

4507 # use Each (using operator '&') to accept attributes in any order

4508 # (shape and posn are required, color and size are optional)

4509 shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

4510

4511 shape_spec.run_tests('''

4512 shape: SQUARE color: BLACK posn: 100, 120

4513 shape: CIRCLE size: 50 color: BLUE posn: 50,80

4514 color:GREEN size:20 shape:TRIANGLE posn:20,40

4515 '''

4516 )

4517

4518 prints::

4519

4520 shape: SQUARE color: BLACK posn: 100, 120

4521 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]

4522 - color: BLACK

4523 - posn: ['100', ',', '120']

4524 - x: 100

4525 - y: 120

4526 - shape: SQUARE

4527

4528

4529 shape: CIRCLE size: 50 color: BLUE posn: 50,80

4530 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]

4531 - color: BLUE

4532 - posn: ['50', ',', '80']

4533 - x: 50

4534 - y: 80

4535 - shape: CIRCLE

4536 - size: 50

4537

4538

4539 color: GREEN size: 20 shape: TRIANGLE posn: 20,40

4540 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]

4541 - color: GREEN

4542 - posn: ['20', ',', '40']

4543 - x: 20

4544 - y: 40

4545 - shape: TRIANGLE

4546 - size: 20

4547 """

4548

4549 def __init__(

4550 self, exprs: typing.Iterable[ParserElement], savelist: bool = True

4551 ) -> None:

4552 super().__init__(exprs, savelist)

4553 if self.exprs:

4554 self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)

4555 else:

4556 self._may_return_empty = True

4557 self.skipWhitespace = True

4558 self.initExprGroups = True

4559 self.saveAsList = True

4560

4561 def __iand__(self, other):

4562 if isinstance(other, str_type):

4563 other = self._literalStringClass(other)

4564 if not isinstance(other, ParserElement):

4565 return NotImplemented

4566 return self.append(other) # Each([self, other])

4567

4568 def streamline(self) -> ParserElement:

4569 super().streamline()

4570 if self.exprs:

4571 self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)

4572 else:

4573 self._may_return_empty = True

4574 return self

4575

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Match every contained expression, in whatever order they appear in the input.

    The first call classifies the contained expressions (optional, required,
    repeatable) and caches the groups on the instance.  Matching is then done
    in two phases: a trial phase using ``try_parse`` to discover a workable
    order, followed by a real parse of the expressions in that order.

    Args:
        instring: the string being parsed.
        loc: location at which to start matching.
        do_actions: forwarded to the real (second-phase) parse of each expression.

    Returns:
        ``(end_location, results)`` with the accumulated results of all matches.

    Raises:
        ParseFatalException: if any trial parse in the final pass raised one; the
            one with the greatest ``loc`` is re-raised.
        ParseException: if one or more required expressions were never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        optional_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
        self.opt1map = {id(wrapper.expr): wrapper for wrapper in optional_wrappers}
        unwrapped_optionals = [wrapper.expr for wrapper in optional_wrappers]
        implicit_optionals = [
            e
            for e in self.exprs
            if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
        ]
        self.optionals = unwrapped_optionals + implicit_optionals
        self.multioptionals = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, _MultipleMatch)
        ]
        self.multirequired = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, OneOrMore)
        ]
        self.required = [
            e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
        ]
        self.required += self.multirequired
        self.initExprGroups = False

    trial_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    repeatable = self.multioptionals[:]
    matchOrder: list[ParserElement] = []

    fatals: list[ParseFatalException] = []
    while True:
        # iterate over a fresh list so the pending lists can be edited in the loop
        candidates = pending_required + pending_optional + repeatable
        failure_count = 0
        fatals.clear()
        for candidate in candidates:
            try:
                trial_loc = candidate.try_parse(instring, trial_loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = candidate
                fatals.append(pfe)
                failure_count += 1
            except ParseException:
                failure_count += 1
            else:
                # record the Opt wrapper (if any) rather than its inner expression
                matchOrder.append(self.opt1map.get(id(candidate), candidate))
                if candidate in pending_required:
                    pending_required.remove(candidate)
                elif candidate in pending_optional:
                    pending_optional.remove(candidate)
        # stop once a full pass makes no progress at all
        if failure_count == len(candidates):
            break

    # look for any ParseFatalExceptions
    if fatals:
        if len(fatals) > 1:
            fatals.sort(key=lambda exc: -exc.loc)
            if fatals[0].loc == fatals[1].loc:
                # tie on location: prefer the expression with the longer string form
                fatals.sort(
                    key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                )
        raise fatals[0]

    if pending_required:
        missing = ", ".join([str(e) for e in pending_required])
        raise ParseException(
            instring,
            loc,
            f"Missing one or more required elements ({missing})",
        )

    # add any unmatched Opts, in case they have default values defined
    matchOrder += [
        e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
    ]

    total_results = ParseResults([])
    for matched in matchOrder:
        loc, results = matched._parse(instring, loc, do_actions)
        total_results += results

    return loc, total_results

4662

def _generateDefaultName(self) -> str:
    """Build the default name: the contained expressions joined by ``' & '`` inside braces."""
    joined = " & ".join(str(e) for e in self.exprs)
    return "{" + joined + "}"

4665

4666

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr``; a string is first converted to a literal expression.

        When ``expr`` is not ``None``, whitespace, list-saving, pre-parse and
        ignore settings are copied from it onto this wrapper.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        # assigned after set_whitespace_chars(), as in the original ordering
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Raises:
            ParseException: if no wrapped expression has been defined.
            ParseBaseException: re-raised from the wrapped expression, after
                filling in any missing string/location/element details.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named, non-Forward wrapper reports its own error message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` here and, if ``recursive``, to a copy of the wrapped expression."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` here and, if ``recursive``, to a copy of the wrapped expression."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the wrapped expression, if any."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already in ``parseElementList``."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: warn, validate the wrapped expression, then check for recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Default name of the form ``ClassName:(wrapped expression)``."""
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4775

4776

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """Wrap ``expr``.

        ``recursive`` enables matching of nested, deeper-indented blocks;
        ``grouped`` wraps the matched block in a ``Group``.
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column that a trailing un-indent is compared against
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a one-off expression for the indentation found at ``loc`` and parse with it.

        The column of the first non-whitespace character after ``loc`` is the
        reference column for every subsequent line of the block.
        """
        # advance parse position to non-whitespace by using an Empty()
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr does not match here this raises, and no further work is needed
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block_body = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        if self._grouped:
            block_body = Group(block_body)
        full_block = block_body + Opt(trailing_undent)
        return full_block.parseImpl(instring, anchor_loc, do_actions)

4839

4840

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only when ``loc`` is 0.

        Raises:
            ParseException: if ``loc`` is any other position.
        """
        at_string_start = loc == 0
        if not at_string_start:
            raise ParseException(instring, loc, "not found at string start")
        return super().parseImpl(instring, loc, do_actions)

4860

4861

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only when ``loc`` is in column 1.

        Raises:
            ParseException: if ``loc`` is not in the first column of its line.
        """
        at_line_start = col(loc, instring) == 1
        if not at_line_start:
            raise ParseException(instring, loc, "not found at line start")
        return super().parseImpl(instring, loc, do_actions)

4893

4894

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Verify the wrapped expression matches at ``loc`` without consuming input.

        Returns:
            ``(loc, results)`` with ``loc`` unchanged and the token list emptied.
        """
        # Parse for real and then empty the token list in place: the same
        # ParseResults object is returned, so results names defined inside the
        # lookahead expression remain available.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        return loc, lookahead

4929

4930

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        # work on a whitespace-preserving copy of the wrapped expression
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is derived from ``expr`` itself
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # always return a null token list (results names are kept)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the wrapped expression matches the text ending at ``loc``.

        Returns:
            ``(loc, results)`` - the parse location is never advanced.

        Raises:
            ParseException: if there are fewer than ``retreat`` characters before
                ``loc`` (exact mode), or - via the last failed attempt - if no
                start position inside the lookbehind window matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # Try start positions from the end of the window back to its first
        # character.  The window holds min(loc, retreat) characters, so the
        # offsets must stop at len(instring_slice); going one further would start
        # a parse at index -1 of the slice.
        window_len = len(instring_slice)
        for offset in range(1, window_len + 1):
            try:
                _, ret = test_expr._parse(instring_slice, window_len - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no start position matched (or the window is empty)
            raise last_expr

        return loc, ret

5010

5011

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and return ``[start, tokens, end]`` with named fields."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end

        # when named, return as a list so that the name will be attached to
        # the complete group
        payload = [located] if self.resultsName else located
        return end, payload

5052

5053

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # Set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expressions.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens unless the wrapped expression can parse at ``loc``.

        Raises:
            ParseException: if the wrapped expression matches at ``loc``.
        """
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in braces, prefixed with ``~``."""
        return "~{" + f"{self.expr}" + "}"

5095

5096

class _MultipleMatch(ParseElementEnhance):
    # Shared implementation of repeated matching of a single expression,
    # optionally terminated by a "stop on" sentinel expression.

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # stopOn() converts a string sentinel to a literal expression itself
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression that ends the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once, then as many more times as possible.

        A failure of the first match (or the sentinel matching at ``loc``)
        propagates to the caller; a later failure simply ends the repetition.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        has_ignorables = bool(self.ignoreExprs)
        try:
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                item_start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, item_start, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition ends the loop; loc stays at the last match
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first if a contained expression is also named."""
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            for inner in [self.expr] + self.expr.recurse():
                collides = (
                    isinstance(inner, ParserElement)
                    and inner.resultsName
                    and diag_flag not in inner.suppress_warnings_
                )
                if collides:
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {inner.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, stacklevel=3)
                    # one warning is enough
                    break

        return super()._setResultsName(name, list_all_matches)

5172

5173

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in braces, followed by ``...``."""
        repeated = f"{self.expr}"
        return "{" + repeated + "}..."

5204

5205

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match as many repetitions as possible; zero matches yields empty results at ``loc``."""
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # not even one match: succeed without consuming anything
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in square brackets, followed by ``...``."""
        repeated = f"{self.expr}"
        return "[" + repeated + "]..."

5238

5239

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the bounds before any state is stored
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # the first item is matched separately, so the repetition counts are one lower
        upper_repeats = None if self.max is None else self.max - 1
        repeat_bounds = (self.min - 1, upper_repeats)
        delim_list_expr = self.content + (self.delim + self.content) * repeat_bounds
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name of the form ``content [delim content]...``."""
        item_name = self.content.streamline()
        return f"{item_name} [{self.raw_delim} {item_name}]..."

5303

5304

5305class _NullToken:

5306 def __bool__(self):

5307 return False

5308

5309 def __str__(self):

5310 return ""

5311

5312

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression if possible; otherwise succeed without it.

        Returns:
            ``(loc, tokens)``.  When the wrapped expression does not match,
            ``loc`` is unchanged and ``tokens`` is the default value (stored
            under the wrapped expression's results name, if it has one) or an
            empty list when no default was supplied.
        """
        self_expr = self.expr
        try:
            loc, tokens = self_expr._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            default_value = self.defaultValue
            if default_value is self.__optionalNotMatched:
                # no default supplied: contribute nothing
                tokens = []  # type: ignore[assignment]
            elif self_expr.resultsName:
                tokens = ParseResults([default_value])
                tokens[self_expr.resultsName] = default_value
            else:
                tokens = [default_value]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression's name in square brackets."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"[{inner}]"


# ``Optional`` is a second name bound to the very same class object as ``Opt``
Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that only carries the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target's ignore exprs plus ``ignoreExpr``."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns:
            This expression, so that calls can be chained (the same contract as
            ``ParseElementEnhance.ignore``).
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns:
            ``(end_location, results)`` where the results hold the skipped text,
            followed by the target's tokens when ``include`` was requested.

        Raises:
            ParseException: if the end of the input is reached without the
                target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        fail_on_matches = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_matches is not None and fail_on_matches(instring, tmploc):
                # NOTE(review): this break bypasses the while/else failure branch
                # below, so control continues into the result-building code with
                # tmploc at the fail_on position - confirm that this is the
                # intended way for fail_on to stop the scan.
                break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, with the caller's do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5542

5543

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember the frame that created this Forward, so that __del__ can
        # report the defining file/line if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call; compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """
        Attach ``other`` as the contained expression and copy its parsing
        attributes onto this ``Forward``.  Strings are converted using
        ``self._literalStringClass``.  Returns ``NotImplemented`` for any
        other non-``ParserElement`` operand.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Only discard the creation frame once the assignment is known to
        # be valid; a rejected operand leaves this Forward still empty, and
        # __del__ needs caller_frame to report that.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """Implement ``fwd <<= expr`` by delegating to ``'<<'``."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """
        Build an alternation as the base class does, first warning (when the
        diagnostic is enabled and not suppressed) if this ``'|'`` is on the
        same source line as the preceding ``'<<'``.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # never raise from a finalizer: if the creation frame is not
            # available there is no location to report, so stay silent
            caller_frame = getattr(self, "caller_frame", None)
            if caller_frame is None:
                return
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse using the contained expression.  When left recursion support
        is enabled, apply the bounded recursion algorithm described below.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping on this element only (``recursive`` is not used here)."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping on this element only (``recursive`` is not used here)."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; the flag is set first so a self-reference does not recurse."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then validates the contained expression."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>: <contained expression>"``, with the expression text capped at 1000 characters."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Copy this element.  An empty ``Forward`` is copied as a new
        ``Forward`` that refers back to this one, so that an expression
        attached here later is visible through the copy.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (if enabled) when no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5811

5812

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance` that serves as the
    common base for elements which convert the parsed results of the
    expression they wrap.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the base class;
        # ``saveAsList`` is always reset to False here.
        super().__init__(expr)
        self.saveAsList = False

5821

5822

class Combine(TokenConverter):
    """Converter that joins all of the matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        if adjacent:
            # turn off whitespace skipping for the contained expressions...
            self.leave_whitespace()
        self.adjacent = adjacent
        # ...but keep skipping leading whitespace for the Combine itself
        self.skipWhitespace = True
        # the legacy keyword ``joinString`` wins over ``join_string`` when given
        self.joinString = join_string if joinString is None else joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Add an ignore expression; returns ``self`` for chaining."""
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode: register through ParserElement.ignore directly
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated string token."""
        # start from a copy of the incoming results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5878

5879

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """
        Nest the matched tokens one level deeper.

        Returns a ``ParseResults.List`` built from a plain Python list when
        the element was created with ``aslist=True``, otherwise a
        one-element list containing ``tokenlist``.
        """
        if self._asPythonList:
            # use the PEP8-style accessor, consistent with the as_dict()
            # call used by Dict in this module
            return ParseResults.List(
                tokenlist.as_list()
                if isinstance(tokenlist, ParseResults)
                else list(tokenlist)
            )

        return [tokenlist]

5915

5916

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, at the
    same time, as a dictionary: each element is also reachable under a key
    taken from the first token of that element. Handy for scraping tabular
    reports where the first column can act as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """
        Assign each non-empty element of ``tokenlist`` to a key taken from
        that element's first token, then return the results (as a plain
        dict when ``asdict=True`` was given), wrapped in a list if this
        element has a results name.
        """
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # key only, no value tokens
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                # key plus one simple value
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                # everything after the key token is the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6002

6003

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` wraps a pending skip that is resolved when the
        # following expression is added with '+' or '-'
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # resolve the pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # same as __add__, but combining with '-'
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop all matched tokens."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed - return ``self`` unchanged."""
        return self

6057

6058

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the decorated parse action is called, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``.  When the parse action finishes, a line
    starting with ``"<<leaving"`` is written, showing either the returned
    value or the exception that the parse action raised.

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the final three positional arguments are (string, location, tokens)
        src, pos, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is present - prefix its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(pos, src)!r}, {pos}, {toks!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    # present the wrapper under the wrapped function's name
    z.__name__ = f.__name__
    return z

6104

6105

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar used by srange() to parse regex-style "[...]" character sets.

# backslash followed by a punctuation character -> that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##", "\X##" or "\0x##" -> character with that hex code.
# lstrip() removes the prefix characters (and any leading zero digits, which
# do not change the value); 'X' is included because the regex accepts it,
# and the `or "0"` covers all-zero escapes such as "\x00", which would
# otherwise be stripped to an empty string and make int() raise ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lstrip(r"\0xX") or "0", 16))
)
# "\0###" -> character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one of the escapes above, or a single character other than '\' or ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (named "negate") + ranges/characters (named "body") + "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6132

6133

def srange(s: str) -> str:
    r"""Helper for defining character ranges to pass to :class:`Word`,
    using the regexp ``'[]'`` character-set syntax::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded set of characters joined into one string. If the input cannot
    be processed, an empty string is returned. Inside the []'s the
    following may be used:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(part):
        # a plain item is a single literal character
        if not isinstance(part, ParseResults):
            return part
        # a grouped item is a [first, last] pair - expand it inclusively
        first, last = ord(part[0]), ord(part[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        # any failure while parsing or expanding yields an empty set
        return ""

6173

6174

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every
    element of a :class:`ParseResults` list. Any additional ``args`` are
    passed to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to the example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # name the parse action after the mapped callable; callables without a
    # __name__ fall back to the name of their class
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6219

6220

def autoname_elements() -> None:
    """
    Utility for naming parser elements in bulk: every
    :class:`ParserElement` held in a local variable of the calling frame
    that has no custom name yet is named after that variable. Intended
    for generating railroad diagrams with named subdiagrams.
    """

    # sys._getframe may not be implemented in the current Python; fall back
    # to a stub that reports "no frame"
    frame_getter = getattr(sys, "_getframe", lambda _: None)
    caller = frame_getter(1)
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        # skip anything that is not a parser element or is already named
        if not isinstance(candidate, ParserElement) or candidate.customName:
            continue
        candidate.set_name(var_name)

6239

6240

# Double-quoted string: an opening '"', then any run of characters that are
# not a quote, newline, carriage return or backslash, or a doubled quote
# (""), or a backslash escape (backslash + any non-'x' character, or
# backslash + 'x' + hex digits), followed by the closing '"'.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same pattern as dbl_quoted_string, using single quotes.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above, tried in order with '|' (double quotes first).
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms (""" or ''') and
# single-line forms, combined with '^'.  Unlike the patterns above, the
# single-line alternatives here accept a backslash-escaped quote (\" or \')
# where the ones above accept a doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string (a copy of quoted_string) immediately preceded by "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets expanded by srange() from hex code point ranges:
# 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# 0xa1-0xbf, plus 0xd7 and 0xf7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6278

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound to a name in this namespace at this
# point; names assigned after this statement are not included)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# The pre-PEP8 camelCase names are kept as aliases of the snake_case objects
# defined above.  Expression constants are aliased directly; functions are
# wrapped with replaced_by_pep8(<old name>, <new function>)
# (NOTE(review): wrapper behavior is defined elsewhere - not visible here).
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6299# fmt: on