Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pyparsing/core.py: 44%

1837 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

1838

1839 - ``exception_action`` - method to be called when expression fails to parse;

1840 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1841 """

1842 self.debugActions = self.DebugActions(

1843 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1844 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1845 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1846 )

1847 self.debug = True

1848 return self

1849

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Turn the display of debugging messages during pattern matching on or off.

    Parameters:

    - ``flag`` - (default= ``True``) ``True`` installs the default debug actions
      (which enables debugging); ``False`` clears the ``debug`` flag
    - ``recurse`` - (default= ``False``) if ``True``, apply ``flag`` to every expression
      returned by ``visit_all()`` (this expression and all sub-expressions)

    Returns ``self`` so that calls can be chained.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; custom ones can be supplied with
    :class:`set_debug_actions`. Before each attempt to match ``wd``, the message
    ``"Match <exprname> at loc <n>(<line>,<col>)"`` is shown, followed by either a
    ``"Matched"`` message on success or an ``"Exception raised"`` message on failure.
    Naming the expression with :class:`set_name` makes these messages easier to read -
    without it, the default name of the :class:`Word` expression above would be
    ``"W:(A-Za-z)"``.
    """
    if not recurse:
        if not flag:
            self.debug = False
        else:
            # installing the default actions also switches the debug flag on
            self.set_debug_actions(
                _default_start_debug_action,
                _default_success_debug_action,
                _default_exception_debug_action,
            )
        return self

    # each visited expression handles only itself, so no nested recursion is requested
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self

1902

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is needed
    and then cached in ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1908

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the string used as this expression's ``default_name``.

    Abstract: every child class must provide its own implementation.
    """

1914

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, so that debugging and exception
    messages are clearer. When `__diag__.enable_debug_on_named_expressions` is
    set to True, naming an expression also turns on debugging for it.

    Passing None as `name` removes any custom name, and clears the debug flag
    if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Returns ``self`` so that calls can be chained.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the (possibly new) display name
    self.errmsg = "Expected " + str(self)

    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        # debug follows the presence of a name: on when named, off when cleared
        self.set_debug(name is not None)

    return self

1939

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has been
    set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # assignment is routed through set_name() so its side effects are applied
    self.set_name(new_name)

1948

def __str__(self) -> str:
    """Return this expression's ``name``."""
    display_name = self.name
    return display_name

1951

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

1954

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name,
    so that it is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1959

def recurse(self) -> list[ParserElement]:
    """Return the contained sub-expressions; this base implementation has none."""
    no_children: list[ParserElement] = []
    return no_children

1962

def _checkRecursion(self, parseElementList):
    """
    Propagate the recursion check to every expression returned by ``recurse()``,
    passing each one a new list consisting of ``parseElementList`` plus this expression.
    """
    # "+" already builds a new list, so the caller's list is never mutated
    path_so_far = parseElementList + [self]
    for child in self.recurse():
        child._checkRecursion(path_so_far)

1967

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a ``DeprecationWarning`` on every call. ``validateTrace`` is
    accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1978

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a file name;
    in the latter case the file is opened with ``encoding``, read in full, and
    closed before parsing starts. ``parse_all`` (or its synonym ``parseAll``) is
    passed on to ``parse_string``.
    """
    must_parse_all = parseAll or parse_all
    try:
        # EAFP: treat the argument as a readable object first
        file_or_filename = typing.cast(TextIO, file_or_filename)
        text = file_or_filename.read()
    except AttributeError:
        # no usable read(): treat the argument as a path to open
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as infile:
            text = infile.read()

    try:
        return self.parse_string(text, must_parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2008

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal
    - a string compares equal when this expression matches it in full
    - another :class:`ParserElement` compares equal when both have equal instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2017

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

2020

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this parser accepts a test string. Handy for small
    inline microtests of sub-expressions while a larger parser is being built.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a parse exception.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # either spelling of the flag may switch full-string matching off
    require_all = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_all)
    except ParseBaseException:
        return False
    return True

2044

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are synonyms for the corresponding snake_case parameters.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per executed test, where ``result`` is the
    :class:`ParseResults` for a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # reconcile each snake_case argument with its camelCase synonym
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches the two-character sequence backslash-n outside of quoted strings
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # collect comment lines (and blank lines that follow a comment) to be
        # shown above the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the marker is the parse action itself; it may
                    # be absent if the marker frame is the last one in the traceback
                    next_entry = next(it, None)
                    if next_entry is not None:
                        next_f = next_entry[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # no post-parse output: fall back to the normal dump,
                        # honoring full_dump like the other branches do
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2259

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram of this parser as HTML.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite the file, then render into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2315

2316 # Compatibility synonyms

2317 # fmt: off

2318 inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))

2319 setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(

2320 "setDefaultWhitespaceChars", set_default_whitespace_chars

2321 ))

2322 disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))

2323 enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))

2324 enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))

2325 resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

2326

2327 setResultsName = replaced_by_pep8("setResultsName", set_results_name)

2328 setBreak = replaced_by_pep8("setBreak", set_break)

2329 setParseAction = replaced_by_pep8("setParseAction", set_parse_action)

2330 addParseAction = replaced_by_pep8("addParseAction", add_parse_action)

2331 addCondition = replaced_by_pep8("addCondition", add_condition)

2332 setFailAction = replaced_by_pep8("setFailAction", set_fail_action)

2333 tryParse = replaced_by_pep8("tryParse", try_parse)

2334 parseString = replaced_by_pep8("parseString", parse_string)

2335 scanString = replaced_by_pep8("scanString", scan_string)

2336 transformString = replaced_by_pep8("transformString", transform_string)

2337 searchString = replaced_by_pep8("searchString", search_string)

2338 ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)

2339 leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)

2340 setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)

2341 parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)

2342 setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)

2343 setDebug = replaced_by_pep8("setDebug", set_debug)

2344 setName = replaced_by_pep8("setName", set_name)

2345 parseFile = replaced_by_pep8("parseFile", parse_file)

2346 runTests = replaced_by_pep8("runTests", run_tests)

2347 canParseNext = replaced_by_pep8("canParseNext", can_parse_next)

2348 defaultName = default_name

2349 # fmt: on

2350

2351

class _PendingSkip(ParserElement):
    # Internal placeholder marking the spot where '...' was added to a parser element.
    # As soon as another ParserElement is added to it, the placeholder is replaced
    # by an expression built around a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming anchor + Empty() and swapping in the ellipsis
        combined_name = str(self.anchor + Empty())
        return combined_name.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        # NOTE: the nested function names are kept as-is; parse action names can
        # appear in diagnostic output.
        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the first
            # token and the "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the last skipped entry is empty: report the anchor as missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never completed cannot parse anything
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2391

2392

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is simply the name of the concrete class
        concrete_class = type(self)
        return concrete_class.__name__

2403

2404

class NoMatch(Token):
    """
    A token that never matches; parsing it always raises a :class:`ParseException`.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2418

2419

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always instantiated as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, select a subclass with optimized parseImpl
        text = matchString or match_string
        if not text:
            return super().__new__(Empty)
        if len(text) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name is derived from self.match, which must already be set here
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full startswith comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2471

2472

class Empty(Literal):
    """
    A token that matches nothing and therefore always succeeds.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted for signature compatibility but ignored;
        # the match string is always empty
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # consume no input and contribute no tokens
        no_tokens = []
        return loc, no_tokens

2488

2489

class _SingleCharLiteral(Literal):
    # Literal variant with a parseImpl specialized for one-character match strings.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2495

2496

2497ParserElement._literalStringClass = Literal

2498

2499

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        # self.name is derived from self.match, which must already be set here
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case when matching caselessly
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that it is neither immediately
        preceded nor immediately followed by a keyword (identifier) character.

        Returns ``(new_loc, matched_keyword)``; raises :class:`ParseException`
        otherwise. When the text matches but a neighboring keyword character
        disqualifies it, the exception message says so and the error location
        points at the offending neighbor.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            # fast path for one-character keywords, else a full comparison
            (instring[loc] == self.firstMatchChar and self.matchLen == 1)
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2615

2616

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the parent stores the upper-cased form, which parseImpl compares against
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2642

2643

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve the camelCase synonyms, then delegate with caseless forced on
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2667

2668

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # an explicit max_mismatches takes precedence over the camelCase synonym
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by character.

        Returns ``(new_loc, results)`` when the number of mismatching positions
        does not exceed ``maxMismatches``; raises :class:`ParseException` when it
        does, or when the remaining input is shorter than ``match_string``.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[loc:maxloc], match_string)
            ):
                if self.caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop ran to completion, so exactly len(match_string)
                # characters were compared. Using maxloc (rather than the last
                # loop index + 1) also stays correct for an empty match_string,
                # which must not consume any input.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2754

2755

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        super().__init__()

        # a truthy camelCase keyword wins over its snake_case twin
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars

        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        leading = set(initChars)
        if excludeChars:
            banned = set(excludeChars)
            leading -= banned
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - banned)
        self.initChars = leading
        self.initCharsOrig = "".join(sorted(leading))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: bodyChars is the same set object as initChars
            self.bodyChars = leading
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # ``exact`` overrides both bounds, including the local min/max that
        # drive the regex construction below
        if exact > 0:
            min = max = exact
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " in (self.initChars | self.bodyChars):
            return

        if len(self.initChars) == 1:
            lead_fragment = re.escape(self.initCharsOrig)
        else:
            lead_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # a single character class, repeated
            if max == 0 and self.minLen == 1:
                repeat = "+"
            elif max == 1:
                repeat = ""
            elif self.minLen != self.maxLen:
                upper = "" if self.maxLen == _MAX_INT else self.maxLen
                repeat = f"{{{self.minLen},{upper}}}"
            else:
                repeat = f"{{{self.minLen}}}"
            self.reString = f"{lead_fragment}{repeat}"
        else:
            # one leading character, then a repeated body class; the body
            # counts are one less because the leading character is consumed
            if max == 1:
                body_fragment = ""
                repeat = ""
            else:
                body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                lower = min - 1 if min > 0 else ""
                upper = max - 1 if max > 0 else ""
                if max == 0 and self.minLen == 1:
                    repeat = "*"
                elif max == 2:
                    repeat = "?" if min <= 1 else ""
                elif min != max:
                    repeat = f"{{{lower},{upper}}}"
                else:
                    repeat = f"{{{lower}}}"
            self.reString = f"{lead_fragment}{body_fragment}{repeat}"

        if self.asKeyword:
            self.reString = rf"\b{self.reString}\b"

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # keep the character-by-character parseImpl
            self.re = None  # type: ignore[assignment]
        else:
            self.re_match = self.re.match
            self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        def abbreviate(chars):
            # collapse to range notation and clip to 16 characters
            limit = 16
            text = _collapse_string_to_ranges(chars, re_escape=False)
            if len(text) > limit:
                text = text[: limit - 3] + "..."
            return text

        if self.initChars != self.bodyChars:
            base = f"W:({abbreviate(self.initChars)}, {abbreviate(self.bodyChars)})"
        else:
            base = f"W:({abbreviate(self.initChars)})"

        # add length specification
        lo, hi = self.minLen, self.maxLen
        if lo <= 1 and hi == _MAX_INT:
            return base
        if lo == hi:
            return base[2:] if lo == 1 else base + f"{{{lo}}}"
        if hi == _MAX_INT:
            return base + f"{{{lo},...}}"
        return base + f"{{{lo},{hi}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-by-character matcher; returns ``(new_loc, matched_text)``
        or raises :class:`ParseException`.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        loc += 1
        total = len(instring)
        allowed = self.bodyChars
        stop = min(begin + self.maxLen, total)
        while loc < stop and instring[loc] in allowed:
            loc += 1

        more_body_follows = loc < total and instring[loc] in allowed
        failed = (
            # too short
            loc - begin < self.minLen
            # a max was given but the word keeps going past it
            or (self.maxSpecified and more_body_follows)
            # keyword mode: must not be embedded in a longer run of body chars
            or (
                self.asKeyword
                and ((begin > 0 and instring[begin - 1] in allowed) or more_body_follows)
            )
        )

        if failed:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Matcher that delegates to the regex compiled in ``__init__``."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()

        raise ParseException(instring, loc, self.errmsg, self)

3005

3006

class Char(Word):
    """Convenience class equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching any single character out of a string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # a truthy camelCase keyword wins over its snake_case twin
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_flag, exclude_chars=excluded
        )

3027

3028

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The pattern may also be given as a callable taking no arguments and
    returning either a pattern string or a compiled RE object; it is not
    called until the compiled expression is first needed.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        when ``pattern`` is not a string, a compiled RE object, or a callable.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled RE (stdlib ``re`` or a compatible module)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # when both flags are set, as_match takes effect because it is assigned last
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled regular expression, built on first access.

        Raises ``ValueError`` if the pattern string cannot be compiled.
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

            # the callable produced a plain string: record it in reString,
            # as every other construction path does
            self.reString = self.pattern

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error as err:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled expression."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self) -> bool:  # type: ignore[override]
        """True when the expression matches the empty string."""
        return self.re_match("", 0) is not None

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return the matched text as
        :class:`ParseResults`, with each named group added as a named result.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())

        for name, value in result.groupdict().items():
            ret[name] = value

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the tuple of the match's groups."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when this expression was built with
        ``as_group_list=True``, or with ``as_match=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object; expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; re-run the regex substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3201

3202

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped whitespace sequences and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Build the matching regex and, when unquoting, the scanning regex.

        Raises ``ValueError`` if either quote string is empty after stripping
        whitespace, or if the generated pattern cannot be compiled.
        """
        super().__init__()
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        # the two boolean options default to True, so either spelling can turn them off
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of the end quote, provided the rest
            # of the end quote does not follow it
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # plain raw string, NOT an f-string: inside an f-string
                    # the {3}/{2}/{4} repetition counts would be evaluated as
                    # replacement fields and turn into literal digits
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Convert the body of a numeric escape (leading backslash already
        removed) to its character: ``"0"`` is NUL, three digits are octal,
        and ``x..``/``u....`` are hexadecimal. Anything else is returned as-is.
        """
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        if s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``; the text has its quotes removed and its
        escapes processed when ``unquote_results`` is set. Raises
        :class:`ParseException` when no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                convert_escaped_numerics = self._convert_escaped_numerics
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3437

3438

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        # the snake_case argument wins when it is non-empty
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # ``exact`` overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # length is judged on the range-collapsed form, but the raw
        # characters are what gets displayed
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters up to the first excluded one (or ``maxLen``);
        returns ``(new_loc, matched_text)`` or raises :class:`ParseException`.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3517

3518

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that are NOT being matched
        skippable = [ch for ch in self.whiteStrs if ch not in self.matchWhite]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # ``exact`` overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of the configured whitespace characters; returns
        ``(new_loc, matched_text)`` or raises :class:`ParseException`.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop:
            if instring[loc] not in wanted:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3594

3595

class PositionToken(Token):
    """Base class for the position-matching tokens defined below it
    (column, line start/end, string start/end).
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3601

3602

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions, then whitespace, stopping as soon as
        the target column is reached or a non-space character is found.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column; raises
        :class:`ParseException` when ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        destination = loc + self.col - current
        return destination, instring[loc:destination]

3635

3636

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # snapshot the whitespace set before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used by preParse to skip whitespace other than "\n"
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past skippable whitespace and, when newline was part of
        the original whitespace set, past any newlines as well.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        newline_is_white = "\n" in self.orig_whiteChars
        while newline_is_white and instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3684

3685

class LineEnd(PositionToken):
    """Matches only when the current position is at the end of a line
    within the parse string.
    """

    def __init__(self):
        super().__init__()
        # a newline is what this element matches, so it must not be skipped
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a newline at ``loc``, or match emptily at the very end of
        the input; anything else raises ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            # end of input counts as end of line, with no token produced
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3707

3708

class StringStart(PositionToken):
    """Matches only when the current position is at the start of the parse
    string.
    """

    def __init__(self):
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at location 0, or at the location that pre-parsing from 0
        arrives at (i.e. only whitespace and ignorables precede ``loc``).
        """
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3724

3725

class StringEnd(PositionToken):
    """
    Matches only when the current position is at the end of the parse string.
    """

    def __init__(self):
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Fail while input remains; otherwise succeed with no tokens."""
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            # exactly at the end: report a location one past it
            return loc + 1, []
        # already beyond the end: leave the location unchanged
        return loc, []

3744

3745

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy camelCase keyword wins only if it was given a non-default value
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at location 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc - 1] not in chars and instring[loc] in chars:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3770

3771

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy camelCase keyword wins only if it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or past the end of the
        input, or when the previous character is a word character and the
        current one is not.

        Raises ``ParseException`` otherwise, including at location 0 of a
        non-empty string, where no word can have ended yet.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # there is no previous character at loc 0; without this guard
                # instring[loc - 1] would wrap around to the last character
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3797

3798

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so that it can be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to ``True``.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True):
        super().__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.leave_whitespace()
        self.tag_name = tag_name
        self.tag_value = value
        # the tag is injected by a parse action attached to this element
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3839

3840

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement for elements built from a list of
    contained expressions (``self.exprs``), for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        # a generator can only be walked once, so materialize it first
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            items = list(exprs)
            # any bare strings in the sequence are wrapped as literals
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in items
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single contained item
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so that the shared originals are left untouched
        copies = [sub.copy() for sub in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so that the shared originals are left untouched
        copies = [sub.copy() for sub in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this element and
        on every contained expression; a ``Suppress`` that is already
        registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub in self.exprs:
            # propagate the entry the base class just appended
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten one level of
        same-class nesting when exactly two expressions are held.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def can_absorb(candidate) -> bool:
            # only merge plain nested expressions of this same class: no parse
            # actions, no results name, and not being debugged
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # evaluated after the merge above, so this is the current last item
            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates each
        contained expression, then runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if a contained expression already has
        a results name of its own.
        """
        diag_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if diag_enabled:
            # find the first named sub-expression that has not opted out
            offender = next(
                (
                    sub
                    for sub in self.exprs
                    if isinstance(sub, ParserElement)
                    and sub.resultsName
                    and (
                        Diagnostics.warn_ungrouped_named_tokens_in_collection
                        not in sub.suppress_warnings_
                    )
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4010

4011

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl passes it, later failures are
        # re-raised as ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        exprs: list[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            # replace each ``...`` with a SkipTo targeting the expression after it
            expanded: list[ParserElement] = []
            last_index = len(exprs) - 1
            for idx, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                elif idx < last_index:
                    # adding to Empty() coerces the next item to a ParserElement
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[idx + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(skipto_arg)("_skipped*"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            exprs = expanded
        super().__init__(exprs, savelist)

        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            leader = self.exprs[0]
            if isinstance(leader, White):
                self.skipWhitespace = False
            else:
                # whitespace handling follows that of the first expression
                self.set_whitespace_chars(
                    leader.whiteChars,
                    copy_defaults=leader.copyDefaultWhiteChars,
                )
                self.skipWhitespace = leader.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base-class streamlining, link
        ``IndentedBlock`` elements to the expression before them, and
        recompute ``mayReturnEmpty``.
        """

        def ends_in_pending_skip(candidate):
            return (
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_in_pending_skip(e):
                    # the pending skip absorbs the following expression, which
                    # is then marked for removal from this And
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # record the column where prev matched as the block's anchor
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(cur.recurse()), None)
                if first_sub is None:
                    break
                cur = typing.cast(ParserElement, first_sub)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens."""
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, tokens = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        committed = False
        for expr in self.exprs[1:]:
            if type(expr) is And._ErrorStop:
                committed = True
                continue
            if not committed:
                loc, expr_tokens = expr._parse(instring, loc, do_actions)
            else:
                # past an error stop: convert failures to ParseSyntaxException
                try:
                    loc, expr_tokens = expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            tokens += expr_tokens
        return loc, tokens

    def __iadd__(self, other):
        addend = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(addend, ParserElement):
            return self.append(addend)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            # stop at the first expression that cannot match emptily
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"{{{inner}}}"

4172

4173

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if self.exprs:
            # can match emptily if any alternative can; skips whitespace only
            # if every alternative does
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Run the base-class streamlining, then recompute the flags that
        are derived from the contained alternatives.
        """
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        A first pass uses ``try_parse`` to find where each alternative would
        end; the matching alternatives are then re-parsed from longest to
        shortest with ``_parse``. If nothing matches, the collected fatal
        exception at the furthest location is raised if there is one,
        otherwise the non-fatal exception at the furthest location.
        """
        # furthest failure seen so far and the exception that goes with it
        maxExcLoc = -1
        maxException = None
        # (end location, expression) for each alternative that matched
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal failure recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are only tracked while no fatal was seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                # running off the end is reported as a failure at end of input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) obtained so far from a real parse
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    if loc2 >= loc1:
                        # matched at least as far as the trial parse predicted
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties are broken in favor of the
                # longer str() of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if maxExcLoc == loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        # ``^=`` appends another alternative in place; strings become literals
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4327

4328

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            # can match emptily if any alternative can; skips whitespace only
            # if every alternative does
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base-class streamlining, then recompute the flags that
        are derived from the contained alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        # a White alternative disables whitespace skipping for the whole element
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        Fatal exceptions propagate immediately; otherwise the failure at the
        furthest location is raised once every alternative has failed.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                # running off the end is reported as a failure at end of input
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        addend = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(addend, ParserElement):
            return self.append(addend)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        diag_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        )
        if diag_enabled and any(
            isinstance(alt, And)
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in alt.suppress_warnings_
            for alt in self.exprs
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4434

4435

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            # can match emptily only if every contained expression can
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on its first call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        # ``&=`` appends another expression in place; strings become literals
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Run the base-class streamlining, then recompute ``mayReturnEmpty``."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expressions in whatever order the input
        presents them.

        A trial phase repeatedly runs ``try_parse`` over the outstanding
        expressions to discover a matching order; the expressions are then
        parsed for real, in that order, from the original location. Raises
        the furthest ``ParseFatalException`` collected in the last trial
        round, or a ``ParseException`` naming the required expressions that
        never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # maps id() of each Opt's inner expression back to the Opt itself
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            # inner expressions of the Opt elements
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            # other elements that may match emptily
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            # NOTE(review): this tests _MultipleMatch while multirequired below
            # tests OneOrMore; if OneOrMore derives from _MultipleMatch, such
            # elements land in both lists - confirm that is intended
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # working copies, so the cached groups are not modified by this parse
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        # expressions in the order they were found to match
        matchOrder: list[ParserElement] = []

        keepMatching = True
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt when e is an Opt's inner expression
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required and optional expressions are dropped once matched;
                    # entries in multis stay and may match again
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop after a full round in which nothing matched
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties are broken in favor of the
                # longer str() of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # real parse: replay the discovered order from the original location
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"

4608

4609

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    contained expression (``self.expr``) in order to combine or
    post-process its parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Store ``expr`` as the wrapped expression.

        A string argument is first converted into a parser element using
        ``self._literalStringClass``. When the resulting expression is not
        ``None``, its whitespace, list-saving, pre-parse and ignore settings
        are copied onto this wrapper.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                # this wrapper is itself a kind of the literal class, so use a plain Literal
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the parsing-related settings of the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Raises :class:`ParseException` when no expression is defined.
        ``ParseSyntaxException`` propagates untouched; any other
        ``ParseBaseException`` has its missing ``pstr``/``loc``/``parser_element``
        filled in (and its message replaced by ``self.errmsg`` for named,
        non-``Forward`` wrappers) before being re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            use_own_message = (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            )
            if use_own_message:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping here and, if ``recursive``, on a copy
        of the wrapped expression (which then replaces ``self.expr``)."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping here and, if ``recursive``, on a copy
        of the wrapped expression (which then replaces ``self.expr``)."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression and pass the newly
        registered ignore expression on to the wrapped expression.

        A :class:`Suppress` that is already in ``self.ignoreExprs`` is not
        registered again.
        """
        already_registered = isinstance(other, Suppress) and other in self.ignoreExprs
        if already_registered:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the wrapped expression, if any."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise :class:`RecursiveGrammarException` if this element already
        appears in ``parseElementList``; otherwise continue the check in the
        wrapped expression with this element appended to the trail."""
        trail = parseElementList + [self]
        if self in parseElementList:
            raise RecursiveGrammarException(trail)
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates the wrapped
        expression and then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace_so_far = [] if validateTrace is None else validateTrace
        extended_trace = trace_so_far[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Default name: ``<ClassName>:(<wrapped expression>)``."""
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4718

4719

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width expression whose condition requires the current column to equal ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width expression whose condition requires the current column to exceed ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        """Wrap ``expr`` as the expression matched on each line of the block.

        ``recursive`` allows a more deeply indented nested block after each
        match; ``grouped`` wraps the matched block in a :class:`Group`.
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used to build the trailing-undent check in parseImpl
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build the block grammar for the indentation found at ``loc`` and
        parse with it, starting at the first non-whitespace position."""
        # advance to the first non-whitespace position using an Empty();
        # its column becomes the reference column for every line of this block
        anchor_loc = Empty().preParse(instring, loc)

        # fail fast: if self.expr does not match here this raises and
        # nothing else needs to be built
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        block_expr = Group(block) if self._grouped else block
        return (block_expr + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4782

4783

class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only when parsing begins at
    location 0 of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # do not pre-parse before the position check
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression if ``loc`` is 0, else raise ``ParseException``."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4803

4804

class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the parse location is in
    column 1 of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # do not pre-parse before the column check
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression if ``loc`` is at column 1, else raise ``ParseException``."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4836

4837

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead expression at ``loc`` and return the original
        ``loc`` together with the emptied results object."""
        # Parse with the wrapped expression, then delete the list contents of
        # the returned ParseResults in place; the same object is returned so
        # that named results defined in the lookahead are kept.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        return loc, lookahead

4872

4873

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # Work out whether the lookbehind distance is implied by the
        # expression itself; if so it overrides the retreat argument.
        implied_retreat = None
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            implied_retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            implied_retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            implied_retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            implied_retreat = 0

        self.exact = implied_retreat is not None
        if self.exact:
            retreat = implied_retreat
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action empties the token list of whatever the lookbehind matched
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the wrapped expression matches just before ``loc``.

        Returns the unchanged ``loc`` and the lookbehind results; raises a
        parse exception when no lookbehind match is found.
        """
        if self.exact:
            # single attempt, starting exactly ``retreat`` characters back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat gives a maximum lookbehind window: try successive start
        # offsets inside that window, requiring the match to end at the
        # end of the window
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        window_len = len(window)
        failure: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                # remember the most recent failure in case every offset fails
                failure = pbe
            else:
                return loc, ret

        raise failure

4953

4954

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and return ``[start, tokens, end]``
        with the ``locn_start``, ``value`` and ``locn_end`` names attached."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located

4995

4996

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate the setting to the contained expressions
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Raise ``ParseException`` if the wrapped expression can parse at
        ``loc``; otherwise return ``loc`` with an empty token list."""
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in braces, prefixed with ``~``."""
        return "~{" + str(self.expr) + "}"

5038

5039

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition expressions: matches the wrapped
    expression one or more times, optionally stopping at a sentinel."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        """``stop_on`` / ``stopOn`` name the same optional terminating
        sentinel; ``stopOn`` is used when both are truthy."""
        super().__init__(expr)
        stopOn = stopOn or stop_on
        self.saveAsList = True
        # the stopOn() method converts a string sentinel to an expression
        self.stopOn(stopOn)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; a string
        is converted using ``self._literalStringClass``. Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once (failures propagate), then keep
        matching until it fails or the sentinel is seen; return the final
        location and the accumulated tokens."""
        # bind to locals once, these are called on every repetition
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_sentinel = not_ender.try_parse if not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # first failed repetition (or sentinel) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if the contained expression or one of
        its direct sub-expressions already has a results name."""
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for sub in [self.expr] + self.expr.recurse():
                collides = (
                    isinstance(sub, ParserElement)
                    and sub.resultsName
                    and diag not in sub.suppress_warnings_
                )
                if not collides:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {sub.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

5114

5115

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in braces followed by ``...``."""
        return "{" + str(self.expr) + "}..."

5146

5147

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse as one-or-more; if even the first match fails, succeed at
        the original ``loc`` with empty results carrying this element's
        results name."""
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            matched = loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in brackets followed by ``...``."""
        return "[" + str(self.expr) + "]..."

5180

5181

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max`` bound the number of list elements; ``min`` must
        be at least 1 and ``max`` may not be less than ``min``, otherwise
        ``ValueError`` is raised.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the element-count bounds before building anything
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters stay in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one element, then (delimiter + element) repeated within the bounds
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name: ``<content> [<delimiter> <content>]...`` using the
        streamlined content expression and the raw delimiter text."""
        item_name = str(self.content.streamline())
        return f"{item_name} [{self.raw_delim} {item_name}]..."

5245

5246

class _NullToken:
    """Placeholder object that is always falsy and renders as an empty string."""

    def __bool__(self) -> bool:
        """Instances are always falsy."""
        return False

    def __str__(self) -> str:
        """Instances render as the empty string."""
        return ""

5253

5254

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied"; compared by identity
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try the wrapped expression; when it fails, succeed at the same
        ``loc`` with the default value (if one was given) or no tokens."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default configured: contribute nothing
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # attach the default under the inner expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in brackets, with any
        enclosing brace pairs of the inner name removed."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] + inner[-1] == "{}":
            inner = inner[1:-1]
        return "[" + inner + "]"


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        """``fail_on`` and ``failOn`` name the same option; ``failOn`` is
        used when both are truthy. A string is converted to an expression
        using ``self._literalStringClass``."""
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that only carries the ignore expressions used while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's current
        ignore expressions plus the ``ignore`` constructor argument."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, as the overridden
        ``ignore`` method does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location where scanning stopped and the skipped text
        (followed by the target's tokens when ``include`` was set). Raises
        ``ParseException`` if the end of the input is passed without the
        target matching.
        """
        startloc = loc
        instrlen = len(instring)
        # bind to locals once, these are called for every scanned position
        self_expr_parse = self.expr._parse
        self_failOn_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_can_parse_next is not None:
                # stop scanning if the failOn expression matches here
                # NOTE(review): this break leaves the loop without raising, so
                # the code below still runs with the text skipped so far -
                # confirm this is the intended fail_on behavior
                if self_failOn_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time so its tokens are included
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5484

5485

5486class Forward(ParseElementEnhance):

5487 """

5488 Forward declaration of an expression to be defined later -

5489 used for recursive grammars, such as algebraic infix notation.

5490 When the expression is known, it is assigned to the ``Forward``

5491 variable using the ``'<<'`` operator.

5492

5493 Note: take care when assigning to ``Forward`` not to overlook

5494 precedence of operators.

5495

5496 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

5497

5498 fwd_expr << a | b | c

5499

5500 will actually be evaluated as::

5501

5502 (fwd_expr << a) | b | c

5503

5504 thereby leaving b and c out as parseable alternatives. It is recommended that you

5505 explicitly group the values inserted into the ``Forward``::

5506

5507 fwd_expr << (a | b | c)

5508

5509 Converting to use the ``'<<='`` operator instead will avoid this problem.

5510

5511 See :class:`ParseResults.pprint` for an example of a recursive

5512 parser created using ``Forward``.

5513 """

5514

5515 def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):

5516 self.caller_frame = traceback.extract_stack(limit=2)[0]

5517 super().__init__(other, savelist=False) # type: ignore[arg-type]

5518 self.lshift_line = None

5519

5520 def __lshift__(self, other) -> Forward:

5521 if hasattr(self, "caller_frame"):

5522 del self.caller_frame

5523 if isinstance(other, str_type):

5524 other = self._literalStringClass(other)

5525

5526 if not isinstance(other, ParserElement):

5527 return NotImplemented

5528

5529 self.expr = other

5530 self.streamlined = other.streamlined

5531 self.mayIndexError = self.expr.mayIndexError

5532 self.mayReturnEmpty = self.expr.mayReturnEmpty

5533 self.set_whitespace_chars(

5534 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars

5535 )

5536 self.skipWhitespace = self.expr.skipWhitespace

5537 self.saveAsList = self.expr.saveAsList

5538 self.ignoreExprs.extend(self.expr.ignoreExprs)

5539 self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment]

5540 return self

5541

def __ilshift__(self, other) -> Forward:
    """Implement ``fwd <<= expr`` by delegating to ``fwd << expr``.

    Returns ``NotImplemented`` when ``other`` is not a
    :class:`ParserElement`.
    """
    if isinstance(other, ParserElement):
        return self << other

    return NotImplemented

5547

def __or__(self, other) -> ParserElement:
    """Implement ``self | other``.

    Warns first when the enabled diagnostic detects ``'|'`` being applied
    on the same source line that used ``'<<'`` on this ``Forward``.
    """
    caller_line = traceback.extract_stack(limit=2)[-2]
    lshift_then_or = (
        __diag__.warn_on_match_first_with_lshift_operator
        and caller_line == self.lshift_line
        and Diagnostics.warn_on_match_first_with_lshift_operator
        not in self.suppress_warnings_
    )
    if lshift_then_or:
        warnings.warn(
            "warn_on_match_first_with_lshift_operator:"
            " using '<<' operator with '|' is probably an error, use '<<='",
            stacklevel=2,
        )
    return super().__or__(other)

5563

def __del__(self):
    """Warn if this ``Forward`` is dropped without an attached expression.

    This catches ``fwd = expr`` being written where ``fwd <<= expr`` (or
    ``fwd << expr``) was intended. The warning is attributed to the file
    and line stored in ``caller_frame``.
    """
    # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
    try:
        expr = self.expr
        suppressed = self.suppress_warnings_
        caller_frame = self.caller_frame
    except AttributeError:
        # Partially constructed object, or the definition site has already
        # been discarded: there is nothing meaningful to report, and a
        # finalizer must not raise.
        return

    if expr is not None:
        return
    if not __diag__.warn_on_assignment_to_Forward:
        return
    if Diagnostics.warn_on_assignment_to_Forward in suppressed:
        return

    warnings.warn_explicit(
        "warn_on_assignment_to_Forward:"
        " Forward defined here but no expression attached later using '<<=' or '<<'",
        UserWarning,
        filename=caller_frame.filename,
        lineno=caller_frame.lineno,
    )

5578

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Parse ``instring`` at ``loc`` using the contained expression.

    Emits the ``warn_on_parse_using_empty_Forward`` diagnostic (when
    enabled and not suppressed) if no expression has been attached. When
    left recursion is not enabled this simply delegates to the base class;
    otherwise the bounded-recursion algorithm described below is used,
    with intermediate results kept in ``ParserElement.recursion_memos``.
    """
    if (
        self.expr is None
        and __diag__.warn_on_parse_using_empty_Forward
        and Diagnostics.warn_on_parse_using_empty_Forward
        not in self.suppress_warnings_
    ):
        # walk stack until parse_string, scan_string, search_string, or transform_string is found
        parse_fns = (
            "parse_string",
            "scan_string",
            "search_string",
            "transform_string",
        )
        tb = traceback.extract_stack(limit=200)
        for i, frm in enumerate(reversed(tb), start=1):
            if frm.name in parse_fns:
                stacklevel = i + 1
                break
        else:
            # no public parse entry point found on the stack
            stacklevel = 2
        warnings.warn(
            "warn_on_parse_using_empty_Forward:"
            " Forward expression was never assigned a value, will not parse any input",
            stacklevel=stacklevel,
        )
    if not ParserElement._left_recursion_enabled:
        return super().parseImpl(instring, loc, do_actions)
    # ## Bounded Recursion algorithm ##
    # Recursion only needs to be processed at ``Forward`` elements, since they are
    # the only ones that can actually refer to themselves. The general idea is
    # to handle recursion stepwise: We start at no recursion, then recurse once,
    # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
    #
    # The "trick" here is that each ``Forward`` gets evaluated in two contexts
    # - to *match* a specific recursion level, and
    # - to *search* the bounded recursion level
    # and the two run concurrently. The *search* must *match* each recursion level
    # to find the best possible match. This is handled by a memo table, which
    # provides the previous match to the next level match attempt.
    #
    # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
    #
    # There is a complication since we not only *parse* but also *transform* via
    # actions: We do not want to run the actions too often while expanding. Thus,
    # we expand using `do_actions=False` and only run `do_actions=True` if the next
    # recursion level is acceptable.
    with ParserElement.recursion_lock:
        memo = ParserElement.recursion_memos
        try:
            # we are parsing at a specific recursion expansion - use it as-is
            prev_loc, prev_result = memo[loc, self, do_actions]
            if isinstance(prev_result, Exception):
                raise prev_result
            return prev_loc, prev_result.copy()
        except KeyError:
            # memo keys are (location, expression, do_actions)
            act_key = (loc, self, True)
            peek_key = (loc, self, False)
            # we are searching for the best recursion expansion - keep on improving
            # both `do_actions` cases must be tracked separately here!
            # Seed the memo with a failure located just before ``loc`` so that
            # any successful match compares as an improvement.
            prev_loc, prev_peek = memo[peek_key] = (
                loc - 1,
                ParseException(
                    instring, loc, "Forward recursion without base case", self
                ),
            )
            if do_actions:
                memo[act_key] = memo[peek_key]
            while True:
                try:
                    new_loc, new_peek = super().parseImpl(instring, loc, False)
                except ParseException:
                    # we failed before getting any match – do not hide the error
                    if isinstance(prev_peek, Exception):
                        raise
                    new_loc, new_peek = prev_loc, prev_peek
                # the match did not get better: we are done
                if new_loc <= prev_loc:
                    if do_actions:
                        # replace the match for do_actions=False as well,
                        # in case the action did backtrack
                        prev_loc, prev_result = memo[peek_key] = memo[act_key]
                        del memo[peek_key], memo[act_key]
                        return prev_loc, copy.copy(prev_result)
                    del memo[peek_key]
                    return prev_loc, copy.copy(prev_peek)
                # the match did get better: see if we can improve further
                if do_actions:
                    try:
                        memo[act_key] = super().parseImpl(instring, loc, True)
                    except ParseException as e:
                        # record the failure for both keys before propagating it
                        memo[peek_key] = memo[act_key] = (new_loc, e)
                        raise
                prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

5673

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn off whitespace skipping on this element and return ``self``.

    Only this element's own ``skipWhitespace`` flag is changed; the
    ``recursive`` argument is accepted but not used here.
    """
    setattr(self, "skipWhitespace", False)
    return self

5677

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn on whitespace skipping on this element and return ``self``.

    Only this element's own ``skipWhitespace`` flag is changed; the
    ``recursive`` argument is accepted but not used here.
    """
    setattr(self, "skipWhitespace", True)
    return self

5681

def streamline(self) -> ParserElement:
    """Streamline the contained expression once and return ``self``.

    The ``streamlined`` flag is set before descending into the contained
    expression, so a self-referencing grammar does not recurse endlessly.
    """
    if self.streamlined:
        return self

    self.streamlined = True
    contained = self.expr
    if contained is not None:
        contained.streamline()
    return self

5688

def validate(self, validateTrace=None) -> None:
    """Deprecated validation hook.

    Issues a ``DeprecationWarning``, validates the contained expression
    (unless this element is already part of ``validateTrace``) and then
    runs ``self._checkRecursion([])``.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    already_seen = self in trace
    if not already_seen:
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
    self._checkRecursion([])

5703

def _generateDefaultName(self) -> str:
    """Build the default display name from the contained expression.

    Returns ``"<ClassName>: <expr>"`` where the expression text is limited
    to 1000 characters, ``"None"`` when no expression is attached, and
    ``"..."`` if rendering the expression raised an ordinary exception.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    self._defaultName = ": ..."

    # Use the string representation of main expression.
    try:
        if self.expr is not None:
            ret_string = str(self.expr)[:1000]
        else:
            ret_string = "None"
    except Exception:
        # Naming must not fail because a sub-expression cannot be rendered.
        # Unlike a ``return`` inside ``finally``, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        ret_string = "..."

    return f"{type(self).__name__}: {ret_string}"

5717

def copy(self) -> ParserElement:
    """Return a copy of this element.

    When an expression is attached the base-class copy is used. An empty
    ``Forward`` is instead wrapped in a new ``Forward`` that refers back to
    this one.
    """
    if self.expr is None:
        ret = Forward()
        ret <<= self
        return ret

    return super().copy()

5725

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """Set a results name, warning first if no expression is attached.

    The warning is only issued when the ``warn_name_set_on_empty_Forward``
    diagnostic is enabled and not suppressed for this element.
    """
    # fmt: off
    naming_empty_forward = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if naming_empty_forward:
        warnings.warn(
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression",
            stacklevel=3,
        )
    # fmt: on

    return super()._setResultsName(name, list_all_matches)

5742

# Compatibility synonyms
# camelCase aliases of the snake_case methods, each built with replaced_by_pep8()
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on

5748

5749

class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses that convert parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        """Wrap ``expr``; ``savelist`` is accepted but not forwarded to the base class."""
        super().__init__(expr)
        # converters start out not saving results as a list
        self.saveAsList = False

5758

5759

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        """Wrap ``expr``.

        ``joinString`` is the pre-PEP8 spelling of ``join_string`` and takes
        precedence when it is given.
        """
        super().__init__(expr)
        if joinString is None:
            joinString = join_string
        # turn off whitespace skipping inside the contained expressions,
        # then switch it back on for the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression and return ``self``.

        In adjacent mode ``ParserElement.ignore`` is called directly;
        otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
            return self

        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string."""
        # start from a copy so that the tokens' metadata is retained,
        # then swap its contents for the one combined token
        retToks = tokenlist.copy()
        del retToks[:]
        pieces = tokenlist._asStringList(self.joinString)
        retToks += ParseResults(["".join(pieces)], modal=self.modalResults)

        wrap_in_list = self.resultsName and retToks.haskeys()
        return [retToks] if wrap_in_list else retToks

5815

5816

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Passing ``aslist=True`` returns the parsed tokens as a Python list
    instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        """Wrap ``expr``; ``aslist`` selects plain-list output."""
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens, converting to a plain list if requested."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

5852

5853

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary. Each element can be looked up using the first token of
    the expression as its key. Useful for tabular report scraping when the
    first column can be used as an item key.

    Passing ``asdict=True`` returns the parsed tokens as a Python dict
    instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        """Wrap ``expr``; ``asdict`` selects plain-dict output."""
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first token."""
        for i, tok in enumerate(tokenlist):
            ntoks = len(tok)
            if ntoks == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = str(ikey).strip()

            if ntoks == 1:
                # key only, no value
                value = ""
            elif ntoks == 2 and not isinstance(tok[1], ParseResults):
                # key followed by a single plain value
                value = tok[1]
            else:
                try:
                    dictvalue = tok.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                # drop the key, keeping only the value tokens
                del dictvalue[0]

                keep_whole = len(dictvalue) != 1 or (
                    isinstance(dictvalue, ParseResults) and dictvalue.haskeys()
                )
                value = dictvalue if keep_whole else dictvalue[0]

            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

5939

5940

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr``; ``...`` stands for a pending skip resolved by ``+`` or ``-``."""
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> ParserElement:
        """Implement ``self + other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """Implement ``self - other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Throw away whatever was matched."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed: return ``self`` unchanged."""
        return self

5994

5995

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    Each call of the parse action first writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action finishes, ``"<<leaving"`` is written together with
    the returned value, or with the exception that the parse action raised
    (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        thisFunc = f.__name__
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument: qualify the name with its type
            owner = type(paArgs[0]).__name__
            thisFunc = f"{owner}.{thisFunc}"
        sys.stderr.write(f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {thisFunc} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {thisFunc} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

6041

6042

# convenience constants for positional expressions
# (one shared, named instance of each expression class)
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

6049

# Grammar used by srange() to parse a regex-style ``[...]`` character set.

# backslash-escaped punctuation: yields the character after the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape ``\x##`` / ``\X##`` (or legacy ``\0x##``): yields the character
# with that code point. The hex digits are whatever follows the x marker;
# hex digits never contain 'x', so rpartition on the lowercased match is
# unambiguous. (Stripping the set of characters "\", "0" and "x" from the
# left would also eat leading zero digits and would not remove an 'X'.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().rpartition("x")[2], 16))
)
# octal escape ``\0##``: yields the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: any escape form, or a single char other than ``\`` and ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the complete bracket expression, with optional ``^`` negation marker
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6069

6070

def srange(s: str) -> str:
    r"""Helper for defining character sets for :class:`Word` construction,
    using the regexp ``'[]'`` string range syntax::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s. The returned string is the
    expanded character set joined into a single string; an empty string is
    returned if the input cannot be processed. The values enclosed in the
    []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expand(part):
        # plain characters pass through; a grouped [first, last] pair is
        # expanded to every character in that inclusive range
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return "".join(chr(c) for c in range(first, last + 1))

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expand(part) for part in body)
    except Exception:
        return ""

6106

6107

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every
    element of a :class:`ParseResults` list. Any additional args are
    forwarded to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # name the action after the mapped callable; callables without a
    # __name__ fall back to the name of their class
    fallback_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", fallback_name)

    return pa

6152

6153

def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements, e.g. when generating a
    railroad diagram with named subdiagrams: every :class:`ParserElement`
    among the caller's local variables that has no custom name yet is
    named after its variable.
    """
    # frame 1 is the code that called autoname_elements()
    calling_frame = sys._getframe(1)
    if calling_frame is None:
        return
    calling_frame = typing.cast(types.FrameType, calling_frame)
    for name, var in calling_frame.f_locals.items():
        if not isinstance(var, ParserElement):
            continue
        if var.customName:
            # an explicitly assigned name is left alone
            continue
        var.set_name(name)

6166

6167

# Double-quoted string on a single line: the body excludes raw CR/LF, and an
# embedded quote may be written doubled ("") or as a backslash escape.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above (double-quoted form is tried first).
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms plus single-line forms in
# which an embedded quote is backslash-escaped rather than doubled. The
# alternatives are combined with '^'.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted_string prefixed with a literal "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets expanded by srange() from the code point ranges
# 0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff and 0xa1-0xbf, 0xd7, 0xf7 respectively.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6211

# Compatibility synonyms
# camelCase names for the snake_case definitions: module-level expressions are
# plain aliases, functions are wrapped with replaced_by_pep8()
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6226# fmt: on