Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 43%

1812 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

1813

1814 - ``exception_action`` - method to be called when expression fails to parse;

1815 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

1816 """

1817 self.debugActions = self.DebugActions(

1818 start_action or _default_start_debug_action, # type: ignore[truthy-function]

1819 success_action or _default_success_debug_action, # type: ignore[truthy-function]

1820 exception_action or _default_exception_debug_action, # type: ignore[truthy-function]

1821 )

1822 self.debug = True

1823 return self

1824

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Turn the display of debugging messages during pattern matching on or off.

    Parameters:

    - ``flag`` - ``True`` (the default) enables debugging, ``False`` disables it
    - ``recurse`` - (default= ``False``) when ``True``, apply the flag to this
      expression and all sub-expressions (every expression returned by ``visit_all()``)

    Returns ``self`` so that calls can be chained.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This is the output of the default debug actions; custom debug actions can be
    installed with :class:`set_debug_actions`. Before each attempt to match the
    ``wd`` expression, the message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. It is followed by a ``"Matched"`` message if the parse succeeds, or by an
    ``"Exception raised"`` message if it fails. Note also the use of :class:`set_name`
    to give the expression a human-readable name, which makes debugging and exception
    messages easier to follow - without ``set_name``, the default name generated for
    the :class:`Word` expression would be ``"W:(A-Za-z)"``.
    """
    if not recurse:
        if not flag:
            self.debug = False
            return self
        # enabling always (re)installs the default debug actions
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
        return self

    # recursive form: delegate to each visited expression, one level at a time
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self

1877

@property
def default_name(self) -> str:
    """
    Auto-generated name of this expression.

    The name is built by ``_generateDefaultName()`` on first access and then kept in
    ``_defaultName``; later accesses return the stored value.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1883

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this method when no generated name is currently
    stored in ``_defaultName``, and stores the returned string there.
    """

1889

def set_name(self, name: str) -> "ParserElement":
    """
    Assign a custom name to this expression, so that debugging and exception
    messages are clearer. Returns ``self``.

    If ``__diag__.enable_debug_on_named_expressions`` is set, debugging is also
    switched on for this expression via :class:`set_debug`.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # read the name back through the ``name`` property rather than using the argument directly
    shown_name = self.name
    self.errmsg = f"Expected {shown_name}"
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self

1907

@property
def name(self) -> str:
    """
    Name used for this expression: the user-defined ``customName`` when one has
    been set (is not ``None``), otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

1912

def __str__(self) -> str:
    """Return the expression's ``name`` as its string form."""
    label = self.name
    return label

1915

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

1918

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the stored default name,
    so that ``default_name`` regenerates it on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1923

def recurse(self) -> List["ParserElement"]:
    """Return the expressions contained in this one; this implementation has none."""
    return list()

1926

def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a new list
    consisting of ``parseElementList`` followed by this expression.
    The caller's list is not modified.
    """
    trail = parseElementList + [self]
    for contained in self.recurse():
        contained._checkRecursion(trail)

1931

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: a ``DeprecationWarning`` is issued on every call. ``validateTrace``
    is accepted but not used.
    """
    deprecation_note = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_note, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1942

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method or a file name.
    When a file name is given, the file is opened with ``encoding``, read in full,
    and closed before parsing begins. ``parse_all`` (or its legacy spelling
    ``parseAll``) is forwarded to :class:`parse_string`.
    """
    require_all = parseAll or parse_all
    try:
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # no usable read() - treat the argument as a file name instead
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            text = source.read()
    try:
        return self.parse_string(text, require_all)
    except ParseBaseException as parse_err:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise parse_err.with_traceback(None)
        raise

1972

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object: equal
    - a string: equal if this expression matches the whole string
    - another ``ParserElement``: equal if both have equal instance attributes
    - anything else: not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1981

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

1984

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this expression parses a test string. Handy for small
    inline microtests of sub-expressions while a larger parser is being built up.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if the parse succeeds, ``False`` if it raises a parse exception.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # both spellings default to True, so either one can switch parse-all off
    strict = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=strict)
    except ParseBaseException:
        return False
    else:
        return True

2008

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      ``fn(test_string, parse_results)`` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, where each result is the parsed results for
    that test or the exception it raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the snake_case arguments with their legacy camelCase spellings;
    # flags that default to True are and-ed so either spelling can switch them off
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a multiline string holds one test (or comment) per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # rewrites a literal backslash-n marker in a test line into a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines following a comment) are collected and
        # emitted ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing: fall back to the normal dump,
                        # honoring full_dump like the other dump calls here
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2208

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as import_err:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from import_err

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite that file with the generated HTML
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))

2264

# Compatibility synonyms
# Each pre-PEP8 camelCase name below is bound to the snake_case method listed next to it.
# NOTE(review): replaced_by_pep8 is defined outside this section; presumably it builds a
# wrapper that forwards to the new method under the old name - confirm against its definition.
# fmt: off
inlineLiteralsUsing = replaced_by_pep8("inlineLiteralsUsing", inline_literals_using)
setDefaultWhitespaceChars = replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
)
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
enableLeftRecursion = replaced_by_pep8("enableLeftRecursion", enable_left_recursion)
enablePackrat = replaced_by_pep8("enablePackrat", enable_packrat)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
# the remaining three are direct aliases of the same objects, with no wrapper
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on

2297

2298

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        # expression that the '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming a throwaway Empty and swapping its label
        anchor_then_empty = self.anchor + Empty()
        return str(anchor_then_empty).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # adding the follow-on expression turns the placeholder into a real SkipTo
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the skipped token
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor absent and nothing skipped: record that the anchor was missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a bare placeholder can never be parsed; it must first be combined via '+'
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2338

2339

class Token(ParserElement):
    """Abstract base class for :class:`ParserElement` subclasses that define
    atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name of a token is the name of its concrete class
        concrete_class = type(self)
        return concrete_class.__name__

2350

2351

class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises a :class:`ParseException`.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2365

2366

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only a direct Literal(...) call is redirected; subclasses are created as-is.
        target = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target = Empty
            elif len(text) == 1:
                target = _SingleCharLiteral
        return super().__new__(target)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        self.mayReturnEmpty = False
        self.mayIndexError = False
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        # empty string when there is no text, otherwise the first character
        self.firstMatchChar = text[:1]
        # self.name is derived from self.match, so set the message after it
        self.errmsg = f"Expected {self.name}"

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character comparison before the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2418

2419

class Empty(Literal):
    """
    An empty token: it matches at any location, consumes no input and
    returns no tokens.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted but ignored; the literal text is always ""
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        no_tokens = []
        return loc, no_tokens

2435

2436

class _SingleCharLiteral(Literal):
    # Literal variant selected by Literal.__new__ for one-character match strings;
    # a single character comparison replaces the startswith() test.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2442

2443

# Set Literal as the class stored in ParserElement._literalStringClass; done here
# because Literal is only defined at this point in the module.
# NOTE(review): presumably this is the class used to wrap plain strings combined with
# expressions (cf. inline_literals_using) - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2445

2446

class Keyword(Token):
    """
    Token that matches a specified string exactly, but only as a keyword: the
    match must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    In addition to the keyword string, two optional constructor arguments
    are accepted:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # legacy camelCase keywords take precedence over the snake_case ones
        keyword_chars = identChars or ident_chars
        if keyword_chars is None:
            keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        try:
            self.firstMatchChar = text[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = text.upper()
            keyword_chars = keyword_chars.upper()
        self.identChars = set(keyword_chars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        message = self.errmsg
        fail_at = loc
        ident_chars = self.identChars
        match_end = loc + self.matchLen

        if self.caseless:
            if instring[loc:match_end].upper() == self.caselessmatch:
                if loc != 0 and instring[loc - 1].upper() in ident_chars:
                    # preceded by keyword char
                    message += ", keyword was immediately preceded by keyword character"
                    fail_at = loc - 1
                elif (
                    match_end >= len(instring)
                    or instring[match_end].upper() not in ident_chars
                ):
                    return match_end, self.match
                else:
                    # followed by keyword char
                    message += ", was immediately followed by keyword character"
                    fail_at = match_end
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc != 0 and instring[loc - 1] in ident_chars:
                # preceded by keyword char
                message += ", keyword was immediately preceded by keyword character"
                fail_at = loc - 1
            elif match_end >= len(instring) or instring[match_end] not in ident_chars:
                return match_end, self.match
            else:
                # followed by keyword char
                message += ", keyword was immediately followed by keyword character"
                fail_at = match_end
        # else no match just raise plain exception

        raise ParseException(instring, fail_at, message, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Replace the default set of keyword characters used by :class:`Keyword`
        expressions that are created without explicit ``ident_chars``.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars

2560

2561

class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        original_text = matchString or match_string
        # the base class stores the upper-cased text, which parseImpl compares against
        super().__init__(original_text.upper())
        # Preserve the defining literal.
        self.returnString = original_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, doActions=True):
        match_end = loc + self.matchLen
        if instring[loc:match_end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return match_end, self.returnString

2587

2588

class CaselessKeyword(Keyword):
    """
    Case-insensitive version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve the legacy camelCase spellings, then defer to Keyword with caseless on
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2612

2613

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    A successful parse returns the matched text from the input string,
    together with these named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the match was exact.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # an explicit max_mismatches wins; otherwise fall back to the legacy keyword
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        target = self.match_string
        window_end = start + len(target)

        # not enough input left to hold the whole match string
        if window_end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        mismatches = []
        last_index = 0
        for last_index, (src, mat) in enumerate(zip(instring[start:window_end], target)):
            if self.caseless:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(last_index)
                if len(mismatches) > limit:
                    # too many differences: fail at the original location
                    raise ParseException(instring, loc, self.errmsg, self)

        end = start + last_index + 1
        results = ParseResults([instring[start:end]])
        results["original"] = target
        results["mismatches"] = mismatches
        return end, results

2699

2700

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # legacy camelCase keywords take precedence when they are truthy
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # strip the excluded characters from both character sets
        leading = set(initChars)
        if excludeChars:
            excluded = set(excludeChars)
            leading -= excluded
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excluded)
        self.initChars = leading
        self.initCharsOrig = "".join(sorted(leading))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no distinct body set: body characters are the leading characters
            self.bodyChars = leading
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds (locals are reused
            # below when building the regex repeat count)
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # a single character class repeated min..max times
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                elif self.minLen != self.maxLen:
                    upper = "" if self.maxLen == _MAX_INT else self.maxLen
                    repeat = f"{{{self.minLen},{upper}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
                pattern = f"{re_leading_fragment}{repeat}"
            else:
                # one leading character, then (min-1)..(max-1) body characters
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    elif min != max:
                        lower = min - 1 if min > 0 else ""
                        upper = max - 1 if max > 0 else ""
                        repeat = f"{{{lower},{upper}}}"
                    else:
                        lower = min - 1 if min > 0 else ""
                        repeat = f"{{{lower}}}"

                pattern = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                pattern = rf"\b{pattern}\b"
            self.reString = pattern

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Build a compact ``W:(...)`` name, with a length suffix when bounded."""

        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) <= max_repr_len:
                return s
            return s[: max_repr_len - 3] + "..."

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        shortest, longest = self.minLen, self.maxLen
        if shortest <= 1 and longest == _MAX_INT:
            return base
        if shortest == longest:
            # a one-character word drops the "W:" prefix
            return base[2:] if shortest == 1 else f"{base}{{{shortest}}}"
        if longest == _MAX_INT:
            return f"{base}{{{shortest},...}}"
        return f"{base}{{{shortest},{longest}}}"

    def parseImpl(self, instring, loc, doActions=True):
        """Character-by-character matcher used when no regex could be built."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # is the character just past the match another body character?
        followed_by_body = loc < instrlen and instring[loc] in bodychars

        if loc - start < self.minLen:
            reject = True
        elif self.maxSpecified and followed_by_body:
            # stopped only because max was reached: the word is too long
            reject = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in bodychars) or followed_by_body
        ):
            reject = True
        else:
            reject = False

        if reject:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        """Regex-based matcher installed by ``__init__`` when the pattern compiles."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()

2950

2951

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # legacy camelCase keywords take precedence when they are truthy
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_flag, exclude_chars=excluded
        )

2972

2973

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation of a string pattern is deferred to the `re` property
            compiled, pattern_text = None, pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled expression (stdlib `re` or a look-alike)
            compiled, pattern_text = pattern, pattern.pattern

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self._re = compiled
        self.reString = self.pattern = pattern_text
        self.flags = flags

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match wins when both result styles are requested
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]
        elif self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The compiled expression: the one supplied, or ``self.pattern`` compiled on first use."""
        supplied = self._re
        if supplied:
            return supplied

        try:
            return re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled expression."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True if the expression matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        shown = repr(self.pattern).replace("\\\\", "\\")
        return "Re:({})".format(shown)

    def parseImpl(self, instring, loc, doActions=True):
        """Default matcher: returns the matched text, with named groups as named results."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        tokens = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            tokens[group_name] = group_value

        return found.end(), tokens

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Matcher for ``as_group_list=True``: returns the tuple from ``groups()``."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Matcher for ``as_match=True``: returns the match object itself."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or with a
        callable ``repl`` together with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(as_match=True)"
                )

            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # the token is the matched text: re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3128

3129

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # two-character escape sequences and the whitespace they stand for
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()
        # merge legacy camelCase keywords with their snake_case equivalents;
        # the True-by-default flags are on only if neither spelling disabled them
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        alternatives = []

        if esc_quote:
            alternatives.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            alternatives.append(rf"(?:{re.escape(esc_char)}.)")

        closer = self.end_quote_char
        if len(closer) > 1:
            # allow a proper prefix of the closing delimiter, provided it is
            # not followed by the remainder of that delimiter
            partial_closers = "|".join(
                f"(?:{re.escape(closer[:i])}(?!{re.escape(closer[i:])}))"
                for i in range(len(closer) - 1, 0, -1)
            )
            alternatives.append("(?:" + partial_closers + ")")

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            line_breaks = ""
        else:
            # single-line strings may not contain raw line breaks
            line_breaks = r"\n\r"

        closer_first = _escape_regex_range_chars(closer[0])
        esc_in_class = _escape_regex_range_chars(esc_char) if self.has_esc_char else ''
        alternatives.append(rf"(?:[^{closer_first}{line_breaks}{esc_in_class}])")

        self.pattern = (
            re.escape(self.quote_char)
            + "(?:"
            + '|'.join(alternatives)
            + ")*"
            + re.escape(self.end_quote_char)
        )

        if self.unquote_results:
            escaped_pair = rf"({re.escape(self.esc_char)}.)"
            any_char = r"(\n|.)"
            if self.convert_whitespace_escapes:
                ws_escapes = '|'.join(re.escape(k) for k in self.ws_map)
                scan_pattern = rf"({ws_escapes})" + "|" + escaped_pair + "|" + any_char
            else:
                scan_pattern = escaped_pair + "|" + any_char
            self.unquote_scan_re = re.compile(scan_pattern, flags=self.re_flags)
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")
        else:
            self.reString = self.pattern
            self.re_match = self.re.match

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        opener, closer = self.quote_char, self.end_quote_char
        if opener == closer and isinstance(opener, str_type):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``; unquote and unescape it if so configured."""
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        found = None
        if instring[loc] == self.first_quote_char:
            found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = found.end()
        ret = found.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # walk the content one scan-match at a time; exactly one
                # group of unquote_scan_re is populated per match
                pieces = []
                if self.convert_whitespace_escapes:
                    ws_lookup = self.ws_map
                    for m in self.unquote_scan_re.finditer(ret):
                        if m.group(1):
                            # group 1 matches \t, \n, etc.
                            pieces.append(ws_lookup[m.group(1)])
                        elif m.group(2):
                            # group 2 matches escaped characters
                            pieces.append(m.group(2)[-1])
                        else:
                            # group 3 matches any character
                            pieces.append(m.group(3))
                else:
                    for m in self.unquote_scan_re.finditer(ret):
                        if m.group(1):
                            # group 1 matches escaped characters
                            pieces.append(m.group(1)[-1])
                        else:
                            # group 2 matches any character
                            pieces.append(m.group(2))
                ret = "".join(pieces)

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3350

3351

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace is matchable content here, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed form only decides whether to truncate; the name
        # itself is built from the raw excluded-character string
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume up to ``maxLen`` characters that are not in the excluded set."""
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3430

3431

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only the known whitespace characters that are NOT being matched
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        names = White.whiteStrs
        return "".join(names[c] for c in self.matchWhite)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume up to ``maxLen`` consecutive characters drawn from ``matchWhite``."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3507

3508

class PositionToken(Token):
    """Base class for tokens in this file that test the parse position
    (line/string/word boundaries, columns).
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3514

3515

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target column is reached."""
        target = self.col
        if col(loc, instring) != target:
            total = len(instring)
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)
            # stop at the target column, at non-whitespace, or at end of input
            while (
                loc < total
                and instring[loc].isspace()
                and col(loc, instring) != target
            ):
                loc += 1

        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; fail if already past it."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        newloc = loc + self.col - current
        return newloc, instring[loc:newloc]

3548

3549

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past skippable whitespace (and newlines, when they were
        originally whitespace) ahead of the line-start test.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # step over each newline, then over the whitespace after it
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3597

3598

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # a newline is the thing being matched, so it must not be skipped
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returning ``"\\n"``) or the end of input (no tokens)."""
        end = len(instring)
        if loc == end:
            # end of input counts as end of line
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3620

3621

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # succeed at position 0, or when the entire string up to here
        # is just whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

3637

3638

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only at or beyond the end of ``instring``.

        Returns ``(loc + 1, [])`` exactly at the end of the string and
        ``(loc, [])`` when ``loc`` is already past it; raises
        ``ParseException`` when unparsed text remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: already past the end, do not advance any further
        return loc, []

3657

3658

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword is honored only when it differs from the default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        # position 0 always qualifies; elsewhere the previous character must
        # be a non-word character and the current one a word character
        if loc != 0 and (
            instring[loc - 1] in self.wordChars
            or instring[loc] not in self.wordChars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3683

3684

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # ``wordChars`` is the pre-PEP8 keyword; it takes effect only when it
        # was explicitly set to something other than the default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        # whitespace must not be skipped before testing, otherwise the
        # position would move off the end of the word
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` if ``loc`` is a word end, else raise
        :class:`ParseException`.  A position at or past the end of the
        string always matches.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # fail if the current character is a word character, or the
            # previous character is not one
            if (
                instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3710

3711

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Store ``exprs`` as the list ``self.exprs``.

        ``exprs`` may be a generator, a single string, a single
        :class:`ParserElement`, or an iterable of expressions and/or
        strings; strings are wrapped with ``self._literalStringClass``.
        Anything else is converted with ``list()``, or stored as a
        one-element list if it is not iterable.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Append ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the change does not leak into the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the change does not leak into the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this expression
        and on every contained expression; returns ``self``.

        A :class:`Suppress` that is already present in ``self.ignoreExprs``
        is not registered again.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # hand the entry just registered on self down to the sub-expressions
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its sub-expressions once, flattening
        a directly nested expression of the same class; returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a :class:`DeprecationWarning`, validates every
        contained expression, then runs ``_checkRecursion`` on ``self``.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions are
        themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        # skip the diagnostic entirely unless the warning is enabled and not
        # suppressed on this expression
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, listAllMatches)

        # warn once, for the first contained expression that has its own results name
        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

3881

3882

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl passes it, later failures are
        # re-raised as ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            # replace each ``...`` with a SkipTo targeting the expression after it
            final_index = len(exprs) - 1
            expanded = []
            for position, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                elif position < final_index:
                    skip_target: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[position + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(skip_target)("_skipped*"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            exprs[:] = expanded
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            leading = self.exprs[0]
            if isinstance(leading, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the first expression
                self.set_whitespace_chars(
                    leading.whiteChars,
                    copy_defaults=leading.copyDefaultWhiteChars,
                )
                self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        def _ends_with_pending_skip(candidate) -> bool:
            # True for a ParseExpression whose last sub-expression is a _PendingSkip
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(_ends_with_pending_skip(e) for e in self.exprs[:-1]):
            tombstone = NoMatch()
            for index, e in enumerate(self.exprs[:-1]):
                if e is tombstone:
                    continue
                if _ends_with_pending_skip(e):
                    # fold the following expression into the pending skip and
                    # mark its slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[index + 1]
                    self.exprs[index + 1] = tombstone
            self.exprs = [e for e in self.exprs if e is not tombstone]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # walk cur and its chain of first sub-expressions looking for an
            # IndentedBlock; ``visited`` guards against recursive grammars
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(cur.recurse()), None)
                if first_sub is None:
                    break
                cur = typing.cast(ParserElement, first_sub)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, tokens = e._parse(instring, loc, doActions)
            else:
                # after an error stop, failures become ParseSyntaxException
                try:
                    loc, tokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += tokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(trail)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"{{{inner}}}"

4043

4044

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: trial-parse every alternative and record where each match ends
        for e in self.exprs:
            try:
                trial_end = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any ordinary failure recorded so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((trial_end, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, alternative in candidates:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = alternative._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    if actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            # raise the fatal error that got furthest; on a tie for first place,
            # prefer the element with the longer string form
            if len(fatals) > 1:
                fatals.sort(key=lambda f: -f.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda f: (-f.loc, -len(str(f.parser_element))))
            raise fatals[0]

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any single error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        joined = " ^ ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, listAllMatches=False):
        # warn when the diagnostic is enabled, not suppressed on self, and at
        # least one alternative is an And that does not suppress it either
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4198

4199

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthest_loc = -1
        furthest_exc = None

        # return the first alternative that parses; remember the failure that
        # got furthest into the input
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, listAllMatches=False):
        # warn when the diagnostic is enabled, not suppressed on self, and at
        # least one alternative is an And that does not suppress it either
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4305

4306

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are built on the first call to parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # maps id(inner expr) -> its enclosing Opt
            self.opt1map = {id(e.expr): e for e in self.exprs if isinstance(e, Opt)}
            unwrapped_opts = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_matchers = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_matchers
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order = []

        failed = []
        fatals = []
        # trial-parse the remaining expressions repeatedly until a whole pass
        # matches nothing
        while True:
            attempt = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for e in attempt:
                try:
                    scan_loc = e.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt when e is an unwrapped optional
                    match_order.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if len(failed) == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda f: -f.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda f: (-f.loc, -len(str(f.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # real parse, in the order discovered by the trial passes
        total_results = ParseResults([])
        for e in match_order:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

4479

4480

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to an expression
            expr_str = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(expr_str)
            else:
                expr = literal_cls(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is not None:
            # copy the wrapped expression's parsing attributes onto this wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.set_whitespace_chars(
                expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
            )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, doActions=True):
        wrapped = self.expr
        if wrapped is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return wrapped._parse(instring, loc, doActions, callPreParse=False)
        except ParseBaseException as pbe:
            # substitute this element's own message, except for a Forward
            # that has no custom name
            use_own_message = (
                not isinstance(self, Forward) or self.customName is not None
            )
            if use_own_message and self.errmsg:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply to a copy so the original expression is not modified
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply to a copy so the original expression is not modified
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        # a Suppress that is already registered is not added again
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4584

4585

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # matches only at exactly column ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # matches only at a column strictly to the right of ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing construct; 1 until reassigned
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        # one block line: self.expr at exactly the anchor column
        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        # optional terminator: back at the parent's column, or end of input
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )

4648

4649

class AtStringStart(ParseElementEnhance):
    """Wrap an expression so that it only matches at the very first
    position (location 0) of the string being parsed::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # only location 0 qualifies; anywhere else is an immediate failure
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4669

4670

class AtLineStart(ParseElementEnhance):
    r"""Wrap an expression so that it only matches when the parse location
    is in column 1, i.e. at the beginning of a line of the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # column numbers are 1-based, so column 1 is the start of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4702

4703

class FollowedBy(ParseElementEnhance):
    """Positive lookahead on the given parse expression.

    ``FollowedBy`` checks that the wrapped expression matches at the
    current position but does *not* advance the parsing position: the
    location passed in is the location returned. The returned token list
    is always emptied, however any results names defined inside the
    lookahead expression *are* kept and remain accessible by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # Parse the lookahead for real, then empty the list portion of the
        # results in place; named results defined in the lookahead expression
        # stay attached to the returned object.
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # hand back the *original* location - lookahead consumes nothing
        return loc, lookahead

4738

4739

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # replace the wrapped expression with the result of calling it, made
        # whitespace-sensitive (presumably a private copy - confirm against
        # ParserElement.__call__), so lookbehind never skips whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # ``exact`` means the lookbehind distance is known up front, so
        # parseImpl can jump straight to ``loc - retreat`` instead of scanning
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        # NOTE: if none of the branches above applied and the caller passed no
        # ``retreat``, this stays None and the arithmetic in parseImpl will fail
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of whatever the lookbehind matched
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that ``self.expr`` matches the text ending at ``loc``.

        Returns ``(loc, tokens)`` with ``loc`` unchanged; raises a
        ``ParseException`` (or whatever parse exception the wrapped
        expression raised last) if no lookbehind match is found.
        """
        if self.exact:
            # fixed lookbehind distance: a single attempt at loc - retreat
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr = ParseException(instring, loc, self.errmsg)

        # Try every start position inside the window, nearest to ``loc`` first.
        # The number of candidates is exactly the length of the window slice;
        # going one past that would yield a negative start index, which would
        # be interpreted relative to the *end* of the slice.
        for offset in range(1, len(instring_slice) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no start position in the window produced a match
            raise last_expr

        return loc, ret

4821

4822

class Located(ParseElementEnhance):
    """
    Wrap a matched expression together with the locations in the input
    string at which the match starts and ends.

    The returned tokens are ``[start, tokens, end]``, and the following
    results names are added:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        # When a results name is set on this element the result must be nested
        # in a list, so that the name is attached to the complete group.
        return end, ([located] if self.resultsName else located)

4863

4864

class NotAny(ParseElementEnhance):
    """
    Negative lookahead on the given parse expression.

    ``NotAny`` verifies that the wrapped expression does *not* match at
    the current position. It never advances the parsing position, it does
    *not* skip leading whitespace, and it always returns an empty token
    list. May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # the setting must not be propagated to the contained expression.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_present:
            # nothing consumed, nothing returned
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

4906

4907

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition: match the contained expression
    one or more times, optionally stopping in front of a sentinel expression
    (``stop_on``, or its legacy keyword spelling ``stopOn``).
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        sentinel = stopOn or stop_on
        self.saveAsList = True
        # the stopOn() method takes care of promoting a plain string to a
        # literal expression
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or, with ``None``, clear) the sentinel expression; returns self."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            # stored negated: "the sentinel is not next" is what gets checked
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        check_ender = not_ender is not None

        # There must be at least one match - but if we are already looking at
        # the stopOn sentinel, fail right away.
        if check_ender:
            try_not_ender = not_ender.try_parse
            try_not_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions)

        # Keep collecting matches until the sentinel shows up or the contained
        # expression stops matching; either one surfaces as an exception.
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, tmptokens = parse_expr(instring, preloc, doActions)
                tokens += tmptokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        # Optionally warn when a results name is put on the repetition while a
        # contained expression already has a results name of its own.
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement) or not e.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, listAllMatches)

4982

4983

class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times in a row.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # rendered as the contained expression in braces, followed by an ellipsis
        return "{" + str(self.expr) + "}..."

5014

5015

class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression any number of times in a row, including
    not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # "one or more" failing simply means "zero": succeed with an empty
        # result at the unchanged location
        try:
            return super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            no_tokens = ParseResults([], name=self.resultsName)
            return loc, no_tokens

    def _generateDefaultName(self) -> str:
        # rendered as the contained expression in brackets, followed by an ellipsis
        return "[" + str(self.expr) + "]..."

5048

5049

class DelimitedList(ParseElementEnhance):
    """Expression for a list of ``expr`` items separated by ``delim``;
    see :meth:`__init__` for the available options."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max``, when given, bound the number of list items;
        a ``ValueError`` is raised if ``min`` is less than 1 or if ``max``
        is less than ``min``.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the requested bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters only show up in the output when combining into one token
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one item, followed by (min - 1) .. (max - 1) "delimiter + item" pairs
        extra_items = (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        delim_list_expr = self.content + (self.delim + self.content) * extra_items

        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = str(self.content.streamline())
        return f"{item} [{self.raw_delim} {item}]..."

5113

5114

class _NullToken:
    """Placeholder object that is falsy and renders as the empty string."""

    def __str__(self):
        return ""

    def __bool__(self):
        return False

5121

5122

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression: the expression is tried
    once, and if it does not match, parsing still succeeds without
    consuming any input.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: stay where we are and report the default, if there is one
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # also publish the default under the inner expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # peel away redundant enclosing {}'s from the inner expression's name
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"

# Alias: lets ``Opt`` also be referenced under the name ``Optional``
Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # ``failOn`` is the legacy keyword spelling of ``fail_on``
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)
        # helper expression whose only job is to carry the ignore expressions
        # used while scanning ahead (see _update_ignorer)
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's current
        ignore expressions plus the ``ignore`` argument given at construction."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """Register ``expr`` as an ignorable expression and refresh the
        internal ignorer accordingly.

        Returns ``self`` so that the call can be chained, like the other
        fluent configuration methods.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the first position where the target
        expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the skipped
        text (followed by the target's tokens when ``include`` was set).
        Raises ``ParseException`` if the end of the string is reached
        without finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        # only bother with the ignorer if it actually has something to ignore
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): leaving the loop via this break bypasses the
                # while/else failure branch below, so the text skipped so far
                # is returned as a result - confirm this is the intended way
                # of signalling "not a match" to the enclosing grammar.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: no parse actions, no whitespace skipping
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # now parse the target for real (with actions) and append its tokens
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5350

5351

5352class Forward(ParseElementEnhance):

5353 """

5354 Forward declaration of an expression to be defined later -

5355 used for recursive grammars, such as algebraic infix notation.

5356 When the expression is known, it is assigned to the ``Forward``

5357 variable using the ``'<<'`` operator.

5358

5359 Note: take care when assigning to ``Forward`` not to overlook

5360 precedence of operators.

5361

5362 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

5363

5364 fwd_expr << a | b | c

5365

5366 will actually be evaluated as::

5367

5368 (fwd_expr << a) | b | c

5369

5370 thereby leaving b and c out as parseable alternatives. It is recommended that you

5371 explicitly group the values inserted into the ``Forward``::

5372

5373 fwd_expr << (a | b | c)

5374

5375 Converting to use the ``'<<='`` operator instead will avoid this problem.

5376

5377 See :class:`ParseResults.pprint` for an example of a recursive

5378 parser created using ``Forward``.

5379 """

5380

5381 def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):

5382 self.caller_frame = traceback.extract_stack(limit=2)[0]

5383 super().__init__(other, savelist=False) # type: ignore[arg-type]

5384 self.lshift_line = None

5385

5386 def __lshift__(self, other) -> "Forward":

5387 if hasattr(self, "caller_frame"):

5388 del self.caller_frame

5389 if isinstance(other, str_type):

5390 other = self._literalStringClass(other)

5391

5392 if not isinstance(other, ParserElement):

5393 return NotImplemented

5394

5395 self.expr = other

5396 self.streamlined = other.streamlined

5397 self.mayIndexError = self.expr.mayIndexError

5398 self.mayReturnEmpty = self.expr.mayReturnEmpty

5399 self.set_whitespace_chars(

5400 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars

5401 )

5402 self.skipWhitespace = self.expr.skipWhitespace

5403 self.saveAsList = self.expr.saveAsList

5404 self.ignoreExprs.extend(self.expr.ignoreExprs)

5405 self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment]

5406 return self

5407

5408 def __ilshift__(self, other) -> "Forward":

5409 if not isinstance(other, ParserElement):

5410 return NotImplemented

5411

5412 return self << other

5413

5414 def __or__(self, other) -> "ParserElement":

5415 caller_line = traceback.extract_stack(limit=2)[-2]

5416 if (

5417 __diag__.warn_on_match_first_with_lshift_operator

5418 and caller_line == self.lshift_line

5419 and Diagnostics.warn_on_match_first_with_lshift_operator

5420 not in self.suppress_warnings_

5421 ):

5422 warnings.warn(

5423 "using '<<' operator with '|' is probably an error, use '<<='",

5424 stacklevel=2,

5425 )

5426 ret = super().__or__(other)

5427 return ret

5428

5429 def __del__(self):

5430 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'

5431 if (

5432 self.expr is None

5433 and __diag__.warn_on_assignment_to_Forward

5434 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_

5435 ):

5436 warnings.warn_explicit(

5437 "Forward defined here but no expression attached later using '<<=' or '<<'",

5438 UserWarning,

5439 filename=self.caller_frame.filename,

5440 lineno=self.caller_frame.lineno,

5441 )

5442

5443 def parseImpl(self, instring, loc, doActions=True):

5444 if (

5445 self.expr is None

5446 and __diag__.warn_on_parse_using_empty_Forward

5447 and Diagnostics.warn_on_parse_using_empty_Forward

5448 not in self.suppress_warnings_

5449 ):

5450 # walk stack until parse_string, scan_string, search_string, or transform_string is found

5451 parse_fns = (

5452 "parse_string",

5453 "scan_string",

5454 "search_string",

5455 "transform_string",

5456 )

5457 tb = traceback.extract_stack(limit=200)

5458 for i, frm in enumerate(reversed(tb), start=1):

5459 if frm.name in parse_fns:

5460 stacklevel = i + 1

5461 break

5462 else:

5463 stacklevel = 2

5464 warnings.warn(

5465 "Forward expression was never assigned a value, will not parse any input",

5466 stacklevel=stacklevel,

5467 )

5468 if not ParserElement._left_recursion_enabled:

5469 return super().parseImpl(instring, loc, doActions)

5470 # ## Bounded Recursion algorithm ##

5471 # Recursion only needs to be processed at ``Forward`` elements, since they are

5472 # the only ones that can actually refer to themselves. The general idea is

5473 # to handle recursion stepwise: We start at no recursion, then recurse once,

5474 # recurse twice, ..., until more recursion offers no benefit (we hit the bound).

5475 #

5476 # The "trick" here is that each ``Forward`` gets evaluated in two contexts

5477 # - to *match* a specific recursion level, and

5478 # - to *search* the bounded recursion level

5479 # and the two run concurrently. The *search* must *match* each recursion level

5480 # to find the best possible match. This is handled by a memo table, which

5481 # provides the previous match to the next level match attempt.

5482 #

5483 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.

5484 #

5485 # There is a complication since we not only *parse* but also *transform* via

5486 # actions: We do not want to run the actions too often while expanding. Thus,

5487 # we expand using `doActions=False` and only run `doActions=True` if the next

5488 # recursion level is acceptable.

5489 with ParserElement.recursion_lock:

5490 memo = ParserElement.recursion_memos

5491 try:

5492 # we are parsing at a specific recursion expansion - use it as-is

5493 prev_loc, prev_result = memo[loc, self, doActions]

5494 if isinstance(prev_result, Exception):

5495 raise prev_result

5496 return prev_loc, prev_result.copy()

5497 except KeyError:

5498 act_key = (loc, self, True)

5499 peek_key = (loc, self, False)

5500 # we are searching for the best recursion expansion - keep on improving

5501 # both `doActions` cases must be tracked separately here!

5502 prev_loc, prev_peek = memo[peek_key] = (

5503 loc - 1,

5504 ParseException(

5505 instring, loc, "Forward recursion without base case", self

5506 ),

5507 )

5508 if doActions:

5509 memo[act_key] = memo[peek_key]

5510 while True:

5511 try:

5512 new_loc, new_peek = super().parseImpl(instring, loc, False)

5513 except ParseException:

5514 # we failed before getting any match – do not hide the error

5515 if isinstance(prev_peek, Exception):

5516 raise

5517 new_loc, new_peek = prev_loc, prev_peek

5518 # the match did not get better: we are done

5519 if new_loc <= prev_loc:

5520 if doActions:

5521 # replace the match for doActions=False as well,

5522 # in case the action did backtrack

5523 prev_loc, prev_result = memo[peek_key] = memo[act_key]

5524 del memo[peek_key], memo[act_key]

5525 return prev_loc, prev_result.copy()

5526 del memo[peek_key]

5527 return prev_loc, prev_peek.copy()

5528 # the match did get better: see if we can improve further

5529 if doActions:

5530 try:

5531 memo[act_key] = super().parseImpl(instring, loc, True)

5532 except ParseException as e:

5533 memo[peek_key] = memo[act_key] = (new_loc, e)

5534 raise

5535 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

5536

5537 def leave_whitespace(self, recursive: bool = True) -> ParserElement:

5538 self.skipWhitespace = False

5539 return self

5540

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Set this expression's ``skipWhitespace`` flag to ``True``.

    ``recursive`` is accepted for signature compatibility but is not
    consulted here: only this object is modified, the contained
    expression is left untouched.

    Returns ``self`` so that calls can be chained.
    """
    self.skipWhitespace = True
    return self

5544

def streamline(self) -> ParserElement:
    """
    Streamline the contained expression once, then return ``self``.

    The first call sets ``self.streamlined`` and forwards to
    ``self.expr.streamline()`` when an expression is assigned; later
    calls return immediately.
    """
    if self.streamlined:
        return self

    # Mark first: a re-entrant call that reaches this object again through
    # ``expr`` then returns immediately instead of descending a second time.
    self.streamlined = True
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self

5551

def validate(self, validateTrace=None) -> None:
    """
    Deprecated validation hook; always emits a ``DeprecationWarning``.

    ``validateTrace`` is the list of expressions already visited (``None``
    is treated as an empty list).  If this object is not in that list, the
    contained expression (when set) is validated with this object appended
    to a copy of the trace.  ``self._checkRecursion([])`` is called in
    every case.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        # extend a copy so the caller's list is never mutated
        extended_trace = trace[:] + [self]
        inner = self.expr
        if inner is not None:
            inner.validate(extended_trace)
    self._checkRecursion([])

5566

def _generateDefaultName(self) -> str:
    """
    Build the default display name: ``"<ClassName>: <str(expr)>"``.

    The contained expression's string form is truncated to 1000
    characters; ``"None"`` is used when no expression is assigned.  If
    rendering the expression raises an ordinary exception, the placeholder
    ``"..."`` is used instead (best effort).  Exceptions that do not derive
    from ``Exception`` (``KeyboardInterrupt``, ``SystemExit``) propagate.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    self._defaultName = ": ..."

    # Use the string representation of main expression.
    retString = "..."
    try:
        retString = "None" if self.expr is None else str(self.expr)[:1000]
    except Exception:
        # Deliberate best effort: keep the "..." placeholder.  This replaces
        # a ``return`` inside ``finally``, which also discarded
        # KeyboardInterrupt/SystemExit and is flagged by PEP 765.
        pass
    return f"{type(self).__name__}: {retString}"

5580

def copy(self) -> ParserElement:
    """
    Return a copy of this expression.

    When an expression is already assigned, the parent class's ``copy()``
    is used.  Otherwise a fresh ``Forward`` is created and this object is
    assigned to it with ``<<=``, and that new ``Forward`` is returned.
    """
    if self.expr is None:
        # nothing assigned yet: hand back a new Forward bound to this one
        placeholder = Forward()
        placeholder <<= self
        return placeholder

    return super().copy()

5588

def _setResultsName(self, name, list_all_matches=False):
    """
    Set a results name, warning first if no expression is assigned yet.

    The ``warn_name_set_on_empty_Forward`` warning is emitted only when the
    diagnostic is enabled in ``__diag__``, is not listed in this object's
    ``suppress_warnings_``, and ``self.expr`` is ``None``.  The actual work
    is then delegated to the parent implementation, whose result is
    returned.
    """
    # fmt: off
    diagnostic_applies = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if diagnostic_applies:
        warnings.warn(
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression",
            stacklevel=3,
        )
    # fmt: on

    return super()._setResultsName(name, list_all_matches)

5605

# Compatibility synonyms: pre-PEP8 camelCase names for the two methods
# above, each built by replaced_by_pep8 from the old name and the new method.
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on

5611

5612

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, used as the common
    base for expressions that convert or reshape parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted for signature compatibility only; it is
        # neither forwarded to the parent initializer nor stored.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5621

5622

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the pieces
    matched by the contained expression must also be contiguous in the
    input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the pre-PEP8 keyword ``joinString`` wins whenever it is supplied
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself (skipWhitespace is reset below)
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable and return ``self``.

        In adjacent mode ``ParserElement.ignore`` is called directly on this
        object; otherwise the parent class's ``ignore`` is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string.

        The result starts as a copy of ``tokenlist`` with its list items
        deleted.  It is wrapped in a one-element list when this expression
        has a results name and the result carries keys.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5678

5679

class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Passing ``aslist=True`` makes the group come back as a plain Python
    list rather than a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens so they appear as one nested element."""
        if not self._asPythonList:
            return [tokenlist]

        # plain-list mode: convert first, then mark with ParseResults.List
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5715

5716

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary keyed by the first token of each element. Useful for
    scraping tabular reports where the first column can serve as the item
    key.

    Passing ``asdict=True`` makes the result come back as a plain Python
    dict rather than a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to ``tokenlist`` for every non-empty element.

        The key is the element's first token (an ``int`` key is converted
        with ``str``).  The value is ``""`` for a one-token element, the
        second token for a two-token element whose second token is not a
        ParseResults, and otherwise the element minus its first token,
        collapsed to its single item when exactly one keyless item remains.

        Raises ``TypeError`` when an element cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # a lone leftover item without result names stands for itself
                is_single_plain = len(remainder) == 1 and not (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder[0] if is_single_plain else remainder

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

5802

5803

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` (a literal Ellipsis) stands for "skip ahead": it
        # is stored as a pending skip that is resolved once the following
        # expression is known (see __add__ / __sub__).
        # ``savelist`` is accepted but not used.
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> "ParserElement":
        """``self + other``; a pending skip becomes ``Suppress(SkipTo(other))`` first."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        """``self - other``; a pending skip becomes ``Suppress(SkipTo(other))`` first."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token by returning a new empty list."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self`` unchanged; this expression is already a ``Suppress``."""
        return self

5857

5858

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the decorated parse action writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.  When the parse action finishes, ``"<<leaving"`` is
    written together with the returned value, or with the exception the
    parse action raised (which is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is present: prefix its class name
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    z.__name__ = f.__name__
    return z

5902

5903

# convenience constants for positional expressions: one shared, named
# instance of each of the argument-less expression classes below
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

5910

# Grammar used by srange() to parse a regex-style "[...]" character set.

# backslash-escaped punctuation -> the punctuation character itself
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape (\x## or legacy \0x##) -> the character with that code point.
# The digits are everything after the x/X marker.  Taking that suffix (rather
# than lstrip-ing the character set "\", "0", "x") keeps leading zero digits,
# so r"\x00" and r"\X41" convert instead of raising ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape (\0##) -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: any escape form, or one character other than "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [first, last] with the dash dropped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression: "[" optional "^" members "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5930

5931

def srange(s: str) -> str:
    r"""Helper for defining character sets for :class:`Word` construction,
    using the syntax of regexp ``'[]'`` string ranges::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded character set joined into a single string. Inside the []'s
    the following may appear:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expanded(p):
        # plain members pass through; a parsed range [first, last] becomes
        # every character from first to last inclusive
        if isinstance(p, ParseResults):
            return "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1))
        return p

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body)
    except Exception:
        return ""

5967

5968

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every element
    of a :class:`ParseResults` list. Any extra ``args`` are passed to
    ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its ``__name__``,
    or the name of its class when it has none).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    # callables without a __name__ fall back to the name of their class
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6013

6014

def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements, for generating railroad
    diagrams with named subdiagrams.

    Looks at the local variables of the calling frame and calls
    ``set_name(<variable name>)`` on every :class:`ParserElement` found
    there whose ``customName`` is not set. Does nothing when there is no
    calling frame.
    """
    # NOTE: f_back must be taken directly in this function body so that it
    # refers to the caller of autoname_elements().
    caller = sys._getframe().f_back
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)

    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        if value.customName:
            # keep names that were set explicitly
            continue
        value.set_name(var_name)

6027

6028

# Pre-built quoted-string expressions.  Each is a Combine of an opening-quote
# Regex plus the closing quote, so the result is a single token that still
# includes its quotes.

# "..." on one line; the body may contain "" (doubled quote) or a backslash escape
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# '...' on one line; the body may contain '' (doubled quote) or a backslash escape
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style strings: triple-quoted forms plus single-line forms in which an
# embedded quote is written with a backslash.  The alternatives are combined
# with ``^`` rather than ``|``.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built with srange() from \0x## escapes in the 0xa1-0xff range
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6066

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound to a name in this namespace at this
# point; the aliases assigned below are not part of it)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# backward compatibility names
# The camelCase constants are plain aliases of the objects defined above; the
# camelCase functions are built by replaced_by_pep8 from the old name and the
# PEP8-named function.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on