Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# helpers.py
2import html.entities
3import operator
4import re
5import sys
6import typing
8from . import __diag__
9from .core import *
10from .util import (
11 _bslash,
12 _flatten,
13 _escape_regex_range_chars,
14 make_compressed_re,
15 replaced_by_pep8,
16)
def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Return *expr* wrapped in a :class:`Suppress`, unless it already is one.

    Internal helper used when building delimiter expressions, so that a
    caller-supplied ``Suppress`` is not nested inside a second ``Suppress``.
    """
    return expr if isinstance(expr, Suppress) else Suppress(expr)
26#
27# global helpers
28#
def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

    .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ... ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

    .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    # pre-PEP8 "intExpr" keyword is still accepted (deprecated); when supplied
    # it takes precedence over the positional/keyword int_expr
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )
    intExpr = intExpr or int_expr

    # placeholder for the repeated-expr portion; the count's parse action
    # fills it in once the count value is known
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # rebind array_expr to exactly n copies of expr (or Empty for n == 0)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a run of digits converted to int
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the Forward is populated even during lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    # placeholder re-targeted each time expr matches
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # no tokens matched -> repeat expression matches nothing
        if not t:
            rep << Empty()
            return

        # single token -> repeat it as a literal string
        if len(t) == 1:
            rep << t[0]
            return

        # flatten t tokens
        tflat = _flatten(t.as_list())
        rep << And(Literal(tt) for tt in tflat)

    # call_during_try=True so rep is updated even inside lookaheads/alternatives
    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    # rep re-parses with a copy of expr, then verifies the parsed tokens
    # equal those from the earlier match
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the flattened tokens from the first match
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fixed: original message lacked the space after "found",
                # producing e.g. "Expected ['1'], found['10']"
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    # call_during_try=True so the comparison action is installed even
    # when expr matches inside a lookahead
    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
      string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or
      ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
      generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # pre-PEP8 keyword arguments are accepted but deprecated; merge them
    # with the current parameter names
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed as the second positional arg usually means the caller
    # meant one_of("a b c"), not one_of("a", "b") - warn if diagnostics enabled
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers: is_equal detects duplicates, masks detects when a
    # shorter symbol is a prefix of (and would shadow) a longer one
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    # normalize strs to a list of symbol strings
    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                # drop exact duplicate, re-examine current position
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                # move the longer, masked symbol ahead of the shorter prefix
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst construction below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`OneOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = (
        ...     label
        ...     + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join))
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        >>> attr_label = label
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # each Group becomes one [key, value...] entry; Dict adds named access
    return Dict(OneOrMore(Group(key + value)))
def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # deprecated pre-PEP8 kwarg; either spelling may disable string mode
    asString: bool = deprecate_argument(kwargs, "asString", True)
    asString = asString and as_string

    # zero-width markers that record the current parse location
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # don't skip leading whitespace before recording the end location
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return just the matched slice of the input string
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace tokens with the raw slice, dropping the marker names
            # but keeping any other named results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # inherit ignore expressions so skipped content is handled the same way
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def _unwrap_first(t):
        # promote the single grouped result to the top level
        return t[0]

    return TokenConverter(expr).add_parse_action(_unwrap_first)
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
        Use the :class:`Located` class instead. Note that `Located`
        returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        "'locatedExpr' deprecated - use 'Located'",
        DeprecationWarning,
        stacklevel=2,
    )
    # zero-width expression that yields the current parse location
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    end_locator = locator.copy().leave_whitespace()
    return Group(locator("locn_start") + expr("value") + end_locator("locn_end"))
# define special default value to permit None as a significant value for
# ignore_expr; nested_expr uses identity checks against this sentinel to
# tell "argument omitted" apart from an explicit ignore_expr=None
_NO_IGNORE_EXPR_GIVEN = NoMatch()
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
      (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")

    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # deprecated pre-PEP8 kwarg; reconcile with ignore_expr - an explicitly
    # supplied ignoreExpr wins, otherwise use ignore_expr
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]
    # neither argument supplied -> default to ignoring quoted strings
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter characters;
        # only possible when both delimiters are plain strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: content is any run of characters
                # that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters: must test for the delimiter literals
                # with negative lookaheads, one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

    # for these internally-created content expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    # recursive structure: opener, then any mix of ignored text, nested
    # groups, or content, then closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + _suppression(closer)
        )
    else:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer)
        )

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    ``suppress_LT``/``suppress_GT`` are module-level default expressions
    shared across calls (deliberate - Suppress instances are reusable).
    Returns an ``(openTag, closeTag)`` pair. XML tags match case-sensitively
    and only allow double-quoted attribute values; HTML tags match
    caselessly, lower-case their attribute names, and allow quoted or
    unquoted attribute values.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing ("empty") element
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lower case,
                        # and the value may be omitted entirely
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for everything between this tag pair
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # delegate to the shared builder in HTML (caseless) mode
    return _makeTags(tag_str, xml=False)
def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # delegate to the shared builder in XML (case-sensitive) mode
    return _makeTags(tag_str, xml=True)
# expressions matching any HTML start/end tag, regardless of tag name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# HTML5 entity names (minus their trailing ';') mapped to replacement text
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# expression for "&name;" entity references; the pattern is supplied as a
# callable, presumably so the make_compressed_re() work is deferred until
# the Regex is first used - TODO confirm Regex's lazy-pattern behavior
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the named "entity" result; .get returns None for unknown
    # entities (NOTE(review): a None return from a parse action leaves the
    # tokens unchanged - verify that is the intent here)
    return _htmlEntityMap.get(t.entity)
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # left-associative: a op b op c parses as (a op b) op c
    LEFT = 1
    # right-associative: a op b op c parses as a op (b op c)
    RIGHT = 2
# operator expression for one precedence level: a ParserElement or literal
# string, or (for ternary operators) a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one entry of infix_notation's op_list:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
      be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # string delimiters are suppressed; expression delimiters are kept
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build precedence levels bottom-up: each level's expression falls back
    # to the previous (tighter-binding) level's expression
    for operDef in op_list:
        # pad with None so the optional parse-action slot is always present
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: adjacent operands (juxtaposition)
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # NOTE: the mutable default argument for backup_stacks is deliberate shared
    # state - every call records a snapshot of the caller's indentStack so a
    # failed parse can roll it back (see reset_stack and set_fail_action below).
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore indentStack to the snapshot taken when this block was built
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # parse action: a following statement must start at the current level;
        # deeper is "illegal nesting", shallower is "not a peer entry"
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # parse action: the block body must be indented deeper than the current
        # level; push the new indent column onto the stack
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # parse action: a dedent must land on some previously seen indent
        # column; pop one level off the stack when returning outward
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # line ends (with any trailing tabs/spaces) are consumed silently
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    # INDENT/PEER/UNDENT are zero-width markers whose parse actions enforce
    # the indentation discipline defined above
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most statements: no INDENT required, and UNDENT is optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # a backslash immediately before a line end continues the statement
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# matches everything to (but not including) the next newline; leave_whitespace
# keeps any leading spaces on the line instead of skipping them
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")

# the (?:\\\n|[^\n]) alternation lets a ``//`` comment be continued onto the
# next line with a trailing backslash
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# NOTE: this snapshot captures every ParserElement defined at module level up
# to this point, so it must remain below the expression definitions above
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    Build a delimited-list expression by forwarding all arguments to
    :class:`DelimitedList`.

    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    delimited = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return delimited
# Compatibility synonyms
# fmt: off
# Pre-PEP8 camelCase names, kept so that legacy client code keeps working.
# Expression instances get plain aliases; callables are wrapped with
# replaced_by_pep8 (presumably to flag use of the old name at call time -
# see util.replaced_by_pep8 for the exact behavior).
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this rebinds the delimited_list function defined earlier in this module
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on