Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# helpers.py
2import html.entities
3import operator
4import re
5import sys
6import typing
8from . import __diag__
9from .core import *
10from .util import (
11 _bslash,
12 _flatten,
13 _escape_regex_range_chars,
14 make_compressed_re,
15 replaced_by_pep8,
16)
def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Internal helper: wrap *expr* in a :class:`Suppress`, unless it
    already is one (avoids double-wrapping)."""
    return expr if isinstance(expr, Suppress) else Suppress(expr)
26#
27# global helpers
28#
def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

       >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
       ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

      .. doctest::

         >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
         >>> counted_array(Word(alphas), int_expr=binary_constant
         ... ).parse_string('10 ab cd ef')
         ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

      .. doctest::

         >>> ppc = pyparsing.common
         >>> count_with_metadata = ppc.integer + Word(alphas)("type")
         >>> typed_array = counted_array(Word(alphanums),
         ...     int_expr=count_with_metadata)("items")
         >>> result = typed_array.parse_string("3 bool True True False")
         >>> print(result.dump())
         ['True', 'True', 'False']
         - items: ['True', 'True', 'False']
         - type: 'bool'
    """
    # honor the deprecated camelCase keyword, if the caller passed one
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )
    intExpr = intExpr or int_expr

    # the array body cannot be defined until the count is parsed, so it is a
    # Forward that gets (re)defined by the count's parse action at parse time
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # define the array body as exactly n copies of expr (Empty for n == 0)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so that the caller's expression is not modified below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True ensures the Forward gets defined even during
    # lookahead/try parsing, not only on a final successful parse
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    repeater = Forward()

    def define_repeater(s, l, t):
        # redefine the repeater from whatever the source expression just matched
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # flatten nested results into a flat sequence of literals
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(define_repeater, call_during_try=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of expr, then verify the tokens match the earlier ones
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens matched by the original expression...
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # ...and require the repeat to produce exactly the same tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge deprecated camelCase kwargs with the PEP8 parameters
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed for caseless usually means the caller passed multiple
    # string args instead of one space-delimited string - warn if enabled
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            PyparsingDiagnosticWarning,
            stacklevel=2,
        )

    # comparison helpers: is_equal detects duplicates, masks detects when one
    # symbol is a prefix of (and would shadow) a longer one
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    # NOTE: dedupes equal symbols, and moves any longer symbol that a current
    # symbol would mask ahead of it; only advance i when no change was made
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

       >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')
       >>> attr_expr = (
       ...     label
       ...     + Suppress(':')
       ...     + OneOrMore(data_word, stop_on=label)
       ...     .set_parse_action(' '.join))
       >>> print(attr_expr[1, ...].parse_string(text).dump())
       ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

       >>> attr_label = label
       >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
       ...     ).set_parse_action(' '.join)

       # similar to Dict, but simpler call format
       >>> result = dict_of(attr_label, attr_value).parse_string(text)
       >>> print(result.dump())
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
       - color: 'light blue'
       - posn: 'upper left'
       - shape: 'SQUARE'
       - texture: 'burlap'
       [0]:
         ['shape', 'SQUARE']
       [1]:
         ['posn', 'upper left']
       [2]:
         ['color', 'light blue']
       [3]:
         ['texture', 'burlap']

       >>> print(result['shape'])
       SQUARE
       >>> print(result.shape)  # object attribute access works too
       SQUARE
       >>> print(result.as_dict())
       {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # each dictionary entry is a grouped key/value pair; Dict maps them to names
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString: bool = deprecate_argument(kwargs, "asString", True)
    asString = asString and as_string

    # bracket expr with markers that record the start and end locations
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extract_text(s, l, t):
            # return just the original slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def extract_text(s, l, t):
            # keep named results, replacing list contents with the original slice
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(extract_text)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # promote the single inner token up one level
    converter = TokenConverter(expr)
    return converter.add_parse_action(lambda t: t[0])
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead. Note that `Located`
       returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )

    # zero-width marker whose parse action reports the current location
    loc_capture = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, or locn_end would overshoot
    trailing_loc = loc_capture.copy().leave_whitespace()
    return Group(
        loc_capture("locn_start") + expr("value") + trailing_loc("locn_end")
    )
# define special default value to permit None as a significant value for
# ignore_expr
# (a NoMatch instance used purely as an identity sentinel by nested_expr;
# it is never actually used for parsing)
_NO_IGNORE_EXPR_GIVEN = NoMatch()
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")

    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )
    # reconcile the deprecated ignoreExpr kwarg with ignore_expr: the new
    # parameter is used only when the deprecated one was not explicitly given
    # (i.e., it is still the identity sentinel)
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    # neither argument given - fall back to the documented default
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression: runs of characters that are not
        # delimiters and not whitespace (and not matched by ignoreExpr)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters can simply be excluded from the char set
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters require explicit negative lookaheads
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

        # for these internally-created context expressions, simulate whitespace-skipping
        if ParserElement.DEFAULT_WHITE_CHARS:
            content.set_parse_action(
                lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
            )

    # recursive definition: a nested list is opener + (items or nested lists) + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + _suppression(closer)
        )
    else:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer)
        )

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    Returns a tuple of (openTag, closeTag) expressions. ``xml=True``
    builds strict XML-style tags (case-sensitive names, quoted attribute
    values only); ``xml=False`` builds permissive HTML-style tags.

    NOTE: suppress_LT/suppress_GT defaults are module-level shared Suppress
    instances, evaluated once at definition time (intentional here).
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # caller passed an expression; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing (empty) tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: allow quoted or unquoted attribute values, and valueless attrs
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression matching everything up to the matching close tag
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # delegate to the shared tag builder in permissive (HTML) mode
    return _makeTags(tag_str, xml=False)
def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # delegate to the shared tag builder in strict (XML) mode
    return _makeTags(tag_str, xml=True)
# ready-to-use expressions matching any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# HTML5 named entities, keyed without the trailing ';'
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# matches '&name;' for any known HTML entity, capturing the name as "entity";
# NOTE(review): a callable is passed as the pattern, presumably to defer
# building the large compressed alternation until first use - confirm that
# Regex supports lazy/callable patterns in this codebase
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured entity name; returns None for unknown entities
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # operators at this level group left-to-right
    LEFT = 1
    # operators at this level group right-to-left
    RIGHT = 2
# the operator element of an infix_notation spec: a single expression (or a
# string, which gets promoted to a Literal), or - for ternary operators - a
# tuple of the two separating expressions
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # strings for the parentheses get suppressed; expressions are kept as-is
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: opAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build the precedence ladder from highest to lowest: each level's
    # expression is (this level's operations) | (next-higher level)
    for operDef in op_list:
        # pad 3-tuples with a None parse action
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            # ternary operators are defined by a pair of separator expressions
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: juxtaposition of operands
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    # right-assoc: recurse into thisExpr on the right side
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # stacklevel=2 points the DeprecationWarning at the caller, not this frame
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )
    # NOTE(review): the mutable default for `backup_stacks` appears intentional —
    # it acts as a module-lifetime registry of indentStack snapshots shared by
    # all calls that don't pass their own; snapshots are pushed here and popped
    # (or restored on failure) by the actions below. Confirm before "fixing".
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore indentStack to the most recent snapshot (used on parse failure)
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # at end-of-string there is nothing to check
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            # deeper than current level is illegal nesting; shallower means
            # this token is not a peer of the current block
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            # entering a more deeply indented sub-block: push the new level
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # at end-of-string there is nothing to check
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must land exactly on some previously seen indent level
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # NL consumes one or more line ends (tabs/spaces are the only whitespace
    # skipped here, so newlines themselves are significant)
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    # leading Empty() lets whitespace be skipped before the indent check runs
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # block must be indented past the current level: INDENT ... UNDENT
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most statements: no INDENT required, UNDENT is optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    # (the `and None` keeps the popped value out of the parse results)
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # on failure, roll indentStack back to the snapshot taken above
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash-newline line continuations inside the block
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# (?:[^*]|\*(?!/)) matches any char that is not the start of the closing */
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# non-greedy [\s\S]*? spans newlines up to the first -->
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace so the match starts at the current position, not after skipped ws
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# \\\n permits backslash-newline continuations inside a // comment
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# union of the c_style_comment and dbl_slash_comment patterns above
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Thin forwarding shim: build the replacement class with the same arguments.
    return DelimitedList(
        expr,
        delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )
# Compatibility synonyms
# Pre-PEP8 camelCase aliases kept for backward compatibility with pyparsing 2.x
# client code. Plain assignments share the same object; replaced_by_pep8(...)
# additionally wraps the target to emit a deprecation warning when called.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this rebinding shadows the def of delimited_list above, routing callers
# through the deprecation wrapper instead
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on