1# helpers.py
2import html.entities
3import operator
4import re
5import sys
6import typing
7
8from . import __diag__
9from .core import *
10from .util import (
11 _bslash,
12 _flatten,
13 _escape_regex_range_chars,
14 make_compressed_re,
15 replaced_by_pep8,
16)
17
18
19#
20# global helpers
21#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    Matches a pattern of the form::

        integer expr expr expr...

    where the leading integer gives how many ``expr`` terms follow.
    The returned tokens are the array of ``expr`` matches as a list;
    the leading count token is suppressed.

    ``int_expr`` may be supplied as a custom pyparsing expression that
    produces an integer value (for example, a binary or hex count).
    Results names defined on ``int_expr`` are preserved in the returned
    ParseResults, so extra metadata fields parsed along with the count
    remain accessible.

    ``intExpr`` is the pre-PEP8 spelling of ``int_expr``, retained for
    compatibility.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # leading count given in binary - '10' means 2 items follow
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']
    """
    # prefer the legacy keyword if given, else the PEP8 argument
    intExpr = intExpr or int_expr
    contents = Forward()

    # default count is a decimal integer; otherwise work on a copy so the
    # caller's expression is not modified
    if intExpr is None:
        count_expr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        count_expr = intExpr.copy()

    def size_the_array(s, l, t):
        # dynamically define the array body as exactly t[0] copies of expr
        count = t[0]
        contents <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(size_the_array, call_during_try=True)
    return (count_expr + contents).set_name(f"(len) {expr}...")
80
81
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Define an expression that matches a literal repeat of whatever a
    previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    matches ``"1:1"`` but not ``"1:2"``. Because the comparison is done
    on the literal matched text, this will also match the leading
    ``"1:1"`` in ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr` instead. Do *not* use with packrat
    parsing enabled.
    """
    repeater = Forward()

    def record_matched_tokens(s, l, t):
        # rebind the repeater to literally match what was just parsed
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # multiple tokens - flatten and require each literal in order
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(record_matched_tokens, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
115
116
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # reparse with a copy of expr; a parse action then verifies that the
    # reparsed tokens equal the tokens matched by the original expression
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # bugfix: insert the missing space between "found" and the
                # token list in the exception message
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
151
152
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile PEP8 and legacy argument spellings (either may be used)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed for `caseless` suggests the caller passed multiple string
    # args positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers used by the reordering pass below:
    # is_equal detects duplicate symbols, masks detects when a shorter
    # symbol would hide a longer one that starts with it
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)
        parse_element_class = Keyword if asKeyword else Literal

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                # duplicate of an earlier symbol - drop it
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                # `other` is longer and starts with `cur` - move it ahead of
                # `cur` so the longer alternative is tried first
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change at position i - advance (a break restarts scanning at i)
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
277
278
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Define a dictionary-style parser from separate key and value
    expressions, wiring up the :class:`Dict`, :class:`OneOrMore`, and
    :class:`Group` wrappers in the proper order.

    The key pattern may include delimiting markers or punctuation as
    long as they are suppressed, leaving only the significant key text.
    The value pattern may contain named results, which then appear as
    named token fields in the resulting :class:`Dict`.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each entry is a grouped key/value pair; Dict turns the repeated
    # groups into named results keyed by the key tokens
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
317
318
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Return the original, untokenized text matched by ``expr``.

    Useful to restore the parsed fields of an HTML start tag into the
    raw tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text. By default the
    result is a string containing the original parsed text.

    If ``as_string`` is passed as ``False``, the return value is a
    :class:`ParseResults` containing any results names that were
    originally matched, plus a single token holding the original
    matched text. So if ``expr`` defines results names that you want to
    keep, set ``as_string=False``.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    # record the match's start and end offsets via zero-width markers
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extract(s, l, t):
            # replace all tokens with the raw slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def extract(s, l, t):
            # keep named results; pop the marker fields while slicing
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(extract)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
370
371
def ungroup(expr: ParserElement) -> ParserElement:
    """Undo pyparsing's default grouping of And expressions, even if all
    but one are non-empty, by returning only the first token.
    """

    def take_first(t):
        return t[0]

    return TokenConverter(expr).add_parse_action(take_first)
377
378
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Decorate a returned token with its starting and ending locations in
    the input string.

    Adds these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose parse action reports the current location
    loc_capture = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, so the end location
    # is the position just past the matched text
    end_capture = loc_capture.copy().leaveWhitespace()
    return Group(
        loc_capture("locn_start") + expr("value") + end_capture("locn_end")
    )
412
413
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # NOTE(review): both ignore_expr and ignoreExpr default to quoted_string()
    # instances created once at function-definition time; the test below
    # relies on ParserElement equality against a *fresh* quoted_string() to
    # detect "still the default" - confirm that equality semantics hold
    # before changing this reconciliation logic
    if ignoreExpr != ignore_expr:
        # prefer the PEP8 ignore_expr value only if ignoreExpr was left at its default
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # no content expression given - synthesize one from the delimiters
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of
                # characters that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: use negative lookahead on
                # each delimiter before consuming one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # the grammar is recursive: a group of opener + (ignored | nested | content)* + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name(f"nested {opener}{closer} expression")
    # don't override error message from content expressions
    ret.errmsg = None
    return ret
545
546
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` may be a string (converted to a Keyword; caseless unless
    ``xml`` is true) or a ParserElement. ``xml`` selects strict XML
    matching (double-quoted attribute values only, case-sensitive) vs.
    lenient HTML matching (quoted or unquoted values, caseless attribute
    names). Returns an (openTag, closeTag) pair.

    The Suppress defaults are created once at definition time and shared
    across calls; they carry no state, so sharing is safe.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags match exactly
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # record whether this is a self-closing ("empty") tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or unquoted (any chars except '>'),
        # and attributes may appear with no value at all
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    # give the close tag an end<tagname> results name to mirror the open tag
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for "everything up to the matching close tag"
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
603
604
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, attributes
    with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the lenient, caseless HTML rules
    return _makeTags(tag_str, xml=False)
630
631
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects strict, case-sensitive XML rules
    return _makeTags(tag_str, xml=True)
641
642
# module-level expressions matching any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their
# replacement characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# the most frequently seen entities, listed first as regex alternatives
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# NOTE(review): the pattern is supplied as a callable - presumably so the
# large compressed entity regex is built lazily; confirm against Regex's
# support for deferred/callable patterns
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
656
657
def replace_html_entity(s, l, t):
    """Parse action that maps a matched HTML entity (the ``entity``
    results name) to its replacement character; returns ``None`` for
    unknown entities, leaving the tokens unchanged."""
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
661
662
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # left-associative: a op b op c parses as ((a op b) op c)
    LEFT = 1
    # right-associative: a op b op c parses as (a op (b op c))
    RIGHT = 2
669
670
# a single operator expression: a ParserElement, a string (converted to a
# Literal), or - for ternary operators - a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level passed to infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
687
688
def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # parenthesized sub-expression (renamed from `nested_expr` to avoid
    # shadowing the module-level nested_expr helper)
    paren_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(paren_expr)
    else:
        lastExpr = base_expr | paren_expr

    arity: int
    # annotate with the OpAssoc enum defined above (was legacy `opAssoc`)
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build precedence levels from tightest-binding outward; each level's
    # operand is the expression built for the previous level
    for operDef in op_list:
        # pad the spec tuple so the optional parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: simple juxtaposition
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        # consistency fix: use PEP8 set_name (was legacy setName)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret
884
885
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): backup_stacks=[] is a mutable default shared across all
    # calls - presumably intentional here so every indentedBlock shares one
    # backup list, but confirm before refactoring
    # snapshot the caller's indent stack so it can be restored on failure
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack from the most recent snapshot
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # statement must start at the current indent level exactly
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # entering a block: column must be deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # leaving a block: column must match some enclosing indent level
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # newline handling: swallow line ends (tabs/spaces only as whitespace)
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # block must be indented beyond the current level
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most statements: no INDENT required, UNDENT optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash-continued lines inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
1027
1028
# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# (?:[^*]|\*(?!/)) consumes any char that is not the start of the closing */
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# [\s\S] matches any character including newlines; *? keeps the match non-greedy
# so back-to-back comments are matched separately
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace() so leading spaces/tabs are kept as part of the match
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# (?:\\\n|[^\n])* allows a backslash-newline to continue the comment onto
# the next line, as in C/C++ line continuations
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# union of the c_style_comment and dbl_slash_comment patterns above
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement defined at module level up to this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
1058
1059
1060# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin compatibility shim: forwards all arguments unchanged to the
    :class:`DelimitedList` replacement and returns the resulting expression.
    """
    return DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
1074
1075
# Compatibility synonyms
# fmt: off
# Pre-PEP8 camelCase names kept for backward compatibility. Plain assignments
# are simple aliases; replaced_by_pep8(...) wraps the target so that use of the
# old name can emit a deprecation warning while delegating to the new one.
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this deliberately rebinds the deprecated delimited_list() function
# defined earlier in this module to a deprecation wrapper around DelimitedList.
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on