1# helpers.py
2import html.entities
3import re
4import sys
5import typing
6
7from . import __diag__
8from .core import *
9from .util import (
10 _bslash,
11 _flatten,
12 _escape_regex_range_chars,
13 replaced_by_pep8,
14)
15
16
17#
18# global helpers
19#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # reconcile the pre-PEP8 intExpr keyword with int_expr (intExpr wins if both given)
    intExpr = intExpr or int_expr
    # placeholder for the count-dependent tail; re-targeted on every parse by the
    # parse action below
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # runs as soon as the count has been parsed: rebind array_expr to
        # exactly n repetitions of expr (or Empty() for a zero count)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a run of digits, converted to int
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the parse action below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the count takes effect even during lookahead/backtracking
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")
78
79
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that matches a literal repeat of
    whatever a previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the repeat is
    matched as a literal, it will also match the leading ``"1:1"`` in
    ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr` instead. Do *not* use with packrat
    parsing enabled.
    """
    repeater = Forward()

    def record_matched_tokens(s, l, t):
        # re-target the repeater at whatever the source expression just matched
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # multiple (possibly nested) tokens - flatten and match them in sequence
            flat_tokens = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flat_tokens)

    expr.add_parse_action(record_matched_tokens, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
113
114
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # reparse with a copy of the source expression, then verify the tokens agree
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the flattened tokens produced by the first occurrence...
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # ...and require every later occurrence to produce the same tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fixed: missing space after "found" in the error message
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
149
150
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile pre-PEP8 keyword arguments with the current ones
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed for ``caseless`` usually means several strings were
    # passed positionally by mistake - warn if diagnostics are enabled
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        # masks(a, b) is True when symbol a would match a leading prefix of symbol b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop an exact duplicate of the current symbol
                    del symbols[i + j + 1]
                    break
                if masks(cur, other):
                    # the longer (masked) symbol must be tried first - move it
                    # ahead of its shorter prefix
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no duplicate or masking conflict for cur - advance (for-else)
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
275
276
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to define a dictionary-building expression from separate
    key and value patterns. Each key/value pair is wrapped in a
    :class:`Group`, repeated one or more times, and handed to
    :class:`Dict`, so every pair becomes a named entry in the returned
    :class:`ParseResults`. The key pattern may include delimiting
    punctuation as long as it is suppressed; the value pattern may
    define its own results names.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])    # -> 'SQUARE'
        print(result.shape)       # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    key_value_pair = Group(key + value)
    return Dict(OneOrMore(key_value_pair))
315
316
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # both flags must be True for string output (asString is the pre-PEP8 spelling)
    asString = asString and as_string

    # zero-width markers whose parse actions record the current parse location
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # don't skip leading whitespace before recording the end location
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return the raw input slice between the two recorded locations
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace all tokens with the original slice; pop() removes the
            # internal marker names so they don't leak into the results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # propagate the wrapped expression's ignore expressions (comments, etc.)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
368
369
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # a TokenConverter wrapper whose parse action promotes the single
    # grouped result to the top level
    take_first = lambda t: t[0]
    return TokenConverter(expr).add_parse_action(take_first)
375
376
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker whose parse action reports the current location
    start_marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the end marker must not skip whitespace, or it would report the
    # position after any trailing whitespace
    end_marker = start_marker.copy().leaveWhitespace()
    return Group(start_marker("locn_start") + expr("value") + end_marker("locn_end"))
410
411
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile the pre-PEP8 ignoreExpr keyword with ignore_expr.
    # NOTE(review): both defaults are distinct quoted_string() instances, so this
    # relies on ParserElement equality semantics to detect "left at default" -
    # looks intentional, but verify against ParserElement.__eq__ before changing.
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression: whitespace-delimited runs of
        # characters that are not delimiters (and not part of an ignore expression)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters can be excluded via CharsNotIn
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters require negative lookahead on each
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: a group is opener + (ignored | nested group | content)* + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name(f"nested {opener}{closer} expression")
    # don't override error message from content expressions
    ret.errmsg = None
    return ret
543
544
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed in; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML mode: attribute values must be double-quoted, names are case-sensitive
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing tag; converted to a bool result
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML mode: values may be quoted or unquoted, attributes may be valueless,
        # and attribute names are normalized to lower case
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    # adjacent=False allows whitespace between "</" and the tag name
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
601
602
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper returning a pair of expressions for the opening and
    closing forms of the given HTML tag. Tags match in either upper or
    lower case, and attributes - with namespaces, quoted or unquoted
    values - are collected as named results.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    open_tag, close_tag = _makeTags(tag_str, False)
    return open_tag, close_tag
628
629
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper returning a pair of expressions for the opening and
    closing forms of the given XML tag. Unlike :class:`make_html_tags`,
    tags match only in the exact upper/lower case given.

    Example: similar to :class:`make_html_tags`
    """
    open_tag, close_tag = _makeTags(tag_str, True)
    return open_tag, close_tag
639
640
# expressions matching any well-formed HTML open/close tag, regardless of tag name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches "&name;" for any known entity; the name is captured as "entity"
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
651
652
def replace_html_entity(s, l, t):
    """Parser action that maps a matched HTML entity (named result
    ``entity``) to its replacement character; yields ``None`` for an
    unrecognized entity name."""
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
656
657
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1  # operators at this level group left-to-right
    RIGHT = 2  # operators at this level group right-to-left
664
665
# operator term for infix_notation: a single expression or string, or (for
# ternary operators) a tuple of the two separating expressions
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (operator expr, arity, associativity[, optional parse action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
682
683
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    # fixed: annotate with the enum class OpAssoc, not the lowercase compatibility
    # alias that is only bound at the bottom of this module
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    # build precedence levels from tightest-binding outward; each level's
    # operand is the expression built for the previous (tighter) level
    for operDef in op_list:
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # fixed: use set_name for consistency with the rest of this module
        # (setName is the deprecated pre-PEP8 spelling)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
859
860
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default for backup_stacks looks deliberate - it acts
    # as a module-lifetime store of saved indent stacks shared across calls; confirm
    # before "fixing" it to None.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # on parse failure, restore the caller's indent stack to its saved state
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # a sub-block must start at a column deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must return to some previously seen indent level
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # swallow line endings (but not leading spaces, which carry the indent info)
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow line continuations (backslash-newline) inside the block
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
1002
1003
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# everything up to (but not including) the next newline
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
1034
1035
1036# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    # forward every argument explicitly to the replacement class
    return DelimitedList(
        expr,
        delim=delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )
1050
1051
# pre-PEP8 compatible names
# (retained for backward compatibility; new code should use the snake_case
# names defined earlier in this module)
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)  # rebinds (replaces) the compatibility function defined earlier in this module
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on