Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pyparsing/helpers.py: 31%
314 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
1# helpers.py
2import html.entities
3import re
4import typing
6from . import __diag__
7from .core import *
8from .util import _bslash, _flatten, _escape_regex_range_chars
11#
12# global helpers
13#
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # promote a bare string to the configured literal class
    if isinstance(expr, str_type):
        expr = ParserElement._literalStringClass(expr)

    streamlined = str(expr.copy().streamline())
    trailing = " [{}]".format(str(delim)) if allow_trailing_delim else ""
    list_name = "{} [{} {}]...{}".format(streamlined, str(delim), streamlined, trailing)

    # unless combining, drop the delimiters from the returned tokens
    if not combine:
        delim = Suppress(delim)

    # translate user-facing min/max element counts into repetition bounds
    # for the (delim + expr) tail - the first element is matched separately,
    # so each bound is reduced by one
    if min is not None:
        if min < 1:
            raise ValueError("min must be greater than 0")
        min -= 1
    if max is not None:
        if min is not None and max <= min:
            raise ValueError("max must be greater than, or equal to min")
        max -= 1

    delimited_list_expr = expr + (delim + expr)[min, max]

    if allow_trailing_delim:
        delimited_list_expr += Opt(delim)

    if combine:
        return Combine(delimited_list_expr).set_name(list_name)
    return delimited_list_expr.set_name(list_name)
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults
    """
    # reconcile pre-PEP8 keyword with the new-style argument
    intExpr = intExpr or int_expr
    array_expr = Forward()

    def set_array_contents(s, l, t):
        # the count token drives what the Forward will match next
        nonlocal array_expr
        count = t[0]
        array_expr <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try so lookaheads/packrat attempts also bind the count
    intExpr.add_parse_action(set_array_contents, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # rebind the Forward to literally match whatever was just parsed
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten nested results into a sequence of literals
            flattened = _flatten(t.as_list())
            rep << And(Literal(tt) for tt in flattened)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of the original expression, then verify the
    # resulting tokens against those captured from the first occurrence
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add missing space between "found" and the token list
                raise ParseException(
                    s, l, "Expected {}, found {}".format(matchTokens, theseTokens)
                )

        # replace (not add) the verification action each time the source
        # expression matches, so rep always checks against the latest tokens
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile pre-PEP8 synonym keywords with the new-style arguments
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed as `caseless` usually means the caller passed several
    # choice strings positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # equality / prefix-masking predicates and element class, chosen once
    # up front depending on caselessness and keyword-ness
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # duplicate symbol - drop the later occurrence
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # `cur` is a prefix of `other`; move the longer symbol
                    # ahead so it is tried first
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = "[{}]".format(
                    "".join(_escape_regex_range_chars(sym) for sym in symbols)
                )
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = r"\b(?:{})\b".format(patt)

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())
    """
    # each key/value pair is grouped so Dict can treat token[0] as the key
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + SkipTo(closer) + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # reconcile pre-PEP8 keyword with the new-style argument
    asString = asString and as_string

    # bracket the expression with location markers that record the start
    # and end offsets of the match in the input string
    loc_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = loc_marker.copy()
    end_marker.callPreparse = False
    matchExpr = loc_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extractText(s, l, t):
            # return just the matched slice of the input
            return s[t._original_start : t._original_end]

    else:

        def extractText(s, l, t):
            # replace tokens with the matched slice, keeping named results;
            # pop() removes the internal marker names from the results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def _first_token(t):
        # peel off the single wrapping group
        return t[0]

    return TokenConverter(expr).add_parse_action(_first_token)
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the Located class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker that records the current parse location
    marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, so it reports the
    # position immediately after the matched value
    end_marker = marker.copy().leaveWhitespace()
    return Group(marker("locn_start") + expr("value") + end_marker("locn_end"))
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:
    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(delimited_list(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)
    """
    # reconcile pre-PEP8 `ignoreExpr` with `ignore_expr`: if they differ,
    # prefer the new-style argument unless the caller explicitly passed a
    # non-default value for the old-style one
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression: whitespace-delimited runs of
        # characters that are not delimiters (and not ignored text)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: exclude them via CharsNotIn
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: use negative lookahead on the
                # delimiter literals, scanning one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: each nesting level is a Group of delimiters
    # enclosing ignored text, deeper nestings, or plain content
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    # accept either a tag name string or a pre-built expression; in HTML
    # mode (xml=False) the tag name matches caselessly
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing (empty) tag; the parse
            # action converts it to a boolean results value
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: attribute values may be quoted or bare (anything up to '>'),
        # attribute names are lowercased, and '=value' may be omitted
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    # expose the tag name and a convenience expression for the tag body
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the looser HTML rules (caseless names,
    # unquoted attribute values)
    return _makeTags(tag_str, xml=False)
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects the strict XML rules (case-sensitive names,
    # double-quoted attribute values)
    return _makeTags(tag_str, xml=True)
# ready-to-use expressions that match any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map HTML5 entity names (without the trailing ';') to their characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
def replace_html_entity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # an unknown entity yields None, which drops the token from the results
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
class OpAssoc(Enum):
    """Operator associativity indicators, used in the operator
    specification tuples passed to :class:`infix_notation`."""

    LEFT = 1
    RIGHT = 2
# the operator expression in an infix_notation spec: a single expression or
# string, or (for ternary operators) a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# a full operator spec tuple: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as Suppress(lpar). If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as Suppress(rpar). If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (fix: original tested `rpar` twice, so a non-Suppress lpar with a
    # Suppress rpar failed to group parenthesized sub-expressions)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    # build the grammar bottom-up: each precedence level wraps the one below
    for i, operDef in enumerate(op_list):
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = "{}{} term".format(opExpr1, opExpr2)
        else:
            term_name = "{} term".format(opExpr)

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: Forward = Forward().set_name(term_name)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # prefix unary; try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use IndentedBlock class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): `backup_stacks=[]` is a mutable default shared across
    # calls - presumably intentional here so nested indentedBlock
    # expressions share the backup list, but confirm before changing
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack after a failed match
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # succeed only when the current column matches the current level
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # a sub-block must be indented deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # an unindent must land on some enclosing indentation level
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# the `\\\n` alternative allows a // comment to continue across an
# escaped (backslash-terminated) line ending
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# pre-PEP8 compatible names
# (retained for backward compatibility with pyparsing 2.x code;
# new code should use the snake_case names defined above)
delimitedList = delimited_list
countedArray = counted_array
matchPreviousLiteral = match_previous_literal
matchPreviousExpr = match_previous_expr
oneOf = one_of
dictOf = dict_of
originalTextFor = original_text_for
nestedExpr = nested_expr
makeHTMLTags = make_html_tags
makeXMLTags = make_xml_tags
anyOpenTag, anyCloseTag = any_open_tag, any_close_tag
commonHTMLEntity = common_html_entity
replaceHTMLEntity = replace_html_entity
opAssoc = OpAssoc
infixNotation = infix_notation
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment