1# results.py
2from __future__ import annotations
3
4import collections
5from collections.abc import (
6 MutableMapping,
7 Mapping,
8 MutableSequence,
9 Iterator,
10 Iterable,
11)
12import pprint
13from typing import Any
14
15from .util import replaced_by_pep8
16
17
# Scalar text types: values of these types are wrapped or kept whole rather
# than being iterated element by element.
str_type: tuple[type, ...] = (str, bytes)

# Concrete type of generator objects, taken from a throwaway generator
# expression.
_generator_type = type(_ for _ in ())
20
21
class _ParseResultsWithOffset:
    """Pair a named result value with an integer offset.

    Both are held in the single ``tup`` slot as ``(value, offset)``; indexing
    the wrapper indexes that tuple, so ``wrapper[0]`` is the value and
    ``wrapper[1]`` is the offset.
    """

    tup: tuple[ParseResults, int]
    __slots__ = ["tup"]

    def __init__(self, p1: ParseResults, p2: int) -> None:
        pair: tuple[ParseResults, int] = (p1, p2)
        self.tup = pair

    def __getitem__(self, i):
        # Delegating to the tuple also makes ``value, offset = wrapper`` work,
        # since the tuple raises IndexError past its end.
        pair = self.tup
        return pair[i]

    def __getstate__(self):
        # pickle support: the tuple is the entire state
        return self.tup

    def __setstate__(self, *args):
        restored = args[0]
        self.tup = restored
37
38
39class ParseResults:
40 """Structured parse results, to provide multiple means of access to
41 the parsed data:
42
43 - as a list (``len(results)``)
44 - by list index (``results[0], results[1]``, etc.)
45 - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
46
47 Example::
48
49 integer = Word(nums)
50 date_str = (integer.set_results_name("year") + '/'
51 + integer.set_results_name("month") + '/'
52 + integer.set_results_name("day"))
53 # equivalent form:
54 # date_str = (integer("year") + '/'
55 # + integer("month") + '/'
56 # + integer("day"))
57
58 # parse_string returns a ParseResults object
59 result = date_str.parse_string("1999/12/31")
60
61 def test(s, fn=repr):
62 print(f"{s} -> {fn(eval(s))}")
63 test("list(result)")
64 test("result[0]")
65 test("result['month']")
66 test("result.day")
67 test("'month' in result")
68 test("'minutes' in result")
69 test("result.dump()", str)
70
71 prints::
72
73 list(result) -> ['1999', '/', '12', '/', '31']
74 result[0] -> '1999'
75 result['month'] -> '12'
76 result.day -> '31'
77 'month' in result -> True
78 'minutes' in result -> False
79 result.dump() -> ['1999', '/', '12', '/', '31']
80 - day: '31'
81 - month: '12'
82 - year: '1999'
83 """
84
    # Token-list values for which __init__ stores no named result.
    _null_values: tuple[Any, ...] = (None, [], ())

    # Per-instance state; each name below has a matching entry in __slots__.
    # results name of this object (None until one is assigned)
    _name: str
    # enclosing ParseResults, set when this object is stored inside another
    _parent: ParseResults
    # names for which lookup by name returns every occurrence, not just the last
    _all_names: set[str]
    # the ``modal`` flag passed to __init__
    _modal: bool
    # positional tokens
    _toklist: list[Any]
    # results name -> list of (value, offset) entries
    _tokdict: dict[str, Any]

    __slots__ = (
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
    )
102
103 class List(list):
104 """
105 Simple wrapper class to distinguish parsed list results that should be preserved
106 as actual Python lists, instead of being converted to :class:`ParseResults`::
107
108 LBRACK, RBRACK = map(pp.Suppress, "[]")
109 element = pp.Forward()
110 item = ppc.integer
111 element_list = LBRACK + pp.DelimitedList(element) + RBRACK
112
113 # add parse actions to convert from ParseResults to actual Python collection types
114 def as_python_list(t):
115 return pp.ParseResults.List(t.as_list())
116 element_list.add_parse_action(as_python_list)
117
118 element <<= item | element_list
119
120 element.run_tests('''
121 100
122 [2,3,4]
123 [[2, 1],3,4]
124 [(2, 1),3,4]
125 (2,3,4)
126 ''', post_parse=lambda s, r: (r[0], type(r[0])))
127
128 prints::
129
130 100
131 (100, <class 'int'>)
132
133 [2,3,4]
134 ([2, 3, 4], <class 'list'>)
135
136 [[2, 1],3,4]
137 ([[2, 1], 3, 4], <class 'list'>)
138
139 (Used internally by :class:`Group` when `aslist=True`.)
140 """
141
142 def __new__(cls, contained=None):
143 if contained is None:
144 contained = []
145
146 if not isinstance(contained, list):
147 raise TypeError(
148 f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
149 )
150
151 return list.__new__(cls)
152
153 def __new__(cls, toklist=None, name=None, **kwargs):
154 if isinstance(toklist, ParseResults):
155 return toklist
156 self = object.__new__(cls)
157 self._name = None
158 self._parent = None
159 self._all_names = set()
160
161 if toklist is None:
162 self._toklist = []
163 elif isinstance(toklist, (list, _generator_type)):
164 self._toklist = (
165 [toklist[:]]
166 if isinstance(toklist, ParseResults.List)
167 else list(toklist)
168 )
169 else:
170 self._toklist = [toklist]
171 self._tokdict = dict()
172 return self
173
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ) -> None:
        """Record ``modal`` and, when a results name is given, register it.

        The token list itself has already been stored by ``__new__``; this
        method only deals with naming.

        :param toklist: the token(s) that ``name`` should refer to
        :param name: results name; ``None`` or ``""`` leaves the object
            unnamed, an ``int`` is converted to ``str``
        :param asList: when true, the named value is stored as a nested
            :class:`ParseResults`; otherwise the first token is stored,
            falling back to ``toklist`` itself
        :param modal: stored as ``_modal``; when false, ``_all_names`` is set
            to ``{name}`` so that lookup by that name returns every occurrence
        :param isinstance: the ``isinstance`` builtin bound as a default
            argument (presumably a local-lookup speed-up, in line with the
            performance note above); not meant to be passed by callers
        """
        self._tokdict: dict[str, _ParseResultsWithOffset]
        self._modal = modal

        # unnamed results need nothing more
        if name is None or name == "":
            return

        if isinstance(name, int):
            name = str(name)

        if not modal:
            self._all_names = {name}

        self._name = name

        # the name is remembered, but there is no value to file under it
        if toklist in self._null_values:
            return

        # a lone string/bytes/type is a single token, not a sequence of them
        if isinstance(toklist, (str_type, type)):
            toklist = [toklist]

        if asList:
            if isinstance(toklist, ParseResults):
                self[name] = _ParseResultsWithOffset(ParseResults(toklist._toklist), 0)
            else:
                self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
            # tag whatever lookup by name now returns with the same name
            self[name]._name = name
            return

        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            # toklist could not be indexed: store it whole, unless it is this
            # very object, in which case only the name is recorded
            if toklist is not self:
                self[name] = toklist
            else:
                self._name = name
214
215 def __getitem__(self, i):
216 if isinstance(i, (int, slice)):
217 return self._toklist[i]
218
219 if i not in self._all_names:
220 return self._tokdict[i][-1][0]
221
222 return ParseResults([v[0] for v in self._tokdict[i]])
223
224 def __setitem__(self, k, v, isinstance=isinstance):
225 if isinstance(v, _ParseResultsWithOffset):
226 self._tokdict[k] = self._tokdict.get(k, list()) + [v]
227 sub = v[0]
228 elif isinstance(k, (int, slice)):
229 self._toklist[k] = v
230 sub = v
231 else:
232 self._tokdict[k] = self._tokdict.get(k, []) + [
233 _ParseResultsWithOffset(v, 0)
234 ]
235 sub = v
236 if isinstance(sub, ParseResults):
237 sub._parent = self
238
239 def __delitem__(self, i):
240 if not isinstance(i, (int, slice)):
241 del self._tokdict[i]
242 return
243
244 mylen = len(self._toklist)
245 del self._toklist[i]
246
247 # convert int to slice
248 if isinstance(i, int):
249 if i < 0:
250 i += mylen
251 i = slice(i, i + 1)
252 # get removed indices
253 removed = list(range(*i.indices(mylen)))
254 removed.reverse()
255 # fixup indices in token dictionary
256 for occurrences in self._tokdict.values():
257 for j in removed:
258 for k, (value, position) in enumerate(occurrences):
259 occurrences[k] = _ParseResultsWithOffset(
260 value, position - (position > j)
261 )
262
263 def __contains__(self, k) -> bool:
264 return k in self._tokdict
265
266 def __len__(self) -> int:
267 return len(self._toklist)
268
269 def __bool__(self) -> bool:
270 return not not (self._toklist or self._tokdict)
271
272 def __iter__(self) -> Iterator:
273 return iter(self._toklist)
274
275 def __reversed__(self) -> Iterator:
276 return iter(self._toklist[::-1])
277
278 def keys(self):
279 return iter(self._tokdict)
280
281 def values(self):
282 return (self[k] for k in self.keys())
283
284 def items(self):
285 return ((k, self[k]) for k in self.keys())
286
287 def haskeys(self) -> bool:
288 """
289 Since ``keys()`` returns an iterator, this method is helpful in bypassing
290 code that looks for the existence of any defined results names."""
291 return not not self._tokdict
292
293 def pop(self, *args, **kwargs):
294 """
295 Removes and returns item at specified index (default= ``last``).
296 Supports both ``list`` and ``dict`` semantics for ``pop()``. If
297 passed no argument or an integer argument, it will use ``list``
298 semantics and pop tokens from the list of parsed tokens. If passed
299 a non-integer argument (most likely a string), it will use ``dict``
300 semantics and pop the corresponding value from any defined results
301 names. A second default return value argument is supported, just as in
302 ``dict.pop()``.
303
304 Example::
305
306 numlist = Word(nums)[...]
307 print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
308
309 def remove_first(tokens):
310 tokens.pop(0)
311 numlist.add_parse_action(remove_first)
312 print(numlist.parse_string("0 123 321")) # -> ['123', '321']
313
314 label = Word(alphas)
315 patt = label("LABEL") + Word(nums)[1, ...]
316 print(patt.parse_string("AAB 123 321").dump())
317
318 # Use pop() in a parse action to remove named result (note that corresponding value is not
319 # removed from list form of results)
320 def remove_LABEL(tokens):
321 tokens.pop("LABEL")
322 return tokens
323 patt.add_parse_action(remove_LABEL)
324 print(patt.parse_string("AAB 123 321").dump())
325
326 prints::
327
328 ['AAB', '123', '321']
329 - LABEL: 'AAB'
330
331 ['AAB', '123', '321']
332 """
333 if not args:
334 args = [-1]
335 for k, v in kwargs.items():
336 if k == "default":
337 args = (args[0], v)
338 else:
339 raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
340 if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
341 index = args[0]
342 ret = self[index]
343 del self[index]
344 return ret
345 else:
346 defaultvalue = args[1]
347 return defaultvalue
348
349 def get(self, key, default_value=None):
350 """
351 Returns named result matching the given key, or if there is no
352 such name, then returns the given ``default_value`` or ``None`` if no
353 ``default_value`` is specified.
354
355 Similar to ``dict.get()``.
356
357 Example::
358
359 integer = Word(nums)
360 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
361
362 result = date_str.parse_string("1999/12/31")
363 print(result.get("year")) # -> '1999'
364 print(result.get("hour", "not specified")) # -> 'not specified'
365 print(result.get("hour")) # -> None
366 """
367 if key in self:
368 return self[key]
369 else:
370 return default_value
371
372 def insert(self, index, ins_string):
373 """
374 Inserts new element at location index in the list of parsed tokens.
375
376 Similar to ``list.insert()``.
377
378 Example::
379
380 numlist = Word(nums)[...]
381 print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
382
383 # use a parse action to insert the parse location in the front of the parsed results
384 def insert_locn(locn, tokens):
385 tokens.insert(0, locn)
386 numlist.add_parse_action(insert_locn)
387 print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
388 """
389 self._toklist.insert(index, ins_string)
390 # fixup indices in token dictionary
391 for occurrences in self._tokdict.values():
392 for k, (value, position) in enumerate(occurrences):
393 occurrences[k] = _ParseResultsWithOffset(
394 value, position + (position > index)
395 )
396
397 def append(self, item):
398 """
399 Add single element to end of ``ParseResults`` list of elements.
400
401 Example::
402
403 numlist = Word(nums)[...]
404 print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
405
406 # use a parse action to compute the sum of the parsed integers, and add it to the end
407 def append_sum(tokens):
408 tokens.append(sum(map(int, tokens)))
409 numlist.add_parse_action(append_sum)
410 print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
411 """
412 self._toklist.append(item)
413
414 def extend(self, itemseq):
415 """
416 Add sequence of elements to end of ``ParseResults`` list of elements.
417
418 Example::
419
420 patt = Word(alphas)[1, ...]
421
422 # use a parse action to append the reverse of the matched strings, to make a palindrome
423 def make_palindrome(tokens):
424 tokens.extend(reversed([t[::-1] for t in tokens]))
425 return ''.join(tokens)
426 patt.add_parse_action(make_palindrome)
427 print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
428 """
429 if isinstance(itemseq, ParseResults):
430 self.__iadd__(itemseq)
431 else:
432 self._toklist.extend(itemseq)
433
434 def clear(self):
435 """
436 Clear all elements and results names.
437 """
438 del self._toklist[:]
439 self._tokdict.clear()
440
441 def __getattr__(self, name):
442 try:
443 return self[name]
444 except KeyError:
445 if name.startswith("__"):
446 raise AttributeError(name)
447 return ""
448
449 def __add__(self, other: ParseResults) -> ParseResults:
450 ret = self.copy()
451 ret += other
452 return ret
453
454 def __iadd__(self, other: ParseResults) -> ParseResults:
455 if not other:
456 return self
457
458 if other._tokdict:
459 offset = len(self._toklist)
460 addoffset = lambda a: offset if a < 0 else a + offset
461 otheritems = other._tokdict.items()
462 otherdictitems = [
463 (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
464 for k, vlist in otheritems
465 for v in vlist
466 ]
467 for k, v in otherdictitems:
468 self[k] = v
469 if isinstance(v[0], ParseResults):
470 v[0]._parent = self
471
472 self._toklist += other._toklist
473 self._all_names |= other._all_names
474 return self
475
476 def __radd__(self, other) -> ParseResults:
477 if isinstance(other, int) and other == 0:
478 # useful for merging many ParseResults using sum() builtin
479 return self.copy()
480 else:
481 # this may raise a TypeError - so be it
482 return other + self
483
484 def __repr__(self) -> str:
485 return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})"
486
487 def __str__(self) -> str:
488 return (
489 "["
490 + ", ".join(
491 [
492 str(i) if isinstance(i, ParseResults) else repr(i)
493 for i in self._toklist
494 ]
495 )
496 + "]"
497 )
498
499 def _asStringList(self, sep=""):
500 out = []
501 for item in self._toklist:
502 if out and sep:
503 out.append(sep)
504 if isinstance(item, ParseResults):
505 out += item._asStringList()
506 else:
507 out.append(str(item))
508 return out
509
510 def as_list(self, *, flatten: bool = False) -> list:
511 """
512 Returns the parse results as a nested list of matching tokens, all converted to strings.
513 If flatten is True, all the nesting levels in the returned list are collapsed.
514
515 Example::
516
517 patt = Word(alphas)[1, ...]
518 result = patt.parse_string("sldkj lsdkj sldkj")
519 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
520 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
521
522 # Use as_list() to create an actual list
523 result_list = result.as_list()
524 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
525 """
526
527 def flattened(pr):
528 to_visit = collections.deque([*self])
529 while to_visit:
530 to_do = to_visit.popleft()
531 if isinstance(to_do, ParseResults):
532 to_visit.extendleft(to_do[::-1])
533 else:
534 yield to_do
535
536 if flatten:
537 return [*flattened(self)]
538 else:
539 return [
540 res.as_list() if isinstance(res, ParseResults) else res
541 for res in self._toklist
542 ]
543
544 def as_dict(self) -> dict:
545 """
546 Returns the named parse results as a nested dictionary.
547
548 Example::
549
550 integer = Word(nums)
551 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
552
553 result = date_str.parse_string('12/31/1999')
554 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
555
556 result_dict = result.as_dict()
557 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
558
559 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
560 import json
561 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
562 print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
563 """
564
565 def to_item(obj):
566 if isinstance(obj, ParseResults):
567 return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
568 else:
569 return obj
570
571 return dict((k, to_item(v)) for k, v in self.items())
572
573 def copy(self) -> ParseResults:
574 """
575 Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
576 items contained within the source are shared with the copy. Use
577 :class:`ParseResults.deepcopy()` to create a copy with its own separate
578 content values.
579 """
580 ret = ParseResults(self._toklist)
581 ret._tokdict = self._tokdict.copy()
582 ret._parent = self._parent
583 ret._all_names |= self._all_names
584 ret._name = self._name
585 return ret
586
587 def deepcopy(self) -> ParseResults:
588 """
589 Returns a new deep copy of a :class:`ParseResults` object.
590 """
591 ret = self.copy()
592 # replace values with copies if they are of known mutable types
593 for i, obj in enumerate(self._toklist):
594 if isinstance(obj, ParseResults):
595 ret._toklist[i] = obj.deepcopy()
596 elif isinstance(obj, (str, bytes)):
597 pass
598 elif isinstance(obj, MutableMapping):
599 ret._toklist[i] = dest = type(obj)()
600 for k, v in obj.items():
601 dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
602 elif isinstance(obj, Iterable):
603 ret._toklist[i] = type(obj)(
604 v.deepcopy() if isinstance(v, ParseResults) else v for v in obj # type: ignore[call-arg]
605 )
606 return ret
607
608 def get_name(self) -> str | None:
609 r"""
610 Returns the results name for this token expression. Useful when several
611 different expressions might match at a particular location.
612
613 Example::
614
615 integer = Word(nums)
616 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
617 house_number_expr = Suppress('#') + Word(nums, alphanums)
618 user_data = (Group(house_number_expr)("house_number")
619 | Group(ssn_expr)("ssn")
620 | Group(integer)("age"))
621 user_info = user_data[1, ...]
622
623 result = user_info.parse_string("22 111-22-3333 #221B")
624 for item in result:
625 print(item.get_name(), ':', item[0])
626
627 prints::
628
629 age : 22
630 ssn : 111-22-3333
631 house_number : 221B
632 """
633 if self._name:
634 return self._name
635 elif self._parent:
636 par: ParseResults = self._parent
637 parent_tokdict_items = par._tokdict.items()
638 return next(
639 (
640 k
641 for k, vlist in parent_tokdict_items
642 for v, loc in vlist
643 if v is self
644 ),
645 None,
646 )
647 elif (
648 len(self) == 1
649 and len(self._tokdict) == 1
650 and next(iter(self._tokdict.values()))[0][1] in (0, -1)
651 ):
652 return next(iter(self._tokdict.keys()))
653 else:
654 return None
655
    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        :param indent: string placed in front of each generated line
        :param full: when false, only the list line is returned
        :param include_list: when false, the leading list line is left empty
        :param _depth: nesting level, passed along by the recursive calls

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('1999/12/31')
            print(result.dump())

        prints::

            ['1999', '/', '12', '/', '31']
            - day: '31'
            - month: '12'
            - year: '1999'
        """
        out = []
        NL = "\n"
        # first entry: the plain-list form, or "" when include_list is false
        out.append(indent + str(self.as_list()) if include_list else "")

        if not full:
            return "".join(out)

        if self.haskeys():
            # names are listed in sorted order of their string form
            # NOTE(review): the sort is over (str(k), v) tuples, so two names
            # with the same string form would fall back to comparing values
            items = sorted((str(k), v) for k, v in self.items())
            for k, v in items:
                # ``out`` already holds the first entry, so this is always true
                if out:
                    out.append(NL)
                out.append(f"{indent}{(' ' * _depth)}- {k}: ")
                if not isinstance(v, ParseResults):
                    out.append(repr(v))
                    continue

                # empty nested results are shown with str() and not descended into
                if not v:
                    out.append(str(v))
                    continue

                out.append(
                    v.dump(
                        indent=indent,
                        full=full,
                        include_list=include_list,
                        _depth=_depth + 1,
                    )
                )
        # the positional breakdown below is only produced when at least one
        # token is itself a ParseResults
        if not any(isinstance(vv, ParseResults) for vv in self):
            return "".join(out)

        v = self
        incr = " "
        nl = "\n"
        for i, vv in enumerate(v):
            if isinstance(vv, ParseResults):
                vv_dump = vv.dump(
                    indent=indent,
                    full=full,
                    include_list=include_list,
                    _depth=_depth + 1,
                )
                out.append(
                    f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv_dump}"
                )
            else:
                out.append(
                    f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv}"
                )

        return "".join(out)
729
730 def pprint(self, *args, **kwargs):
731 """
732 Pretty-printer for parsed results as a list, using the
733 `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
734 Accepts additional positional or keyword args as defined for
735 `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
736
737 Example::
738
739 ident = Word(alphas, alphanums)
740 num = Word(nums)
741 func = Forward()
742 term = ident | num | Group('(' + func + ')')
743 func <<= ident + Group(Optional(DelimitedList(term)))
744 result = func.parse_string("fna a,b,(fnb c,d,200),100")
745 result.pprint(width=40)
746
747 prints::
748
749 ['fna',
750 ['a',
751 'b',
752 ['(', 'fnb', ['c', 'd', '200'], ')'],
753 '100']]
754 """
755 pprint.pprint(self.as_list(), *args, **kwargs)
756
757 # add support for pickle protocol
758 def __getstate__(self):
759 return (
760 self._toklist,
761 (
762 self._tokdict.copy(),
763 None,
764 self._all_names,
765 self._name,
766 ),
767 )
768
769 def __setstate__(self, state):
770 self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
771 self._all_names = set(inAccumNames)
772 self._parent = None
773
774 def __getnewargs__(self):
775 return self._toklist, self._name
776
777 def __dir__(self):
778 return dir(type(self)) + list(self.keys())
779
780 @classmethod
781 def from_dict(cls, other, name=None) -> ParseResults:
782 """
783 Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
784 name-value relations as results names. If an optional ``name`` argument is
785 given, a nested ``ParseResults`` will be returned.
786 """
787
788 def is_iterable(obj):
789 try:
790 iter(obj)
791 except Exception:
792 return False
793 # str's are iterable, but in pyparsing, we don't want to iterate over them
794 else:
795 return not isinstance(obj, str_type)
796
797 ret = cls([])
798 for k, v in other.items():
799 if isinstance(v, Mapping):
800 ret += cls.from_dict(v, name=k)
801 else:
802 ret += cls([v], name=k, asList=is_iterable(v))
803 if name is not None:
804 ret = cls([ret], name=name)
805 return ret
806
    # camelCase aliases bound to the snake_case methods defined above; the
    # string following each alias marks it as deprecated.
    asList = as_list
    """Deprecated - use :class:`as_list`"""
    asDict = as_dict
    """Deprecated - use :class:`as_dict`"""
    getName = get_name
    """Deprecated - use :class:`get_name`"""
813
814
# Register ParseResults as a virtual subclass of both ABCs, so that
# isinstance(obj, MutableMapping) and isinstance(obj, MutableSequence) hold
# even though the class does not inherit from either.
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)