1# results.py
2from collections.abc import (
3 MutableMapping,
4 Mapping,
5 MutableSequence,
6 Iterator,
7 Sequence,
8 Container,
9)
10import pprint
11from typing import Tuple, Any, Dict, Set, List
12
13str_type: Tuple[type, ...] = (str, bytes)
14_generator_type = type((_ for _ in ()))
15
16
17class _ParseResultsWithOffset:
18 tup: Tuple["ParseResults", int]
19 __slots__ = ["tup"]
20
21 def __init__(self, p1: "ParseResults", p2: int):
22 self.tup: Tuple[ParseResults, int] = (p1, p2)
23
24 def __getitem__(self, i):
25 return self.tup[i]
26
27 def __getstate__(self):
28 return self.tup
29
30 def __setstate__(self, *args):
31 self.tup = args[0]
32
33
class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print(f"{s} -> {fn(eval(s))}")
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """

    # ``toklist`` values for which __init__ records the name but stores no
    # named entry
    _null_values: Tuple[Any, ...] = (None, [], ())

    # NOTE: these annotations are evaluated before the nested ``List`` class
    # below is defined, so ``List`` here is still ``typing.List``.
    _name: str
    _parent: "ParseResults"
    _all_names: Set[str]
    _modal: bool
    _toklist: List[Any]
    _tokdict: Dict[str, Any]

    __slots__ = (
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
    )

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`::

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.DelimitedList(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints::

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            """Create the list, rejecting any ``contained`` that is not a ``list``.

            Raises ``TypeError`` if ``contained`` is given and is not a list.
            """
            if contained is None:
                contained = []

            if not isinstance(contained, list):
                raise TypeError(
                    f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        """Allocate the instance and populate its token list.

        If ``toklist`` is already a :class:`ParseResults` it is returned
        unchanged (no new object is created). Otherwise:

        - ``None`` gives an empty token list
        - a :class:`ParseResults.List` is kept whole as a single token
          (a shallow copy of it)
        - any other ``list`` or generator is expanded into the token list
        - anything else becomes a one-element token list
        """
        if isinstance(toklist, ParseResults):
            return toklist
        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            self._toklist = (
                [toklist[:]]
                if isinstance(toklist, ParseResults.List)
                else list(toklist)
            )
        else:
            self._toklist = [toklist]
        self._tokdict = dict()
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        """Attach an optional results name to the tokens set up by ``__new__``.

        ``toklist`` is the parsed token(s); ``name`` is the results name
        (ints are converted to ``str``; ``None`` or ``""`` means unnamed).
        When ``asList`` is true the named value is stored wrapped in a nested
        :class:`ParseResults`; otherwise the first token (or ``toklist``
        itself if it cannot be indexed) is stored directly. When ``modal`` is
        false the name is added to ``_all_names``, which makes lookups by
        that name return every stored value instead of only the last one.
        ``isinstance`` is the builtin bound as a default argument.
        """
        self._tokdict: Dict[str, _ParseResultsWithOffset]
        self._modal = modal

        if name is None or name == "":
            return

        if isinstance(name, int):
            name = str(name)

        if not modal:
            self._all_names = {name}

        self._name = name

        if toklist in self._null_values:
            return

        if isinstance(toklist, (str_type, type)):
            toklist = [toklist]

        if asList:
            if isinstance(toklist, ParseResults):
                self[name] = _ParseResultsWithOffset(ParseResults(toklist._toklist), 0)
            else:
                self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
            self[name]._name = name
            return

        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            # ParseResults(existing) returns ``existing`` from __new__ and then
            # runs __init__ on it with toklist being that same object; do not
            # store an object under its own name in that case
            if toklist is not self:
                self[name] = toklist
            else:
                self._name = name

    def __getitem__(self, i):
        """Return a token (int/slice index) or a named result (any other key).

        For a name listed in ``_all_names`` a new :class:`ParseResults`
        holding every value stored under that name is returned; for other
        names only the most recently stored value is returned. Raises
        ``IndexError``/``KeyError`` like the underlying list/dict.
        """
        if isinstance(i, (int, slice)):
            return self._toklist[i]

        if i not in self._all_names:
            return self._tokdict[i][-1][0]

        return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Assign a token (int/slice key) or add a named result (other keys).

        Named results accumulate: each assignment appends another
        ``(value, offset)`` entry under ``k``. A ``_ParseResultsWithOffset``
        value is stored as-is; any other named value is stored with offset 0.
        A :class:`ParseResults` value gets this object as its parent.
        """
        if isinstance(v, _ParseResultsWithOffset):
            self._tokdict[k] = self._tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, list()) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
        if isinstance(sub, ParseResults):
            sub._parent = self

    def __delitem__(self, i):
        """Delete a token (int/slice index) or a results name (other keys).

        Deleting tokens also decrements the recorded position of every named
        result that was located after a removed token.
        """
        if not isinstance(i, (int, slice)):
            del self._tokdict[i]
            return

        mylen = len(self._toklist)
        del self._toklist[i]

        # convert int to slice
        if isinstance(i, int):
            if i < 0:
                i += mylen
            i = slice(i, i + 1)
        # get removed indices, highest first so earlier shifts do not
        # disturb the comparison for later ones
        removed = list(range(*i.indices(mylen)))
        removed.reverse()
        # fixup indices in token dictionary
        for occurrences in self._tokdict.values():
            for j in removed:
                for k, (value, position) in enumerate(occurrences):
                    occurrences[k] = _ParseResultsWithOffset(
                        value, position - (position > j)
                    )

    def __contains__(self, k) -> bool:
        """Return whether ``k`` is a defined results name (not a token value)."""
        return k in self._tokdict

    def __len__(self) -> int:
        """Return the number of tokens in the list form of the results."""
        return len(self._toklist)

    def __bool__(self) -> bool:
        """Return True if there is at least one token or one results name."""
        return bool(self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        """Iterate over the tokens (list semantics, not the results names)."""
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        """Iterate over the tokens in reverse order."""
        return iter(self._toklist[::-1])

    def keys(self):
        """Return an iterator over the defined results names."""
        return iter(self._tokdict)

    def values(self):
        """Return a generator over the values of the defined results names."""
        return (self[k] for k in self.keys())

    def items(self):
        """Return a generator of ``(name, value)`` pairs for the results names."""
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return bool(self._tokdict)

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``.

        The only accepted keyword argument is ``default``; any other keyword
        raises ``TypeError``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + Word(nums)[1, ...]
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: 'AAB'

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == "default":
                args = (args[0], v)
            else:
                raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            # name not present and a default was supplied
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``. The recorded positions of named results
        located at or after the insertion point are moved up by one.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        size_before = len(self._toklist)
        self._toklist.insert(index, ins_string)

        # list.insert() counts a negative index from the end and clamps it at
        # the front; resolve it the same way so the comparison below is made
        # against the slot the new element actually occupies
        if index < 0:
            index = max(index + size_before, 0)

        # fixup indices in token dictionary: everything that was at or after
        # the insertion point has moved one slot to the right
        for occurrences in self._tokdict.values():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position + (position >= index)
                )

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.
        If ``itemseq`` is itself a ``ParseResults``, its results names are
        merged in as well (same as ``+=``).

        Example::

            patt = Word(alphas)[1, ...]

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        """Attribute-style access to results names.

        Returns ``""`` for an undefined name, except that undefined names
        starting with ``__`` raise ``AttributeError``.
        """
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__(self, other: "ParseResults") -> "ParseResults":
        """Return a new ``ParseResults`` combining a copy of ``self`` with ``other``."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other: "ParseResults") -> "ParseResults":
        """Append ``other``'s tokens and merge its results names into ``self``.

        Positions recorded for ``other``'s named results are shifted by the
        current token count (negative positions map to that count itself).
        """
        if not other:
            return self

        if other._tokdict:
            offset = len(self._toklist)
            # materialize the shifted entries first, so that the source
            # dictionary is fully read before ``self`` is modified
            shifted_entries = [
                (
                    key,
                    _ParseResultsWithOffset(
                        entry[0], offset if entry[1] < 0 else entry[1] + offset
                    ),
                )
                for key, entries in other._tokdict.items()
                for entry in entries
            ]
            for key, entry in shifted_entries:
                self[key] = entry
                if isinstance(entry[0], ParseResults):
                    entry[0]._parent = self

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        """Support ``sum()`` over ``ParseResults`` by accepting ``0 + results``.

        For any other left operand ``NotImplemented`` is returned, so that
        Python raises ``TypeError``.
        """
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        # Python only calls __radd__ after ``other`` has failed to handle the
        # addition; evaluating ``other + self`` again here would re-enter
        # this method without end.
        return NotImplemented

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})"

    def __str__(self) -> str:
        """Render like a list, using ``str()`` for nested results and
        ``repr()`` for all other tokens."""
        return (
            "["
            + ", ".join(
                [
                    str(i) if isinstance(i, ParseResults) else repr(i)
                    for i in self._toklist
                ]
            )
            + "]"
        )

    def _asStringList(self, sep=""):
        """Return the tokens flattened into a list of strings, with ``sep``
        inserted between top-level items when it is non-empty."""
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens.
        Nested ``ParseResults`` are converted recursively; other tokens are
        returned as they are.

        Example::

            patt = Word(alphas)[1, ...]
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """

        def to_item(obj):
            # nested results become a dict if they define names, else a list
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            else:
                return obj

        return {k: to_item(v) for k, v in self.items()}

    def copy(self) -> "ParseResults":
        """
        Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
        items contained within the source are shared with the copy. Use
        :class:`ParseResults.deepcopy()` to create a copy with its own separate
        content values.
        """
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def deepcopy(self) -> "ParseResults":
        """
        Returns a new deep copy of a :class:`ParseResults` object.

        Tokens that are ``ParseResults``, mutable mappings or other containers
        (except ``str``/``bytes``) are replaced in the copy by new objects;
        the source object is left untouched. The results-name dictionary is
        copied as in :class:`ParseResults.copy()`.
        """
        ret = self.copy()
        # replace values with copies if they are of known mutable types;
        # the replacements go into the copy's token list, never into ours
        for i, obj in enumerate(self._toklist):
            if isinstance(obj, ParseResults):
                ret._toklist[i] = obj.deepcopy()
            elif isinstance(obj, (str, bytes)):
                pass
            elif isinstance(obj, MutableMapping):
                ret._toklist[i] = dest = type(obj)()
                for k, v in obj.items():
                    dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
            elif isinstance(obj, Container):
                ret._toklist[i] = type(obj)(
                    v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
                )
        return ret

    def get_name(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Returns ``None`` if no name can be determined.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = user_data[1, ...]

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name
        elif self._parent:
            # look for the name under which the parent stored this very object
            par: "ParseResults" = self._parent
            parent_tokdict_items = par._tokdict.items()
            return next(
                (
                    k
                    for k, vlist in parent_tokdict_items
                    for v, loc in vlist
                    if v is self
                ),
                None,
            )
        elif (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            # a single token with a single name recorded at position 0 or -1
            return next(iter(self._tokdict.keys()))
        else:
            return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        If ``full`` is false only the list line is returned; if
        ``include_list`` is false the list line is left empty. ``_depth`` is
        the nesting level used for the recursive calls.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('1999/12/31')
            print(result.dump())

        prints::

            ['1999', '/', '12', '/', '31']
            - day: '31'
            - month: '12'
            - year: '1999'
        """
        NL = "\n"
        # the first entry is always present (possibly empty), so every named
        # entry below is preceded by a newline
        out = [indent + str(self.as_list()) if include_list else ""]

        if not full:
            return "".join(out)

        if self.haskeys():
            items = sorted((str(k), v) for k, v in self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                out.append(f"{indent}{(' ' * _depth)}- {k}: ")
                if not isinstance(v, ParseResults):
                    out.append(repr(v))
                    continue

                if not v:
                    out.append(str(v))
                    continue

                out.append(
                    v.dump(
                        indent=indent,
                        full=full,
                        include_list=include_list,
                        _depth=_depth + 1,
                    )
                )

        # positional listing is only produced when there are nested results
        if not any(isinstance(vv, ParseResults) for vv in self):
            return "".join(out)

        incr = " "
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                shown = vv.dump(
                    indent=indent,
                    full=full,
                    include_list=include_list,
                    _depth=_depth + 1,
                )
            else:
                shown = vv
            out.append(
                f"{NL}{indent}{incr * _depth}[{i}]:{NL}{indent}{incr * (_depth + 1)}{shown}"
            )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(DelimitedList(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return the picklable state; the parent reference is not saved."""
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                None,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        """Restore state produced by ``__getstate__``; the parent is reset to None."""
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        self._parent = None

    def __getnewargs__(self):
        """Arguments passed to ``__new__`` when unpickling."""
        return self._toklist, self._name

    def __dir__(self):
        """List the class attributes plus the defined results names."""
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            try:
                iter(obj)
            except Exception:
                return False
            # str's are iterable, but in pyparsing, we don't want to iterate over them
            else:
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                # nested mappings become nested, named ParseResults
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    asList = as_list
    """Deprecated - use :class:`as_list`"""
    asDict = as_dict
    """Deprecated - use :class:`as_dict`"""
    getName = get_name
    """Deprecated - use :class:`get_name`"""
794
795
# Register ParseResults as a virtual subclass of both ABCs: ParseResults does
# not inherit from either, but after registration
# isinstance(results, MutableMapping) and isinstance(results, MutableSequence)
# both report True.
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)