1# results.py
2from collections.abc import (
3 MutableMapping,
4 Mapping,
5 MutableSequence,
6 Iterator,
7 Iterable,
8)
9import pprint
10from typing import Tuple, Any, Dict, Set, List
11
12from .util import replaced_by_pep8
13
14
# Types that ParseResults treats as atomic text tokens: values of these types
# are wrapped in a list (ParseResults.__init__) and are never iterated over
# (ParseResults.from_dict), even though they are technically iterable.
str_type: Tuple[type, ...] = (str, bytes)
# Concrete type of a generator object, taken from a throwaway generator
# expression; ParseResults.__new__ uses it to expand generators into tokens.
_generator_type = type((_ for _ in ()))
17
18
19class _ParseResultsWithOffset:
20 tup: Tuple["ParseResults", int]
21 __slots__ = ["tup"]
22
23 def __init__(self, p1: "ParseResults", p2: int):
24 self.tup: Tuple[ParseResults, int] = (p1, p2)
25
26 def __getitem__(self, i):
27 return self.tup[i]
28
29 def __getstate__(self):
30 return self.tup
31
32 def __setstate__(self, *args):
33 self.tup = args[0]
34
35
class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print(f"{s} -> {fn(eval(s))}")
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """

    # Token values for which a results name is recorded on the instance but
    # no named entry is stored (see __init__).
    _null_values: Tuple[Any, ...] = (None, [], ())

    _name: str
    _parent: "ParseResults"
    _all_names: Set[str]
    _modal: bool
    _toklist: List[Any]
    _tokdict: Dict[str, Any]

    __slots__ = (
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
    )

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`::

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.DelimitedList(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints::

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            """Create the list, rejecting any *contained* that is not a ``list``.

            Raises:
                TypeError: if *contained* is given and is not a ``list``.
            """
            if contained is None:
                contained = []

            if not isinstance(contained, list):
                raise TypeError(
                    f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        """Allocate a results object and populate its token list.

        If *toklist* is already a ``ParseResults`` it is returned unchanged
        (no new object is created).  Otherwise the token list is built as:

        - ``None`` -> empty list
        - a ``ParseResults.List`` -> a single token holding a plain-list copy
        - any other ``list`` or a generator -> its items
        - anything else -> a single token
        """
        if isinstance(toklist, ParseResults):
            return toklist
        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            self._toklist = (
                [toklist[:]]
                if isinstance(toklist, ParseResults.List)
                else list(toklist)
            )
        else:
            self._toklist = [toklist]
        self._tokdict = {}
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        """Record *modal* and, when *name* is given, store a named result.

        Args:
            toklist: the tokens (already stored in the list by ``__new__``).
            name: results name; ``None`` or ``""`` means "no name".  An
                ``int`` name is converted to ``str``.
            asList: when true, the named value is stored as a nested
                ``ParseResults``; otherwise the first token is stored.
            modal: when false, *name* is added to the set of names whose
                lookup returns all occurrences instead of only the last.
            isinstance: the builtin, bound as a default argument so it is
                looked up as a local name.
        """
        self._tokdict: Dict[str, _ParseResultsWithOffset]
        self._modal = modal

        if name is None or name == "":
            return

        if isinstance(name, int):
            name = str(name)

        if not modal:
            self._all_names = {name}

        self._name = name

        if toklist in self._null_values:
            return

        if isinstance(toklist, (str_type, type)):
            toklist = [toklist]

        if asList:
            if isinstance(toklist, ParseResults):
                self[name] = _ParseResultsWithOffset(ParseResults(toklist._toklist), 0)
            else:
                self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
            self[name]._name = name
            return

        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            if toklist is not self:
                self[name] = toklist
            else:
                self._name = name

    def __getitem__(self, i):
        """Return a token (``int``/``slice`` key) or a named result (other key).

        For a name listed in ``_all_names`` every occurrence is returned,
        wrapped in a new ``ParseResults``; otherwise only the value of the
        last occurrence is returned.

        Raises:
            IndexError: for an out-of-range integer index.
            KeyError: for an unknown results name.
        """
        if isinstance(i, (int, slice)):
            return self._toklist[i]

        if i not in self._all_names:
            return self._tokdict[i][-1][0]

        return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Assign a token (``int``/``slice`` key) or add a named occurrence.

        A ``_ParseResultsWithOffset`` value is appended as-is to the
        occurrences of *k*; any other value under a non-integer key is
        wrapped with offset ``0``.  A new occurrence list is always built, so
        lists shared with shallow copies are left untouched.  If the stored
        value is a ``ParseResults``, its parent is set to this object.
        """
        if isinstance(v, _ParseResultsWithOffset):
            self._tokdict[k] = self._tokdict.get(k, []) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, []) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
        if isinstance(sub, ParseResults):
            sub._parent = self

    def __delitem__(self, i):
        """Delete a token (``int``/``slice`` key) or a results name (other key).

        Deleting tokens also shifts the offsets recorded for named results
        so they keep following the tokens that moved down.

        Raises:
            IndexError: for an out-of-range integer index.
            KeyError: for an unknown results name.
        """
        if not isinstance(i, (int, slice)):
            del self._tokdict[i]
            return

        mylen = len(self._toklist)
        del self._toklist[i]

        # convert int to slice
        if isinstance(i, int):
            if i < 0:
                i += mylen
            i = slice(i, i + 1)
        # indices (relative to the pre-deletion list) that were removed
        removed = range(*i.indices(mylen))

        # Each offset moves down by the number of removed indices below it.
        # Counting is independent of slice direction, so negative-step slices
        # are handled correctly.  Occurrence lists are rebuilt rather than
        # edited in place because copy() shares them between objects.
        for name, occurrences in list(self._tokdict.items()):
            self._tokdict[name] = [
                _ParseResultsWithOffset(
                    value, position - sum(j < position for j in removed)
                )
                for value, position in occurrences
            ]

    def __contains__(self, k) -> bool:
        """Return True if *k* is a defined results name (tokens are not searched)."""
        return k in self._tokdict

    def __len__(self) -> int:
        """Return the number of tokens in the list form of the results."""
        return len(self._toklist)

    def __bool__(self) -> bool:
        """Return True if there is at least one token or one results name."""
        return bool(self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        """Iterate over the tokens (not the results names)."""
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        """Iterate over a reversed snapshot of the tokens."""
        return iter(self._toklist[::-1])

    def keys(self):
        """Return an iterator over the defined results names."""
        return iter(self._tokdict)

    def values(self):
        """Return a generator over the value of each defined results name."""
        return (self[k] for k in self.keys())

    def items(self):
        """Return a generator of ``(name, value)`` pairs for the results names."""
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return bool(self._tokdict)

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``; it may also be passed as the keyword ``default``.

        Raises ``TypeError`` for any keyword argument other than ``default``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + Word(nums)[1, ...]
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: 'AAB'

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k != "default":
                raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
            args = (args[0], v)

        key = args[0]
        # With a single argument a missing name raises KeyError, as dict.pop()
        # does; the default is only consulted for a missing non-integer key.
        if isinstance(key, int) or len(args) == 1 or key in self:
            ret = self[key]
            del self[key]
            return ret
        return args[1]

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``.  Offsets recorded for named results at
        or after the insertion point are shifted up by one so they keep
        following their tokens.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        size_before = len(self._toklist)
        self._toklist.insert(index, ins_string)

        # list.insert() counts a negative index from the end and clamps it at
        # the front; mirror that so offsets are compared with the slot that
        # was actually used.
        if index < 0:
            index = max(size_before + index, 0)

        # A token that sat exactly at `index` is pushed up too, hence `>=`.
        # Occurrence lists are rebuilt rather than edited in place because
        # copy() shares them between objects.
        for name, occurrences in list(self._tokdict.items()):
            self._tokdict[name] = [
                _ParseResultsWithOffset(value, position + (position >= index))
                for value, position in occurrences
            ]

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.
        When *itemseq* is itself a ``ParseResults``, its results names are
        merged as well (same as ``+=``).

        Example::

            patt = Word(alphas)[1, ...]

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        """Attribute-style access to results names.

        An unknown name yields ``""`` rather than raising, except for
        double-underscore names, which raise ``AttributeError``.
        """
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__(self, other: "ParseResults") -> "ParseResults":
        """Return a new object holding the tokens and names of both operands."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other: "ParseResults") -> "ParseResults":
        """Merge *other* into this object in place and return ``self``.

        Named results of *other* are re-based by the current token count; a
        negative offset in *other* is replaced by the current token count.
        """
        if not other:
            return self

        if other._tokdict:
            offset = len(self._toklist)
            addoffset = lambda a: offset if a < 0 else a + offset
            otherdictitems = [
                (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                for k, vlist in other._tokdict.items()
                for v in vlist
            ]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0]._parent = self

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        """Support ``sum()`` over results: ``0 + results`` returns a copy."""
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__(self) -> str:
        """Return ``ClassName(<token list>, <names as dict>)``."""
        return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})"

    def __str__(self) -> str:
        """Return the tokens in list-like form, recursing into nested results."""
        return (
            "["
            + ", ".join(
                [
                    str(i) if isinstance(i, ParseResults) else repr(i)
                    for i in self._toklist
                ]
            )
            + "]"
        )

    def _asStringList(self, sep=""):
        """Return the tokens flattened into a list of ``str``.

        *sep*, when non-empty, is inserted between top-level items; it is not
        passed down to nested results.
        """
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens. Nested
        ``ParseResults`` are converted to lists; other tokens are returned
        unchanged.

        Example::

            patt = Word(alphas)[1, ...]
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> ParseResults(['12', '/', '31', '/', '1999'], {'year': '12', 'month': '31', 'day': '1999'})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'year': '12', 'month': '31', 'day': '1999'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"year": "12", "month": "31", "day": "1999"}
        """

        def to_item(obj):
            # nested results become a dict when they define names, else a list
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            else:
                return obj

        return {k: to_item(v) for k, v in self.items()}

    def copy(self) -> "ParseResults":
        """
        Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
        items contained within the source are shared with the copy. Use
        :class:`ParseResults.deepcopy()` to create a copy with its own separate
        content values.
        """
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def deepcopy(self) -> "ParseResults":
        """
        Returns a new deep copy of a :class:`ParseResults` object.
        """
        ret = self.copy()
        # replace values with copies if they are of known mutable types
        for i, obj in enumerate(self._toklist):
            if isinstance(obj, ParseResults):
                ret._toklist[i] = obj.deepcopy()
            elif isinstance(obj, (str, bytes)):
                pass
            elif isinstance(obj, MutableMapping):
                ret._toklist[i] = dest = type(obj)()
                for k, v in obj.items():
                    dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
            elif isinstance(obj, Iterable):
                ret._toklist[i] = type(obj)(
                    v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
                )
        return ret

    def get_name(self) -> str:
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.
        Returns ``None`` when no name can be determined.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = user_data[1, ...]

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name
        elif self._parent:
            # look for the name under which the parent stores this very object
            par: "ParseResults" = self._parent
            parent_tokdict_items = par._tokdict.items()
            return next(
                (
                    k
                    for k, vlist in parent_tokdict_items
                    for v, loc in vlist
                    if v is self
                ),
                None,
            )
        elif (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            return next(iter(self._tokdict.keys()))
        else:
            return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        ``full=False`` limits the output to the list form; ``include_list=False``
        omits the list form. ``_depth`` is the nesting level used by the
        recursive calls.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('1999/12/31')
            print(result.dump())

        prints::

            ['1999', '/', '12', '/', '31']
            - day: '31'
            - month: '12'
            - year: '1999'
        """
        out = []
        NL = "\n"
        out.append(indent + str(self.as_list()) if include_list else "")

        if not full:
            return "".join(out)

        if self.haskeys():
            # named results, sorted by name
            items = sorted((str(k), v) for k, v in self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                out.append(f"{indent}{(' ' * _depth)}- {k}: ")
                if not isinstance(v, ParseResults):
                    out.append(repr(v))
                    continue

                if not v:
                    out.append(str(v))
                    continue

                out.append(
                    v.dump(
                        indent=indent,
                        full=full,
                        include_list=include_list,
                        _depth=_depth + 1,
                    )
                )
        if not any(isinstance(vv, ParseResults) for vv in self):
            return "".join(out)

        # at least one nested ParseResults: list every token by index
        incr = " "
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                shown = vv.dump(
                    indent=indent,
                    full=full,
                    include_list=include_list,
                    _depth=_depth + 1,
                )
            else:
                shown = vv
            out.append(
                f"{NL}{indent}{incr * _depth}[{i}]:{NL}{indent}{incr * (_depth + 1)}{shown}"
            )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(DelimitedList(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return the pickle state; the parent reference is stored as ``None``."""
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                None,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        """Restore tokens, names and name set from *state*; the parent is reset."""
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        self._parent = None

    def __getnewargs__(self):
        """Return the ``(toklist, name)`` arguments passed to ``__new__`` on unpickling."""
        return self._toklist, self._name

    def __dir__(self):
        """List the class attributes plus the defined results names."""
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            try:
                iter(obj)
            except Exception:
                return False
            # str's are iterable, but in pyparsing, we don't want to iterate over them
            else:
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    asList = as_list
    """Deprecated - use :class:`as_list`"""
    asDict = as_dict
    """Deprecated - use :class:`as_dict`"""
    getName = get_name
    """Deprecated - use :class:`get_name`"""
796
797
# Register ParseResults as a virtual subclass of both ABCs, so that
# isinstance()/issubclass() checks against MutableMapping and MutableSequence
# succeed even though ParseResults does not inherit from either of them.
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)