1# actions.py
2
3from .exceptions import ParseException
4from .util import col, replaced_by_pep8
5
6
class OnlyOnce:
    """
    Parse-action wrapper that permits the wrapped callable to run a single
    time.

    Once a call has completed, every further call raises
    :class:`ParseException` until :meth:`reset` is invoked.
    """

    def __init__(self, method_call):
        # Function-scope import, kept as in the original
        # (presumably to sidestep a circular import with .core - TODO confirm).
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        # Guard clause: a repeat invocation without reset() is an error.
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

        outcome = self.callable(s, l, t)
        # The flag flips only after the wrapped callable returned normally,
        # so a call that raised does not use up the single permitted run.
        self.called = True
        return outcome

    def reset(self):
        """
        Re-arm the wrapper so the parse action may run one more time.
        """
        self.called = False
31
32
def match_only_at_col(n):
    """
    Build a parse action that only accepts matches located at column ``n``.

    The returned action computes the column of the match with
    ``col(locn, strg)``. If that column differs from ``n`` it raises
    :class:`ParseException`; otherwise it returns ``None``.
    """

    def verify_col(strg, locn, toks):
        found_at = col(locn, strg)
        if found_at == n:
            return
        raise ParseException(strg, locn, f"matched token not at column {n}")

    return verify_col
44
45
def replace_with(repl_str):
    """
    Build a parse action that ignores what was matched and returns a
    one-element list holding ``repl_str``. Handy in combination with
    :class:`transform_string<ParserElement.transform_string>` ().

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """

    def _constant(s, l, t):
        # A new list is created on every invocation.
        return [repl_str]

    return _constant
61
62
def remove_quotes(s, l, t):
    """
    Parse action that strips the enclosing quotation marks from a parsed
    quoted string, by dropping the first and last character of the first
    token.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    # Exactly one character is removed from each end, whatever it is.
    return quoted[1:-1]
78
79
def with_attribute(*args, **attr_dict):
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names with a namespace prefix are not valid Python
    identifiers, so they must be given in the second or third form.

    If any positional name-value tuples are given, keyword arguments are
    ignored; the two styles are not merged.

    The returned action looks each name up in the parsed tokens exactly
    as given (``name in tokens``) and does no case folding itself.
    NOTE(review): earlier documentation stated that attribute names match
    case-insensitively - presumably the tag expressions normalise names
    before this action runs; confirm against ``make_html_tags``.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action returns ``None`` when every required
    attribute is satisfied, and raises :class:`ParseException` when an
    attribute is missing or has a value other than the required one.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Positional (name, value) pairs win; keyword arguments are consulted
    # only when no positional pairs were supplied.
    pairs = args if args else attr_dict.items()
    # Snapshot into a list of 2-tuples. The unpacking also rejects malformed
    # entries here, when the action is built, rather than during parsing.
    attrs = [(name, value) for name, value in pairs]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            # ANY_VALUE is a sentinel, so test it by identity. Using != here
            # would invoke the required value's own __ne__/__eq__ against a
            # bare object(), which can raise or wrongly report "equal" for
            # values with custom comparison logic.
            if attrValue is with_attribute.ANY_VALUE:
                continue
            actual = tokens[attrName]
            if actual != attrValue:
                raise ParseException(
                    s,
                    l,
                    f"attribute {attrName!r} has value {actual!r}, must be {attrValue!r}",
                )

    return pa


# Unique sentinel meaning "the attribute must be present, with any value".
with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
157
158
def with_class(classname, namespace=""):
    """
    Shorthand for :class:`with_attribute` when the attribute to check is
    ``class`` - awkward to spell as a keyword argument because ``class``
    is a reserved word in Python.

    The attribute name checked is ``class``, or ``<namespace>:class``
    when a non-empty ``namespace`` is given.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this <div> has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = f"{namespace}:class"
    else:
        attr_name = "class"
    # Passed through ** because the attribute name is not a valid identifier.
    required = {attr_name: classname}
    return with_attribute(**required)
197
198
# Backward-compatibility aliases: each pre-PEP8 camelCase name is bound to the
# result of passing its snake_case counterpart through replaced_by_pep8.
# Listed in the order the snake_case functions are defined above.
# fmt: off
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
replaceWith = replaced_by_pep8("replaceWith", replace_with)
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
withClass = replaced_by_pep8("withClass", with_class)
# fmt: on