1# actions.py
2from __future__ import annotations
3
4from typing import Union, Callable, Any
5
6from .exceptions import ParseException
7from .util import col, replaced_by_pep8
8from .results import ParseResults
9
10
# Type alias covering every call signature accepted for a parse action:
# no arguments, the matched tokens alone, an integer location plus tokens,
# or the full (string, location, tokens) triple. The return type of a
# parse action is left unconstrained (``Any``).
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
17
18
class OnlyOnce:
    """
    Parse action wrapper that permits the wrapped action to run a single time.

    The wrapped callable is passed through ``_trim_arity`` at construction
    and is afterwards always invoked with the ``(s, l, t)`` triple. After one
    call has returned normally, any further call raises
    :class:`ParseException` until :meth:`reset` is used.

    Note: parse action signature must include all 3 arguments.
    """

    def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None:
        # Function-scope import, kept exactly where the original had it
        # (presumably to sidestep a circular import with .core - confirm).
        from .core import _trim_arity

        self.called = False
        self.callable = _trim_arity(method_call)

    def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults:
        """
        Run the wrapped parse action, or raise :class:`ParseException` if it
        has already run since construction or the last :meth:`reset`.
        """
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

        # The flag flips only after the action returns, so an action that
        # raises leaves the wrapper armed for another attempt.
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """
        Re-arm the wrapper so the associated parse action may run once more.
        """
        self.called = False
44
45
def match_only_at_col(n: int) -> ParseAction:
    """
    Build a parse action that accepts a match only when it occurs at
    column ``n`` of the input text.

    The returned action computes ``col(locn, strg)`` for the match location
    and raises :class:`ParseException` when that column differs from ``n``;
    otherwise it returns ``None`` and leaves the tokens untouched.
    """

    def verify_col(strg: str, locn: int, toks: ParseResults) -> None:
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, f"matched token not at column {n}")

    return verify_col
57
58
def replace_with(repl_str: Any) -> ParseAction:
    """
    Build a parse action that ignores whatever was matched and returns the
    fixed value ``repl_str`` wrapped in a new one-element list. Especially
    useful when used with :meth:`~ParserElement.transform_string`.

    Example:

    .. doctest::

        >>> num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        >>> term = na | num

        >>> term[1, ...].parse_string("324 234 N/A 234")
        ParseResults([324, 234, nan, 234], {})
    """

    def _replace(s, l, t):
        # A fresh list is built on every call; the arguments are unused.
        return [repl_str]

    return _replace
77
78
def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
    r"""
    Parse action that strips the surrounding quotation marks from a parsed
    quoted string by dropping the first and last character of the first
    token. Intended for quoting done with a single character; for quote
    delimiters longer than one character use the :class:`QuotedString`
    class.

    Example:

    .. doctest::

        >>> # by default, quotation marks are included in parsed results
        >>> quoted_string.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(["'Now is the Winter of our Discontent'"], {})

        >>> # use remove_quotes to strip quotation marks from parsed results
        >>> dequoted = quoted_string().set_parse_action(remove_quotes)
        >>> dequoted.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(['Now is the Winter of our Discontent'], {})
    """
    quoted = t[0]
    # Slice off one character at each end; no check is made that they are quotes.
    return quoted[1:-1]
100
101
def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
    """
    Build a validating parse action for start tags created with
    :class:`make_xml_tags` or :class:`make_html_tags`. Use
    ``with_attribute`` to qualify a starting tag with a required attribute
    value, to avoid false matches on common tags such as ``<TD>`` or
    ``<DIV>``.

    The required attribute names and values can be given as:

    - keyword arguments, as in ``(align="right")``, or
    - an explicit dict with the ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Names carrying a namespace prefix cannot be written as keyword
    arguments, so pass them with the dict or the tuple form. When any
    positional tuples are supplied, keyword arguments are not consulted.
    Each name is checked for presence in the matched tokens exactly as it
    is spelled here.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when a
    required attribute is missing or holds a different value; otherwise it
    returns ``None``.

    The next two examples use the following input data and tag parsers:

    .. testcode::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

    Only match div tag having a type attribute with value "grid":

    .. testcode::

        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0

    Construct a match with any div tag having a type attribute,
    regardless of the value:

    .. testcode::

        div_any_type = div().set_parse_action(
            with_attribute(type=with_attribute.ANY_VALUE)
        )
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Positional (name, value) pairs take precedence: keyword arguments are
    # used only when no positional pairs were given.
    required: list[tuple[str, str]] = list(args) if args else list(attr_dict.items())

    def pa(s: str, l: int, tokens: ParseResults) -> None:
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s, l, "no matching attribute " + name)
            # ANY_VALUE means presence alone is enough; skip the value comparison.
            if expected != with_attribute.ANY_VALUE:  # type: ignore [attr-defined]
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(
                        s,
                        l,
                        f"attribute {name!r} has value {actual!r}, must be {expected!r}",
                    )

    return pa


with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
"Value to use with :class:`with_attribute` parse action, to match any value, as long as the attribute is present"
197
198
def with_class(classname: str, namespace: str = "") -> ParseAction:
    """
    Shorthand for :meth:`with_attribute` when matching on a tag's
    ``class`` attribute - made difficult because ``class`` is
    a reserved word in Python.

    When ``namespace`` is non-empty the attribute tested is
    ``"<namespace>:class"``; otherwise it is plain ``"class"``.

    Using similar input data to the :meth:`with_attribute` examples:

    .. testcode::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this <div> has no class</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

    Only match div tag having the "grid" class:

    .. testcode::

        div_grid = div().set_parse_action(with_class("grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0

    Construct a match with any div tag having a class attribute,
    regardless of the value:

    .. testcode::

        div_any_type = div().set_parse_action(
            with_class(with_attribute.ANY_VALUE)
        )
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = f"{namespace}:class"
    else:
        attr_name = "class"
    # Dict unpacking is needed because the key cannot be written as a keyword.
    return with_attribute(**{attr_name: classname})
255
256
# Compatibility synonyms
# Pre-PEP 8 camelCase spellings of the helpers defined in this module, kept
# for backward compatibility. Each one is produced by replaced_by_pep8() from
# the legacy name and the snake_case function it stands for.
# fmt: off
replaceWith = replaced_by_pep8("replaceWith", replace_with)
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
withClass = replaced_by_pep8("withClass", with_class)
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
# fmt: on