Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/regex_helper.py: 3%
193 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 06:13 +0000
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 06:13 +0000
1"""
Functions for reversing a regular expression (used in reverse URL resolving).
Used internally by Django and not intended for external use.

This is not, and is not intended to be, a complete reg-exp decompiler. It
should be good enough for a large class of URLs, however.
7"""
8import re
10from django.utils.functional import SimpleLazyObject
# Maps the character following a backslash to the single character that stands
# in for that escape class in a reversed URL (so "\w" becomes "x"). A value of
# None means the escape sequence is dropped entirely. Characters that do not
# appear as keys here stand for themselves.
ESCAPE_MAPPINGS = {
    "A": None,  # \A is dropped
    "b": None,  # \b is dropped
    "B": None,  # \B is dropped
    "d": "0",  # \d is represented by "0"
    "D": "x",  # \D is represented by "x"
    "s": " ",  # \s is represented by a space
    "S": "x",  # \S is represented by "x"
    "w": "x",  # \w is represented by "x"
    "W": "!",  # \W is represented by "!"
    "Z": None,  # \Z is dropped
}
class Choice(list):
    """
    A list of alternative forms that may appear at one position in a pattern
    string.
    """
class Group(list):
    """
    A list standing for one capturing group of the pattern string.
    """
class NonCapture(list):
    """
    A list holding the contents of one non-capturing group of the pattern
    string.
    """
def normalize(pattern):
    r"""
    Reduce a reg-exp pattern to the forms needed for reverse matching and
    return them as a list of (format string, argument names) tuples.

    The reduction works as follows:

    (1) A repeated section is kept at the minimum number of occurrences its
        quantifier permits (zero for optional sections).
    (2) When an optional section contains parameters, a form with one
        occurrence of it is produced in addition to the zero-occurrence form
        from step (1).
    (3) A character class contributes its first element (essentially an
        arbitrary pick); an unordered class such as '.' or '\w' contributes
        an arbitrary representative character.
    (4) Look-ahead and look-behind assertions are skipped.
    (5) Disjunctive ('|') constructs are not supported; a pattern that uses
        one yields the single empty form [("", [])].

    Django's URLs for forward resolving use either all positional arguments or
    all keyword arguments, and the same is assumed here. Reverse resolving may
    use positional args where keyword args were specified, but the two cannot
    be mixed in a single reverse() call.

    Raise ValueError for a "(?..." construct that cannot be reversed.
    """
    # One linear pass over the pattern gathers everything needed later.
    pieces = []
    # Index into "pieces" at which each still-open non-capturing group began.
    open_non_capturing = []
    advance = True
    chars = next_char(iter(pattern))
    positional_count = 0

    # A "while" loop (rather than "for") is used because the quantifier branch
    # may have peeked at the following character, in which case the next pass
    # must not consume another one.
    try:
        ch, escaped = next(chars)
    except StopIteration:
        return [("", [])]

    try:
        while True:
            if escaped:
                pieces.append(ch)
            elif ch == ".":
                # "Any character" is carried through as a literal ".".
                pieces.append(".")
            elif ch == "|":
                # FIXME: One day we should do this, but not in 1.0.
                raise NotImplementedError("Awaiting Implementation")
            elif ch == "^":
                pass
            elif ch == "$":
                break
            elif ch == ")":
                # Only a non-capturing group can end here: every other
                # unescaped parenthesis is consumed in full by the "(" branch
                # below.
                #
                # Wrap the group's contents in a single NonCapture item so
                # that a following quantifier applies to the group as a whole.
                group_start = open_non_capturing.pop()
                wrapped = NonCapture(pieces[group_start:])
                pieces = pieces[:group_start] + [wrapped]
            elif ch == "[":
                # A character class is represented by its first character; the
                # rest, up to the unescaped "]", is discarded.
                ch, escaped = next(chars)
                pieces.append(ch)
                ch, escaped = next(chars)
                while escaped or ch != "]":
                    ch, escaped = next(chars)
            elif ch == "(":
                # Some kind of group.
                ch, escaped = next(chars)
                if escaped or ch != "?":
                    # A positional (unnamed capturing) group.
                    name = "_%d" % positional_count
                    positional_count += 1
                    pieces.append(Group((("%%(%s)s" % name), name)))
                    walk_to_end(ch, chars)
                else:
                    ch, escaped = next(chars)
                    if ch in "!=<":
                        # Assertions are ignorable: skip to the end of the
                        # group.
                        walk_to_end(ch, chars)
                    elif ch == ":":
                        # Non-capturing group: remember where it starts.
                        open_non_capturing.append(len(pieces))
                    elif ch == "P":
                        ch, escaped = next(chars)
                        if ch not in ("<", "="):
                            raise ValueError(
                                "Non-reversible reg-exp portion: '(?P%s'" % ch
                            )
                        # "(?P<" opens a named capturing group whose name ends
                        # at ">"; "(?P=" is a named backreference whose name
                        # ends at ")".
                        terminal_char = ">" if ch == "<" else ")"
                        name_chars = []
                        ch, escaped = next(chars)
                        while ch != terminal_char:
                            name_chars.append(ch)
                            ch, escaped = next(chars)
                        param = "".join(name_chars)
                        if terminal_char != ")":
                            pieces.append(Group((("%%(%s)s" % param), param)))
                            walk_to_end(ch, chars)
                        else:
                            # A backreference has already consumed its closing
                            # parenthesis and adds no new argument.
                            pieces.append(Group((("%%(%s)s" % param), None)))
                    else:
                        # Anything else, other than a named group, is
                        # something we cannot reverse.
                        raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
            elif ch in "*?+{":
                # A quantifier acts on the most recent item in "pieces".
                count, ch = get_quantifier(ch, chars)
                if ch:
                    # A character was peeked at but turned out not to belong
                    # to the quantifier, so it is handled on the next pass
                    # instead of consuming a new one.
                    # NOTE(review): "escaped" is not refreshed for this peeked
                    # character; it keeps the value it had for the quantifier.
                    advance = False

                if count == 0:
                    if contains(pieces[-1], Group):
                        # A capturing group (or something containing one) with
                        # a minimum of zero must also be offered with a single
                        # occurrence. Every zero-minimum quantifier (except
                        # {0,0}, which is conveniently ignored) permits one
                        # occurrence.
                        pieces[-1] = Choice([None, pieces[-1]])
                    else:
                        pieces.pop()
                elif count > 1:
                    pieces.extend([pieces[-1]] * (count - 1))
            else:
                # Anything else is a literal.
                pieces.append(ch)

            if advance:
                ch, escaped = next(chars)
            advance = True
    except StopIteration:
        # The pattern is exhausted.
        pass
    except NotImplementedError:
        # A case of using the disjunctive form. No results for you!
        return [("", [])]

    return list(zip(*flatten_result(pieces)))
def next_char(input_iter):
    r"""
    An iterator that yields the next character from "input_iter", respecting
    escape sequences. An escaped character is replaced by a representative of
    its class (e.g. \w -> "x"). If the escaped character is one that is
    skipped, it is not returned (the next character is returned instead).

    Yield (character, escaped) pairs, where "escaped" is False for a raw
    character and True for a character that was preceded by a backslash.

    If the input ends with a lone backslash, iteration simply stops there.
    """
    for ch in input_iter:
        if ch != "\\":
            yield ch, False
            continue
        try:
            ch = next(input_iter)
        except StopIteration:
            # A trailing backslash has nothing to escape. Letting StopIteration
            # escape from a generator would be turned into a RuntimeError
            # (PEP 479), so end the iteration explicitly instead.
            return
        representative = ESCAPE_MAPPINGS.get(ch, ch)
        if representative is None:
            # This escape sequence is ignored altogether.
            continue
        yield representative, True
def walk_to_end(ch, input_iter):
    """
    Advance "input_iter" past the end of the group it is currently inside.

    "ch" is the character most recently read from the group; if it is "(", one
    nested group is already open. "input_iter" yields (character, escaped)
    pairs. Nested groups are skipped and escaped parentheses are not counted.
    Return once the parenthesis closing the current group has been consumed,
    or when the input runs out.
    """
    depth = 1 if ch == "(" else 0
    for char, is_escaped in input_iter:
        if is_escaped:
            continue
        if char == "(":
            depth += 1
        elif char == ")":
            if depth == 0:
                return
            depth -= 1
def get_quantifier(ch, input_iter):
    """
    Parse a quantifier from the input, where "ch" is the first character in the
    quantifier.

    Return the minimum number of occurrences permitted by the quantifier and
    either None or the next character from the input_iter if the next character
    is not part of the quantifier.

    "input_iter" must yield (character, escaped) pairs. A brace quantifier
    whose lower bound is omitted ("{,n}") has a minimum of zero. StopIteration
    propagates if the input ends before the closing "}" of a brace quantifier;
    ValueError is raised if the lower bound is otherwise not an integer.
    """
    if ch in "*?+":
        try:
            ch2, _ = next(input_iter)
        except StopIteration:
            ch2 = None
        if ch2 == "?":
            # A trailing "?" only makes the quantifier non-greedy.
            ch2 = None
        if ch == "+":
            return 1, ch2
        return 0, ch2

    # Brace quantifier: collect everything up to and including the "}".
    quant = []
    while ch != "}":
        ch, _ = next(input_iter)
        quant.append(ch)
    # Drop the closing "}" and split into [minimum] or [minimum, maximum].
    values = "".join(quant[:-1]).split(",")

    # Consume the trailing '?', if necessary.
    try:
        ch, _ = next(input_iter)
    except StopIteration:
        ch = None
    if ch == "?":
        ch = None

    if values[0] == "" and len(values) > 1:
        # "{,n}": an omitted lower bound means zero occurrences are allowed.
        minimum = 0
    else:
        minimum = int(values[0])
    return minimum, ch
def contains(source, inst):
    """
    Tell whether "source" is an instance of "inst" or, when "source" is a
    NonCapture, whether any of its elements (searched recursively) is one.
    Return True if so and False otherwise.
    """
    if isinstance(source, inst):
        return True
    if not isinstance(source, NonCapture):
        return False
    return any(contains(member, inst) for member in source)
def flatten_result(source):
    """
    Expand "source" into every reg-exp possibility it describes.

    "source" is None, a Group, or a sequence whose items are strings, Groups,
    Choices or NonCaptures. Return a pair (strings, argument lists): two lists
    of equal length, where each string is one possible form and the list at
    the same index holds the argument names used by that form.
    """
    if source is None:
        return [""], [[]]
    if isinstance(source, Group):
        params = [] if source[1] is None else [source[1]]
        return [source[0]], [params]

    strings = [""]
    arg_lists = [[]]
    # "consumed" is the index of the first item not yet folded into "strings".
    index = consumed = 0
    for index, node in enumerate(source):
        if isinstance(node, str):
            # Plain strings are gathered in bulk when the next non-string item
            # (or the end of the sequence) is reached.
            continue
        literal = "".join(source[consumed:index])
        if isinstance(node, Group):
            literal += node[0]
            param = node[1]
        else:
            param = None
        consumed = index + 1
        strings = [text + literal for text in strings]
        if param:
            for args in arg_lists:
                args.append(param)
        if isinstance(node, (Choice, NonCapture)):
            # A NonCapture is flattened as a single alternative; a Choice
            # contributes each of its members as an alternative.
            alternatives = [node] if isinstance(node, NonCapture) else node
            tail_strings, tail_arg_lists = [], []
            for alternative in alternatives:
                sub_strings, sub_arg_lists = flatten_result(alternative)
                tail_strings.extend(sub_strings)
                tail_arg_lists.extend(sub_arg_lists)
            # Pair every form built so far with every alternative.
            combined_strings = []
            combined_arg_lists = []
            for text, args in zip(strings, arg_lists):
                for tail, tail_args in zip(tail_strings, tail_arg_lists):
                    combined_strings.append(text + tail)
                    combined_arg_lists.append(args[:] + tail_args)
            strings = combined_strings
            arg_lists = combined_arg_lists
    if index >= consumed:
        # Fold in any plain strings left after the last non-string item.
        literal = "".join(source[consumed:])
        strings = [text + literal for text in strings]
    return strings, arg_lists
def _lazy_re_compile(regex, flags=0):
    """
    Return a SimpleLazyObject wrapping a function that compiles "regex" with
    "flags".

    "regex" may be a str or bytes pattern, or an already compiled pattern; in
    the latter case "flags" must be empty and the object is passed through
    unchanged.
    """

    def _compile():
        if not isinstance(regex, (str, bytes)):
            # Already compiled: flags cannot be applied after the fact.
            assert not flags, "flags must be empty if regex is passed pre-compiled"
            return regex
        return re.compile(regex, flags)

    return SimpleLazyObject(_compile)