Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/django/utils/regex_helper.py: 3%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

194 statements  

1""" 

2Functions for reversing a regular expression (used in reverse URL resolving). 

3Used internally by Django and not intended for external use. 

4 

5This is not, and is not intended to be, a complete reg-exp decompiler. It 

6should be good enough for a large class of URLS, however. 

7""" 

8 

9import re 

10 

11from django.utils.functional import SimpleLazyObject 

12 

13# Mapping of an escape character to a representative of that class. So, e.g., 

14# "\w" is replaced by "x" in a reverse URL. A value of None means to ignore 

15# this sequence. Any missing key is mapped to itself. 

16ESCAPE_MAPPINGS = { 

17 "A": None, 

18 "b": None, 

19 "B": None, 

20 "d": "0", 

21 "D": "x", 

22 "s": " ", 

23 "S": "x", 

24 "w": "x", 

25 "W": "!", 

26 "Z": None, 

27} 

28 

29 

30class Choice(list): 

31 """Represent multiple possibilities at this point in a pattern string.""" 

32 

33 

34class Group(list): 

35 """Represent a capturing group in the pattern string.""" 

36 

37 

38class NonCapture(list): 

39 """Represent a non-capturing group in the pattern string.""" 

40 

41 

42def normalize(pattern): 

43 r""" 

44 Given a reg-exp pattern, normalize it to an iterable of forms that 

45 suffice for reverse matching. This does the following: 

46 

47 (1) For any repeating sections, keeps the minimum number of occurrences 

48 permitted (this means zero for optional groups). 

49 (2) If an optional group includes parameters, include one occurrence of 

50 that group (along with the zero occurrence case from step (1)). 

51 (3) Select the first (essentially an arbitrary) element from any character 

52 class. Select an arbitrary character for any unordered class (e.g. '.' 

53 or '\w') in the pattern. 

54 (4) Ignore look-ahead and look-behind assertions. 

55 (5) Raise an error on any disjunctive ('|') constructs. 

56 

57 Django's URLs for forward resolving are either all positional arguments or 

58 all keyword arguments. That is assumed here, as well. Although reverse 

59 resolving can be done using positional args when keyword args are 

60 specified, the two cannot be mixed in the same reverse() call. 

61 """ 

62 # Do a linear scan to work out the special features of this pattern. The 

63 # idea is that we scan once here and collect all the information we need to 

64 # make future decisions. 

65 result = [] 

66 non_capturing_groups = [] 

67 consume_next = True 

68 pattern_iter = next_char(iter(pattern)) 

69 num_args = 0 

70 

71 # A "while" loop is used here because later on we need to be able to peek 

72 # at the next character and possibly go around without consuming another 

73 # one at the top of the loop. 

74 try: 

75 ch, escaped = next(pattern_iter) 

76 except StopIteration: 

77 return [("", [])] 

78 

79 try: 

80 while True: 

81 if escaped: 

82 result.append(ch) 

83 elif ch == ".": 

84 # Replace "any character" with an arbitrary representative. 

85 result.append(".") 

86 elif ch == "|": 

87 # FIXME: One day we'll should do this, but not in 1.0. 

88 raise NotImplementedError("Awaiting Implementation") 

89 elif ch == "^": 

90 pass 

91 elif ch == "$": 

92 break 

93 elif ch == ")": 

94 # This can only be the end of a non-capturing group, since all 

95 # other unescaped parentheses are handled by the grouping 

96 # section later (and the full group is handled there). 

97 # 

98 # We regroup everything inside the capturing group so that it 

99 # can be quantified, if necessary. 

100 start = non_capturing_groups.pop() 

101 inner = NonCapture(result[start:]) 

102 result = result[:start] + [inner] 

103 elif ch == "[": 

104 # Replace ranges with the first character in the range. 

105 ch, escaped = next(pattern_iter) 

106 result.append(ch) 

107 ch, escaped = next(pattern_iter) 

108 while escaped or ch != "]": 

109 ch, escaped = next(pattern_iter) 

110 elif ch == "(": 

111 # Some kind of group. 

112 ch, escaped = next(pattern_iter) 

113 if ch != "?" or escaped: 

114 # A positional group 

115 name = "_%d" % num_args 

116 num_args += 1 

117 result.append(Group((("%%(%s)s" % name), name))) 

118 walk_to_end(ch, pattern_iter) 

119 else: 

120 ch, escaped = next(pattern_iter) 

121 if ch in "!=<": 

122 # All of these are ignorable. Walk to the end of the 

123 # group. 

124 walk_to_end(ch, pattern_iter) 

125 elif ch == ":": 

126 # Non-capturing group 

127 non_capturing_groups.append(len(result)) 

128 elif ch != "P": 

129 # Anything else, other than a named group, is something 

130 # we cannot reverse. 

131 raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch) 

132 else: 

133 ch, escaped = next(pattern_iter) 

134 if ch not in ("<", "="): 

135 raise ValueError( 

136 "Non-reversible reg-exp portion: '(?P%s'" % ch 

137 ) 

138 # We are in a named capturing group. Extra the name and 

139 # then skip to the end. 

140 if ch == "<": 

141 terminal_char = ">" 

142 # We are in a named backreference. 

143 else: 

144 terminal_char = ")" 

145 name = [] 

146 ch, escaped = next(pattern_iter) 

147 while ch != terminal_char: 

148 name.append(ch) 

149 ch, escaped = next(pattern_iter) 

150 param = "".join(name) 

151 # Named backreferences have already consumed the 

152 # parenthesis. 

153 if terminal_char != ")": 

154 result.append(Group((("%%(%s)s" % param), param))) 

155 walk_to_end(ch, pattern_iter) 

156 else: 

157 result.append(Group((("%%(%s)s" % param), None))) 

158 elif ch in "*?+{": 

159 # Quantifiers affect the previous item in the result list. 

160 count, ch = get_quantifier(ch, pattern_iter) 

161 if ch: 

162 # We had to look ahead, but it wasn't need to compute the 

163 # quantifier, so use this character next time around the 

164 # main loop. 

165 consume_next = False 

166 

167 if count == 0: 

168 if contains(result[-1], Group): 

169 # If we are quantifying a capturing group (or 

170 # something containing such a group) and the minimum is 

171 # zero, we must also handle the case of one occurrence 

172 # being present. All the quantifiers (except {0,0}, 

173 # which we conveniently ignore) that have a 0 minimum 

174 # also allow a single occurrence. 

175 result[-1] = Choice([None, result[-1]]) 

176 else: 

177 result.pop() 

178 elif count > 1: 

179 result.extend([result[-1]] * (count - 1)) 

180 else: 

181 # Anything else is a literal. 

182 result.append(ch) 

183 

184 if consume_next: 

185 ch, escaped = next(pattern_iter) 

186 consume_next = True 

187 except StopIteration: 

188 pass 

189 except NotImplementedError: 

190 # A case of using the disjunctive form. No results for you! 

191 return [("", [])] 

192 

193 return list(zip(*flatten_result(result))) 

194 

195 

196def next_char(input_iter): 

197 r""" 

198 An iterator that yields the next character from "pattern_iter", respecting 

199 escape sequences. An escaped character is replaced by a representative of 

200 its class (e.g. \w -> "x"). If the escaped character is one that is 

201 skipped, it is not returned (the next character is returned instead). 

202 

203 Yield the next character, along with a boolean indicating whether it is a 

204 raw (unescaped) character or not. 

205 """ 

206 for ch in input_iter: 

207 if ch != "\\": 

208 yield ch, False 

209 continue 

210 ch = next(input_iter) 

211 representative = ESCAPE_MAPPINGS.get(ch, ch) 

212 if representative is None: 

213 continue 

214 yield representative, True 

215 

216 

217def walk_to_end(ch, input_iter): 

218 """ 

219 The iterator is currently inside a capturing group. Walk to the close of 

220 this group, skipping over any nested groups and handling escaped 

221 parentheses correctly. 

222 """ 

223 if ch == "(": 

224 nesting = 1 

225 else: 

226 nesting = 0 

227 for ch, escaped in input_iter: 

228 if escaped: 

229 continue 

230 elif ch == "(": 

231 nesting += 1 

232 elif ch == ")": 

233 if not nesting: 

234 return 

235 nesting -= 1 

236 

237 

238def get_quantifier(ch, input_iter): 

239 """ 

240 Parse a quantifier from the input, where "ch" is the first character in the 

241 quantifier. 

242 

243 Return the minimum number of occurrences permitted by the quantifier and 

244 either None or the next character from the input_iter if the next character 

245 is not part of the quantifier. 

246 """ 

247 if ch in "*?+": 

248 try: 

249 ch2, escaped = next(input_iter) 

250 except StopIteration: 

251 ch2 = None 

252 if ch2 == "?": 

253 ch2 = None 

254 if ch == "+": 

255 return 1, ch2 

256 return 0, ch2 

257 

258 quant = [] 

259 while ch != "}": 

260 ch, escaped = next(input_iter) 

261 quant.append(ch) 

262 quant = quant[:-1] 

263 values = "".join(quant).split(",") 

264 

265 # Consume the trailing '?', if necessary. 

266 try: 

267 ch, escaped = next(input_iter) 

268 except StopIteration: 

269 ch = None 

270 if ch == "?": 

271 ch = None 

272 return int(values[0]), ch 

273 

274 

275def contains(source, inst): 

276 """ 

277 Return True if the "source" contains an instance of "inst". False, 

278 otherwise. 

279 """ 

280 if isinstance(source, inst): 

281 return True 

282 if isinstance(source, NonCapture): 

283 for elt in source: 

284 if contains(elt, inst): 

285 return True 

286 return False 

287 

288 

289def flatten_result(source): 

290 """ 

291 Turn the given source sequence into a list of reg-exp possibilities and 

292 their arguments. Return a list of strings and a list of argument lists. 

293 Each of the two lists will be of the same length. 

294 """ 

295 if source is None: 

296 return [""], [[]] 

297 if isinstance(source, Group): 

298 if source[1] is None: 

299 params = [] 

300 else: 

301 params = [source[1]] 

302 return [source[0]], [params] 

303 result = [""] 

304 result_args = [[]] 

305 pos = last = 0 

306 for pos, elt in enumerate(source): 

307 if isinstance(elt, str): 

308 continue 

309 piece = "".join(source[last:pos]) 

310 if isinstance(elt, Group): 

311 piece += elt[0] 

312 param = elt[1] 

313 else: 

314 param = None 

315 last = pos + 1 

316 for i in range(len(result)): 

317 result[i] += piece 

318 if param: 

319 result_args[i].append(param) 

320 if isinstance(elt, (Choice, NonCapture)): 

321 if isinstance(elt, NonCapture): 

322 elt = [elt] 

323 inner_result, inner_args = [], [] 

324 for item in elt: 

325 res, args = flatten_result(item) 

326 inner_result.extend(res) 

327 inner_args.extend(args) 

328 new_result = [] 

329 new_args = [] 

330 for item, args in zip(result, result_args): 

331 for i_item, i_args in zip(inner_result, inner_args): 

332 new_result.append(item + i_item) 

333 new_args.append(args[:] + i_args) 

334 result = new_result 

335 result_args = new_args 

336 if pos >= last: 

337 piece = "".join(source[last:]) 

338 for i in range(len(result)): 

339 result[i] += piece 

340 return result, result_args 

341 

342 

343def _lazy_re_compile(regex, flags=0): 

344 """Lazily compile a regex with flags.""" 

345 

346 def _compile(): 

347 # Compile the regex if it was not passed pre-compiled. 

348 if isinstance(regex, (str, bytes)): 

349 return re.compile(regex, flags) 

350 else: 

351 assert not flags, "flags must be empty if regex is passed pre-compiled" 

352 return regex 

353 

354 return SimpleLazyObject(_compile)