Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/regex_helper.py: 3%

193 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 06:13 +0000

1""" 

Functions for reversing a regular expression (used in reverse URL resolving).
Used internally by Django and not intended for external use.

This is not, and is not intended to be, a complete reg-exp decompiler. It
should be good enough for a large class of URLs, however.

7""" 

8import re 

9 

10from django.utils.functional import SimpleLazyObject 

11 

# Escape-sequence letter -> stand-in character used when building a reverse
# URL (for example, "\w" becomes "x"). A value of None means the sequence is
# dropped from the output entirely. Letters that are not listed here stand for
# themselves (lookups are done with ESCAPE_MAPPINGS.get(ch, ch)).
ESCAPE_MAPPINGS = {
    "A": None,  # \A start-of-string anchor: contributes no character
    "b": None,  # \b word boundary: contributes no character
    "B": None,  # \B non-boundary: contributes no character
    "d": "0",  # \d digit
    "D": "x",  # \D non-digit
    "s": " ",  # \s whitespace
    "S": "x",  # \S non-whitespace
    "w": "x",  # \w word character
    "W": "!",  # \W non-word character
    "Z": None,  # \Z end-of-string anchor: contributes no character
}

27 

28 

class Choice(list):
    """
    A list of alternatives for one position in the pattern.

    normalize() builds these as Choice([None, item]) for an optional item that
    contains a capturing group; flatten_result() expands every alternative.
    """

31 

32 

class Group(list):
    """
    A capturing group found in the pattern string.

    normalize() stores two items: a "%(name)s" placeholder and the parameter
    name (None for a named backreference).
    """

35 

36 

class NonCapture(list):
    """
    The items parsed inside a non-capturing "(?:...)" group.

    Wrapping them in a single list lets a following quantifier apply to the
    group as a whole.
    """

39 

40 

def normalize(pattern):
    r"""
    Given a reg-exp pattern, normalize it to an iterable of forms that
    suffice for reverse matching. This does the following:

    (1) For any repeating sections, keeps the minimum number of occurrences
        permitted (this means zero for optional groups).
    (2) If an optional group includes parameters, include one occurrence of
        that group (along with the zero occurrence case from step (1)).
    (3) Select the first (essentially an arbitrary) element from any character
        class. Select an arbitrary character for any unordered class (e.g. '.'
        or '\w') in the pattern.
    (4) Ignore look-ahead and look-behind assertions.
    (5) Give up on any disjunctive ('|') constructs: the whole pattern then
        normalizes to the single empty candidate [("", [])].

    Django's URLs for forward resolving are either all positional arguments or
    all keyword arguments. That is assumed here, as well. Although reverse
    resolving can be done using positional args when keyword args are
    specified, the two cannot be mixed in the same reverse() call.

    Return a list of (format_string, parameter_names) tuples, one per
    candidate form. Positional groups are named "_0", "_1", ... in order of
    appearance.

    Raise ValueError for a "(?..." construct that cannot be reversed.
    """
    # Do a linear scan to work out the special features of this pattern. The
    # idea is that we scan once here and collect all the information we need to
    # make future decisions.
    result = []
    # Start offsets (indexes into "result") of the currently open "(?:" groups.
    non_capturing_groups = []
    consume_next = True
    num_args = 0

    # get_quantifier() may read one character too many and hands it back
    # without its "escaped" flag. Remember the flag of the most recent
    # character pulled from the iterator so that it can be restored; otherwise
    # an escaped metacharacter following a quantifier (e.g. "a+\$") would be
    # misread as an unescaped one.
    last_escaped = False

    def _tracked(chars):
        nonlocal last_escaped
        for item in chars:
            last_escaped = item[1]
            yield item

    pattern_iter = _tracked(next_char(iter(pattern)))

    # A "while" loop is used here because later on we need to be able to peek
    # at the next character and possibly go around without consuming another
    # one at the top of the loop.
    try:
        ch, escaped = next(pattern_iter)
    except StopIteration:
        return [("", [])]

    try:
        while True:
            if escaped:
                result.append(ch)
            elif ch == ".":
                # Replace "any character" with an arbitrary representative.
                result.append(".")
            elif ch == "|":
                # FIXME: One day we should do this, but not in 1.0.
                raise NotImplementedError("Awaiting Implementation")
            elif ch == "^":
                pass
            elif ch == "$":
                break
            elif ch == ")":
                # This can only be the end of a non-capturing group, since all
                # other unescaped parentheses are handled by the grouping
                # section later (and the full group is handled there).
                #
                # We regroup everything inside the non-capturing group so that
                # it can be quantified, if necessary.
                start = non_capturing_groups.pop()
                inner = NonCapture(result[start:])
                result = result[:start] + [inner]
            elif ch == "[":
                # Replace ranges with the first character in the range.
                ch, escaped = next(pattern_iter)
                result.append(ch)
                ch, escaped = next(pattern_iter)
                while escaped or ch != "]":
                    ch, escaped = next(pattern_iter)
            elif ch == "(":
                # Some kind of group.
                ch, escaped = next(pattern_iter)
                if ch != "?" or escaped:
                    # A positional group
                    name = "_%d" % num_args
                    num_args += 1
                    result.append(Group((("%%(%s)s" % name), name)))
                    walk_to_end(ch, pattern_iter)
                else:
                    ch, escaped = next(pattern_iter)
                    if ch in "!=<":
                        # All of these are ignorable. Walk to the end of the
                        # group.
                        walk_to_end(ch, pattern_iter)
                    elif ch == ":":
                        # Non-capturing group
                        non_capturing_groups.append(len(result))
                    elif ch != "P":
                        # Anything else, other than a named group, is something
                        # we cannot reverse.
                        raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
                    else:
                        ch, escaped = next(pattern_iter)
                        if ch not in ("<", "="):
                            raise ValueError(
                                "Non-reversible reg-exp portion: '(?P%s'" % ch
                            )
                        # We are in a named capturing group. Extract the name
                        # and then skip to the end.
                        if ch == "<":
                            terminal_char = ">"
                        # We are in a named backreference.
                        else:
                            terminal_char = ")"
                        name = []
                        ch, escaped = next(pattern_iter)
                        while ch != terminal_char:
                            name.append(ch)
                            ch, escaped = next(pattern_iter)
                        param = "".join(name)
                        # Named backreferences have already consumed the
                        # parenthesis.
                        if terminal_char != ")":
                            result.append(Group((("%%(%s)s" % param), param)))
                            walk_to_end(ch, pattern_iter)
                        else:
                            result.append(Group((("%%(%s)s" % param), None)))
            elif ch in "*?+{":
                # Quantifiers affect the previous item in the result list.
                count, ch = get_quantifier(ch, pattern_iter)
                if ch:
                    # We had to look ahead, but it wasn't needed to compute the
                    # quantifier, so use this character next time around the
                    # main loop, together with its own escaped flag.
                    consume_next = False
                    escaped = last_escaped

                if count == 0:
                    if contains(result[-1], Group):
                        # If we are quantifying a capturing group (or
                        # something containing such a group) and the minimum is
                        # zero, we must also handle the case of one occurrence
                        # being present. All the quantifiers (except {0,0},
                        # which we conveniently ignore) that have a 0 minimum
                        # also allow a single occurrence.
                        result[-1] = Choice([None, result[-1]])
                    else:
                        result.pop()
                elif count > 1:
                    result.extend([result[-1]] * (count - 1))
            else:
                # Anything else is a literal.
                result.append(ch)

            if consume_next:
                ch, escaped = next(pattern_iter)
            consume_next = True
    except StopIteration:
        # Ran out of pattern: whatever has been collected so far is the result.
        pass
    except NotImplementedError:
        # A case of using the disjunctive form. No results for you!
        return [("", [])]

    return list(zip(*flatten_result(result)))

193 

194 

def next_char(input_iter):
    r"""
    An iterator that yields the next character from "input_iter", respecting
    escape sequences. An escaped character is replaced by a representative of
    its class (e.g. \w -> "x"). If the escaped character is one that is
    skipped, it is not returned (the next character is returned instead).

    Yield (character, escaped) pairs, where "escaped" is True if the character
    came from a backslash escape sequence and False if it appeared raw in the
    input.

    A dangling backslash at the very end of the input yields nothing further:
    the generator simply ends.
    """
    for ch in input_iter:
        if ch != "\\":
            yield ch, False
            continue
        try:
            ch = next(input_iter)
        except StopIteration:
            # A StopIteration escaping a generator body is converted into a
            # RuntimeError (PEP 479), so finish explicitly instead.
            return
        representative = ESCAPE_MAPPINGS.get(ch, ch)
        if representative is None:
            continue
        yield representative, True

214 

215 

def walk_to_end(ch, input_iter):
    """
    Advance "input_iter" past the closing parenthesis of the group the caller
    is currently inside.

    "ch" is the character the caller consumed last; "input_iter" yields
    (character, escaped) pairs. Nested groups are skipped as a whole and
    escaped parentheses are ignored. If the input runs out first, the iterator
    is simply left exhausted.
    """
    # An opening parenthesis already consumed by the caller starts a nested
    # group that has to be closed before the enclosing one can end.
    depth = 1 if ch == "(" else 0
    for char, is_escaped in input_iter:
        if is_escaped:
            continue
        if char == "(":
            depth += 1
        elif char == ")":
            if depth == 0:
                return
            depth -= 1

235 

236 

def get_quantifier(ch, input_iter):
    """
    Parse a quantifier from the input, where "ch" is the first character in the
    quantifier.

    "input_iter" yields (character, escaped) pairs positioned just after "ch".

    Return the minimum number of occurrences permitted by the quantifier and
    either None or the next character from the input_iter if the next character
    is not part of the quantifier. An omitted lower bound, as in "{,3}", means
    a minimum of zero.

    StopIteration propagates if a "{" quantifier is never closed.
    """
    if ch in "*?+":
        try:
            ch2, escaped = next(input_iter)
        except StopIteration:
            ch2 = None
        # A trailing "?" only makes the quantifier non-greedy; swallow it.
        if ch2 == "?":
            ch2 = None
        if ch == "+":
            return 1, ch2
        return 0, ch2

    # "{m}", "{m,}", "{m,n}" or "{,n}": collect everything up to the "}".
    quant = []
    while ch != "}":
        ch, escaped = next(input_iter)
        quant.append(ch)
    quant = quant[:-1]  # Drop the closing brace.
    values = "".join(quant).split(",")

    # Consume the trailing '?', if necessary.
    try:
        ch, escaped = next(input_iter)
    except StopIteration:
        ch = None
    if ch == "?":
        ch = None

    # An empty lower bound ("{,n}") stands for zero occurrences.
    minimum = int(values[0]) if values[0] else 0
    return minimum, ch

272 

273 

def contains(source, inst):
    """
    Tell whether "source" is an instance of "inst" or holds one.

    Only NonCapture containers are searched (recursively); any other kind of
    object counts only if it is itself an instance of "inst".
    """
    if isinstance(source, inst):
        return True
    return isinstance(source, NonCapture) and any(
        contains(child, inst) for child in source
    )

286 

287 

def flatten_result(source):
    """
    Expand the parsed "source" sequence into every candidate format string
    together with the parameter names each candidate uses.

    Return a (strings, argument_lists) pair of equally long lists. None
    expands to a single empty candidate and a Group to its placeholder.
    """
    if source is None:
        return [""], [[]]
    if isinstance(source, Group):
        arg = source[1]
        return [source[0]], [[] if arg is None else [arg]]

    strings = [""]
    arg_lists = [[]]
    index = consumed = 0
    for index, node in enumerate(source):
        if isinstance(node, str):
            # Plain text is gathered lazily, when the next structured node (or
            # the end of the sequence) is reached.
            continue
        literal = "".join(source[consumed:index])
        consumed = index + 1
        if isinstance(node, Group):
            literal += node[0]
            arg = node[1]
        else:
            arg = None
        strings = [text + literal for text in strings]
        if arg:
            for args in arg_lists:
                args.append(arg)
        if not isinstance(node, (Choice, NonCapture)):
            continue

        # A NonCapture is a single alternative; a Choice lists several.
        alternatives = [node] if isinstance(node, NonCapture) else node
        inner_strings, inner_args = [], []
        for alternative in alternatives:
            sub_strings, sub_args = flatten_result(alternative)
            inner_strings.extend(sub_strings)
            inner_args.extend(sub_args)

        # Combine every candidate built so far with every inner alternative.
        combined = [
            (text + inner_text, args[:] + extra)
            for text, args in zip(strings, arg_lists)
            for inner_text, extra in zip(inner_strings, inner_args)
        ]
        strings = [text for text, _ in combined]
        arg_lists = [args for _, args in combined]

    if index >= consumed:
        # Trailing plain text after the last structured node.
        tail = "".join(source[consumed:])
        strings = [text + tail for text in strings]
    return strings, arg_lists

340 

341 

def _lazy_re_compile(regex, flags=0):
    """
    Return a SimpleLazyObject wrapping the compilation of "regex".

    "regex" may be a str/bytes pattern, compiled with "flags", or an already
    compiled pattern, which is passed through unchanged and requires "flags"
    to be empty. The check and the compilation happen inside the wrapped
    callable, i.e. whenever SimpleLazyObject invokes it (presumably on first
    use -- confirm against SimpleLazyObject).
    """

    def _compile():
        if not isinstance(regex, (str, bytes)):
            # Already compiled: flags cannot be applied after the fact.
            assert not flags, "flags must be empty if regex is passed pre-compiled"
            return regex
        return re.compile(regex, flags)

    return SimpleLazyObject(_compile)