Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/xml/etree/ElementPath.py: 25%

259 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1# 

2# ElementTree 

3# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ 

4# 

5# limited xpath support for element trees 

6# 

7# history: 

8# 2003-05-23 fl created 

9# 2003-05-28 fl added support for // etc 

10# 2003-08-27 fl fixed parsing of periods in element names 

11# 2007-09-10 fl new selection engine 

12# 2007-09-12 fl fixed parent selector 

13# 2007-09-13 fl added iterfind; changed findall to return a list 

14# 2007-11-30 fl added namespaces support 

15# 2009-10-30 fl added child element value filter 

16# 

17# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 

18# 

19# fredrik@pythonware.com 

20# http://www.pythonware.com 

21# 

22# -------------------------------------------------------------------- 

23# The ElementTree toolkit is 

24# 

25# Copyright (c) 1999-2009 by Fredrik Lundh 

26# 

27# By obtaining, using, and/or copying this software and/or its 

28# associated documentation, you agree that you have read, understood, 

29# and will comply with the following terms and conditions: 

30# 

31# Permission to use, copy, modify, and distribute this software and 

32# its associated documentation for any purpose and without fee is 

33# hereby granted, provided that the above copyright notice appears in 

34# all copies, and that both that copyright notice and this permission 

35# notice appear in supporting documentation, and that the name of 

36# Secret Labs AB or the author not be used in advertising or publicity 

37# pertaining to distribution of the software without specific, written 

38# prior permission. 

39# 

40# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 

41# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 

42# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 

43# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 

44# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 

45# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

46# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 

47# OF THIS SOFTWARE. 

48# -------------------------------------------------------------------- 

49 

50# Licensed to PSF under a Contributor Agreement. 

51# See http://www.python.org/psf/license for licensing details. 

52 

53## 

54# Implementation module for XPath support. There's usually no reason 

55# to import this module directly; the <b>ElementTree</b> does this for 

56# you, if needed. 

57## 

58 

59import re 

60 

# Tokenizer for the supported XPath subset.  Each match is reported by
# ``findall`` as a ``(operator, name)`` pair:
#   * group 1 captures quoted strings and operator/punctuation tokens,
#   * group 2 captures names, optionally preceded by a "{uri}" qualifier,
#   * whitespace matches without filling either group, i.e. ('', '').
xpath_tokenizer_re = re.compile(
    "|".join((
        "(" + "|".join((
            r"'[^']*'",             # single-quoted string
            r"\"[^\"]*\"",          # double-quoted string
            r"::",
            r"//?",                 # "/" or "//"
            r"\.\.",
            r"\(\)",
            r"[/.*:\[\]\(\)@=]",    # single-character operators
        )) + ")",
        r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)",
        r"\s+",
    ))
)

72 

def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(operator, name)`` token pairs for the path *pattern*.

    Tokens come from ``xpath_tokenizer_re``.  Names are rewritten as they
    pass through:

    * ``prefix:name`` becomes ``{uri}name`` using the *namespaces* mapping;
    * an unprefixed name gets the default namespace (the ``''`` key of
      *namespaces*), unless it directly follows an ``@`` token;
    * names already written as ``{uri}name`` and all non-name tokens are
      yielded unchanged.

    Raises SyntaxError if a prefix is not present in *namespaces*.
    """
    default_namespace = namespaces.get('') if namespaces else None
    after_at = False  # True when the previous token was '@'
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        # Operators, whitespace and already-qualified names pass through.
        if not tag or tag[0] == "{":
            yield token
            after_at = ttype == '@'
            continue

        if ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
            yield ttype, "{%s}%s" % (uri, local_name)
        elif default_namespace and not after_at:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at = False

95 

96 

def get_parent_map(context):
    """Return the child -> parent mapping stored on *context*.

    The mapping is built on first use by walking ``context.root.iter()``
    and is stored as ``context.parent_map`` so later calls reuse it.
    """
    cached = context.parent_map
    if cached is not None:
        return cached

    # Publish the dict before filling it, then record every (child, parent).
    context.parent_map = mapping = {}
    mapping.update(
        (child, parent)
        for parent in context.root.iter()
        for child in parent
    )
    return mapping

105 

106 

107def _is_wildcard_tag(tag): 

108 return tag[:3] == '{*}' or tag[-2:] == '}*' 

109 

110 

111def _prepare_tag(tag): 

112 _isinstance, _str = isinstance, str 

113 if tag == '{*}*': 

114 # Same as '*', but no comments or processing instructions. 

115 # It can be a surprise that '*' includes those, but there is no 

116 # justification for '{*}*' doing the same. 

117 def select(context, result): 

118 for elem in result: 

119 if _isinstance(elem.tag, _str): 

120 yield elem 

121 elif tag == '{}*': 

122 # Any tag that is not in a namespace. 

123 def select(context, result): 

124 for elem in result: 

125 el_tag = elem.tag 

126 if _isinstance(el_tag, _str) and el_tag[0] != '{': 

127 yield elem 

128 elif tag[:3] == '{*}': 

129 # The tag in any (or no) namespace. 

130 suffix = tag[2:] # '}name' 

131 no_ns = slice(-len(suffix), None) 

132 tag = tag[3:] 

133 def select(context, result): 

134 for elem in result: 

135 el_tag = elem.tag 

136 if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix: 

137 yield elem 

138 elif tag[-2:] == '}*': 

139 # Any tag in the given namespace. 

140 ns = tag[:-1] 

141 ns_only = slice(None, len(ns)) 

142 def select(context, result): 

143 for elem in result: 

144 el_tag = elem.tag 

145 if _isinstance(el_tag, _str) and el_tag[ns_only] == ns: 

146 yield elem 

147 else: 

148 raise RuntimeError(f"internal parser error, got {tag}") 

149 return select 

150 

151 

def prepare_child(next, token):
    """Build the selector for a child step naming a tag.

    ``token[1]`` is the tag.  Wildcard tags (see ``_is_wildcard_tag``) are
    filtered through ``_prepare_tag``; any other tag is compared for
    equality with each child's ``tag``.  A leading ``{}`` is dropped first,
    since ``'{}tag'`` means the same as ``'tag'``.  *next* is unused.
    """
    tag = token[1]

    if _is_wildcard_tag(tag):
        tag_filter = _prepare_tag(tag)

        def select(context, result):
            def children_of(parents):
                for parent in parents:
                    yield from parent
            return tag_filter(context, children_of(result))

        return select

    if tag[:2] == '{}':
        tag = tag[2:]  # '{}tag' == 'tag'

    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag == tag:
                    yield child

    return select

170 

def prepare_star(next, token):
    """Build the selector for ``*``: yield every child of every item.

    Both *next* and *token* are unused.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select

176 

def prepare_self(next, token):
    """Build the selector for ``.``: yield each item of the input unchanged.

    Both *next* and *token* are unused.
    """
    def select(context, result):
        for elem in result:
            yield elem
    return select

181 

def prepare_descendant(next, token):
    """Build the selector for ``//``, matching descendants at any depth.

    The step that follows ``//`` is read with *next*: ``*`` selects all
    descendants and a name token selects descendants with that tag.
    Wildcard tags (see ``_is_wildcard_tag``) are filtered through
    ``_prepare_tag``.  The context element itself is never yielded.

    Raises SyntaxError if no token follows ``//`` or if the following
    token is neither ``*`` nor a name.
    """
    try:
        token = next()
    except StopIteration:
        # Returning None here would place a non-callable step in the
        # compiled selector; report the malformed path instead.
        raise SyntaxError("invalid descendant") from None
    if token[0] == "*":
        tag = "*"
    elif not token[0]:
        tag = token[1]
    else:
        raise SyntaxError("invalid descendant")

    if _is_wildcard_tag(tag):
        select_tag = _prepare_tag(tag)
        def select(context, result):
            def select_child(result):
                for elem in result:
                    for e in elem.iter():
                        # iter() starts with elem itself; skip it.
                        if e is not elem:
                            yield e
            return select_tag(context, select_child(result))
    else:
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'
        def select(context, result):
            for elem in result:
                for e in elem.iter(tag):
                    if e is not elem:
                        yield e
    return select

212 

def prepare_parent(next, token):
    """Build the selector for ``..``: yield the parent of each item.

    Parents are looked up in the map from ``get_parent_map(context)``.
    Items without an entry there are skipped, and each parent is yielded
    at most once.  Both *next* and *token* are unused.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parent_map = get_parent_map(context)
        seen = set()
        for elem in result:
            if elem not in parent_map:
                continue
            parent = parent_map[elem]
            if parent in seen:
                continue
            seen.add(parent)
            yield parent
    return select

225 

def prepare_predicate(next, token):
    """Build the selector for a ``[...]`` predicate.

    Tokens are read with *next* up to the closing ``]``.  Whitespace tokens
    are skipped and quoted strings are unquoted.  The sequence of token
    types forms a signature ("-" stands for a name or number, "'" for a
    string) that selects one of the supported forms:

    * ``[@attribute]`` and ``[@attribute='value']``
    * ``[tag]`` and ``[tag='value']``
    * ``[.='value']``
    * ``[index]``, ``[last()]`` and ``[last()-index]``

    Returns a ``select(context, result)`` generator function.

    Raises SyntaxError if the predicate is not closed by ``]``, does not
    match a supported form, or uses an invalid position or function.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while True:
        try:
            token = next()
        except StopIteration:
            # Tokens ran out before "]".  Returning None here would place a
            # non-callable step in the compiled selector.
            raise SyntaxError("invalid predicate") from None
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # Quoted string: type becomes "'", value is the unquoted text.
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
        # [.='value'] or [tag='value']
        tag = predicate[0]  # empty for the "." form
        value = predicate[-1]
        if tag:
            def select(context, result):
                for elem in result:
                    for e in elem.findall(tag):
                        if "".join(e.itertext()) == value:
                            yield elem
                            break
        else:
            def select(context, result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]: convert the 1-based position to a 0-based index
            index = int(predicate[0]) - 1
            if index < 0:
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # predicate[2] is the "-N" token; "-1" becomes index -2
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if index > -2:
                    raise SyntaxError("XPath offset from last() must be negative")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # No parent recorded, or fewer siblings than the index.
                    pass
        return select
    raise SyntaxError("invalid predicate")

322 

# Dispatch table: type of the token that starts a path step -> factory that
# builds the selector for that step.  Each factory is called as
# factory(next, token).
ops = {
    "": prepare_child,          # plain tag name
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}

# Compiled selectors, keyed by the path plus the sorted namespace items.
_cache = {}

333 

334class _SelectorContext: 

335 parent_map = None 

336 def __init__(self, root): 

337 self.root = root 

338 

339# -------------------------------------------------------------------- 

340 

341## 

342# Generate all matching objects. 

343 

def iterfind(elem, path, namespaces=None):
    """Return an iterable of the objects under *elem* that match *path*.

    *namespaces* is an optional prefix -> URI mapping passed to the
    tokenizer.  The path is compiled into a list of selector steps, which
    is cached in ``_cache`` keyed by the path and the sorted namespace
    items; the cache is cleared once it holds more than 100 entries.

    A path that yields no tokens (for example the empty string) matches
    nothing and gives an empty iterator.

    Raises SyntaxError for an absolute path (leading "/") or when a step
    runs out of tokens while being compiled.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        cache_key += tuple(sorted(namespaces.items()))

    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        next_token = iter(xpath_tokenizer(path, namespaces)).__next__
        try:
            token = next_token()
        except StopIteration:
            # This is not a generator, so a bare return would hand None to
            # find()/findall()/findtext(), which then raise TypeError.
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                raise SyntaxError("invalid path") from None
            try:
                token = next_token()
                if token[0] == "/":
                    # Skip the separator; the next token starts the step.
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result

384 

385## 

386# Find first matching object. 

387 

def find(elem, path, namespaces=None):
    """Return the first object matching *path* under *elem*, or None."""
    for match in iterfind(elem, path, namespaces):
        return match
    return None

390 

391## 

392# Find all matching objects. 

393 

def findall(elem, path, namespaces=None):
    """Return a list of all objects matching *path* under *elem*."""
    return [*iterfind(elem, path, namespaces)]

396 

397## 

398# Find text for first matching object. 

399 

def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first object matching *path* under *elem*.

    Returns *default* when nothing matches, and ``""`` when the first
    match has no text (``None`` or an empty string).
    """
    matches = iterfind(elem, path, namespaces)
    try:
        first = next(matches)
    except StopIteration:
        return default
    return first.text or ""