Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/xml/etree/ElementPath.py: 25%

2# ElementTree

3# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $

5# limited xpath support for element trees

7# history:

8# 2003-05-23 fl created

9# 2003-05-28 fl added support for // etc

10# 2003-08-27 fl fixed parsing of periods in element names

11# 2007-09-10 fl new selection engine

12# 2007-09-12 fl fixed parent selector

13# 2007-09-13 fl added iterfind; changed findall to return a list

14# 2007-11-30 fl added namespaces support

15# 2009-10-30 fl added child element value filter

16#

18#

19# fredrik@pythonware.com

20# http://www.pythonware.com

21#

22# --------------------------------------------------------------------

23# The ElementTree toolkit is

24#

26#

27# By obtaining, using, and/or copying this software and/or its

28# associated documentation, you agree that you have read, understood,

29# and will comply with the following terms and conditions:

30#

31# Permission to use, copy, modify, and distribute this software and

32# its associated documentation for any purpose and without fee is

33# hereby granted, provided that the above copyright notice appears in

34# all copies, and that both that copyright notice and this permission

35# notice appear in supporting documentation, and that the name of

36# Secret Labs AB or the author not be used in advertising or publicity

37# pertaining to distribution of the software without specific, written

38# prior permission.

39#

40# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD

41# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-

42# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR

43# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY

44# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,

45# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS

46# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE

47# OF THIS SOFTWARE.

48# --------------------------------------------------------------------

50# Licensed to PSF under a Contributor Agreement.

51# See http://www.python.org/psf/license for licensing details.

53##

54# Implementation module for XPath support. There's usually no reason

55# to import this module directly; the <b>ElementTree</b> does this for

56# you, if needed.

57##

59import re

# Tokenizer for the supported XPath subset.  ``findall`` returns one
# ``(operator, tag)`` pair per match:
#   * group 1 captures quoted strings and operator tokens
#     ('...', "...", ::, /, //, .., (), and the single characters
#     / . * : [ ] ( ) @ =);
#   * group 2 captures a tag name, optionally preceded by ``{namespace}``;
#   * a run of whitespace matches neither group and yields ``('', '')``.
# Parentheses must be tokenized on their own: the predicate parser
# recognises ``last()`` through the ``()`` token, and an empty alternative
# in group 1 would otherwise produce spurious empty tokens.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )

def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(operator, tag)`` tokens for *pattern*, resolving namespaces.

    Tokens come from ``xpath_tokenizer_re.findall``.  Tag names that are
    not already in ``{uri}name`` form are rewritten:

    * ``prefix:name`` becomes ``{uri}name`` using *namespaces*; an unknown
      prefix (or no mapping at all) raises ``SyntaxError``.
    * an unprefixed name gets the default namespace (the ``''`` key of
      *namespaces*), unless it directly follows an ``@`` token.

    All other tokens are passed through unchanged.
    """
    default_ns = namespaces.get('') if namespaces else None
    # True only while the previous token was "@", i.e. the next name is
    # an attribute name and must not receive the default namespace.
    after_at = False
    for token in xpath_tokenizer_re.findall(pattern):
        kind, name = token
        if not name or name[0] == "{":
            # Operator, whitespace, or an already-qualified tag.
            yield token
            after_at = kind == '@'
            continue

        if ":" in name:
            prefix, local = name.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield kind, "{%s}%s" % (namespaces[prefix], local)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
        elif default_ns and not after_at:
            yield kind, "{%s}%s" % (default_ns, name)
        else:
            yield token
        after_at = False

def get_parent_map(context):
    """Return the child -> parent mapping for ``context.root``.

    The mapping is built on first use by walking ``context.root.iter()``
    and is stored on ``context.parent_map`` so later calls reuse it.
    """
    mapping = context.parent_map
    if mapping is not None:
        return mapping

    mapping = {}
    # Publish the dict before filling it, as the cache slot for this context.
    context.parent_map = mapping
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping

105

106

107def _is_wildcard_tag(tag):

108 return tag[:3] == '{*}' or tag[-2:] == '}*'

109

110

111def _prepare_tag(tag):

112 _isinstance, _str = isinstance, str

113 if tag == '{*}*':

114 # Same as '*', but no comments or processing instructions.

115 # It can be a surprise that '*' includes those, but there is no

116 # justification for '{*}*' doing the same.

117 def select(context, result):

118 for elem in result:

119 if _isinstance(elem.tag, _str):

120 yield elem

121 elif tag == '{}*':

122 # Any tag that is not in a namespace.

123 def select(context, result):

124 for elem in result:

125 el_tag = elem.tag

126 if _isinstance(el_tag, _str) and el_tag[0] != '{':

127 yield elem

128 elif tag[:3] == '{*}':

129 # The tag in any (or no) namespace.

130 suffix = tag[2:] # '}name'

131 no_ns = slice(-len(suffix), None)

132 tag = tag[3:]

133 def select(context, result):

134 for elem in result:

135 el_tag = elem.tag

136 if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix:

137 yield elem

138 elif tag[-2:] == '}*':

139 # Any tag in the given namespace.

140 ns = tag[:-1]

141 ns_only = slice(None, len(ns))

142 def select(context, result):

143 for elem in result:

144 el_tag = elem.tag

145 if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:

146 yield elem

147 else:

148 raise RuntimeError(f"internal parser error, got {tag}")

149 return select

150

151

def prepare_child(next, token):
    """Compile a tag-name step: select matching direct children.

    ``token[1]`` is the tag.  Wildcard tags are delegated to the filter
    built by ``_prepare_tag``; otherwise children are compared by exact
    tag, with a leading ``{}`` (explicit "no namespace") stripped first.
    *next* is not used.
    """
    tag = token[1]
    if not _is_wildcard_tag(tag):
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'

        def select(context, result):
            for parent in result:
                for child in parent:
                    if child.tag == tag:
                        yield child
        return select

    tag_filter = _prepare_tag(tag)

    def select(context, result):
        def children(parents):
            for parent in parents:
                for child in parent:
                    yield child
        return tag_filter(context, children(result))
    return select

170

def prepare_star(next, token):
    """Compile the ``*`` step: select every direct child of each element.

    Neither *next* nor *token* is used.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select

176

def prepare_self(next, token):
    """Compile the ``.`` step: pass the current elements through unchanged.

    Neither *next* nor *token* is used.
    """
    def select(context, result):
        for elem in result:
            yield elem
    return select

181

def prepare_descendant(next, token):
    """Compile the ``//`` step: select matching descendants at any depth.

    The tag to match is read from the token that follows ``//``: either
    ``*`` or a tag name.  Any other token raises ``SyntaxError``.  If the
    token stream is already exhausted the function returns ``None``.
    The element itself is never part of its own result.
    """
    try:
        following = next()
    except StopIteration:
        return

    kind = following[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    if _is_wildcard_tag(tag):
        tag_filter = _prepare_tag(tag)

        def select(context, result):
            def descendants(roots):
                for root in roots:
                    for node in root.iter():
                        if node is not root:
                            yield node
            return tag_filter(context, descendants(result))
    else:
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'

        def select(context, result):
            for root in result:
                for node in root.iter(tag):
                    if node is not root:
                        yield node
    return select

212

def prepare_parent(next, token):
    """Compile the ``..`` step: select the parent of each element.

    Parents are looked up in the map returned by ``get_parent_map`` for
    the context.  Elements without an entry there are skipped, and each
    parent is yielded at most once.  Neither *next* nor *token* is used.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        already_yielded = set()
        for elem in result:
            if elem not in parents:
                continue
            parent = parents[elem]
            if parent in already_yielded:
                continue
            already_yielded.add(parent)
            yield parent
    return select

225

def prepare_predicate(next, token):
    """Compile a ``[...]`` predicate into a selector.

    Tokens are consumed from *next* up to the closing ``]``.  Whitespace
    tokens are skipped and quoted strings are unquoted.  The token types
    are concatenated into a signature string (``-`` stands for a name or
    number, ``'`` for a quoted string), which selects the predicate form:

    * ``[@attribute]``, ``[@attribute='value']``
    * ``[tag]``, ``[tag='value']``, ``[.='value']``
    * ``[index]``, ``[last()]``, ``[last()-index]``

    Returns a generator function ``select(context, result)``.

    Raises ``SyntaxError`` for an unrecognised predicate, an invalid
    position, or a predicate that is not closed before the path ends.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while 1:
        try:
            token = next()
        except StopIteration:
            # The path ended before the closing "]".  Returning None here
            # would leave a non-callable entry in the compiled selector
            # list, which only fails later with an unrelated TypeError.
            raise SyntaxError("invalid predicate") from None
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted string: normalise the type to "'" and strip the quotes
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
        # [.='value'] or [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        if tag:
            def select(context, result):
                for elem in result:
                    for e in elem.findall(tag):
                        if "".join(e.itertext()) == value:
                            yield elem
                            break
        else:
            # "." contributes an empty tag: compare the element's own text
            def select(context, result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]; XPath positions are 1-based, list indexes 0-based
            index = int(predicate[0]) - 1
            if index < 0:
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # the offset token carries its own minus sign, e.g. "-1",
                # so the result is a negative index counted from the end
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression") from None
                if index > -2:
                    raise SyntaxError("XPath offset from last() must be negative")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no parent known, or fewer siblings than the index
                    pass
        return select
    raise SyntaxError("invalid predicate")

322

# Dispatch table used when compiling a path: the operator part of a token
# (``token[0]``) selects the factory that turns it into a selector.
# Tag-name tokens have an empty operator and are compiled as child steps.
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}

# Compiled selector lists, keyed by the path plus the sorted namespace
# items; iterfind() empties it once it holds more than 100 entries.
_cache = {}

333

334class _SelectorContext:

335 parent_map = None

336 def __init__(self, root):

337 self.root = root

338

339# --------------------------------------------------------------------

340

341##

342# Generate all matching objects.

343

def iterfind(elem, path, namespaces=None):
    """Return an iterable over the elements matching *path* below *elem*.

    *path* is compiled into a list of selectors, one per step, using the
    ``ops`` dispatch table.  The compiled list is cached in ``_cache``
    under the path and the sorted items of *namespaces*.  The selectors
    are then chained, starting from ``[elem]``, and the iterable produced
    by the last one is returned.

    A path ending in ``/`` is treated as if ``*`` followed it.  An empty
    path matches nothing and yields an empty iterator.

    Raises ``SyntaxError`` for an absolute path (leading ``/``) or when a
    step runs out of tokens while being compiled.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        cache_key += tuple(sorted(namespaces.items()))

    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            # crude bound on memory use: drop everything and start over
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        next = iter(xpath_tokenizer(path, namespaces)).__next__
        try:
            token = next()
        except StopIteration:
            # No tokens at all (empty path).  Return an empty iterator
            # rather than None so that find(), findall() and findtext()
            # report "no match" instead of failing with a TypeError.
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next, token))
            except StopIteration:
                raise SyntaxError("invalid path") from None
            try:
                token = next()
                if token[0] == "/":
                    # "/" only separates steps; move on to the next one
                    token = next()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result

384

385##

386# Find first matching object.

387

def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if there is none."""
    matches = iterfind(elem, path, namespaces)
    return next(matches, None)

390

391##

392# Find all matching objects.

393

def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)

396

397##

398# Find text for first matching object.

399

def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches, and ``""`` when the matching
    element's ``text`` is falsy (for example ``None``).
    """
    try:
        first = next(iterfind(elem, path, namespaces))
        text = first.text
        return text if text else ""
    except StopIteration:
        return default