Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/xml/etree/ElementTree.py: 17%

1020 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Lightweight XML support for Python. 

2 

3 XML is an inherently hierarchical data format, and the most natural way to 

4 represent it is with a tree. This module has two classes for this purpose: 

5 

6 1. ElementTree represents the whole XML document as a tree and 

7 

8 2. Element represents a single node in this tree. 

9 

10 Interactions with the whole document (reading and writing to/from files) are 

11 usually done on the ElementTree level. Interactions with a single XML element 

12 and its sub-elements are done on the Element level. 

13 

14 Element is a flexible container object designed to store hierarchical data 

15 structures in memory. It can be described as a cross between a list and a 

16 dictionary. Each Element has a number of properties associated with it: 

17 

18 'tag' - a string containing the element's name. 

19 

20 'attributes' - a Python dictionary storing the element's attributes. 

21 

22 'text' - a string containing the element's text content. 

23 

24 'tail' - an optional string containing text after the element's end tag. 

25 

26 And a number of child elements stored in a Python sequence. 

27 

28 To create an element instance, use the Element constructor, 

29 or the SubElement factory function. 

30 

31 You can also use the ElementTree class to wrap an element structure 

32 and convert it to and from XML. 

33 

34""" 

35 

36#--------------------------------------------------------------------- 

37# Licensed to PSF under a Contributor Agreement. 

38# See http://www.python.org/psf/license for licensing details. 

39# 

40# ElementTree 

41# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 

42# 

43# fredrik@pythonware.com 

44# http://www.pythonware.com 

45# -------------------------------------------------------------------- 

46# The ElementTree toolkit is 

47# 

48# Copyright (c) 1999-2008 by Fredrik Lundh 

49# 

50# By obtaining, using, and/or copying this software and/or its 

51# associated documentation, you agree that you have read, understood, 

52# and will comply with the following terms and conditions: 

53# 

54# Permission to use, copy, modify, and distribute this software and 

55# its associated documentation for any purpose and without fee is 

56# hereby granted, provided that the above copyright notice appears in 

57# all copies, and that both that copyright notice and this permission 

58# notice appear in supporting documentation, and that the name of 

59# Secret Labs AB or the author not be used in advertising or publicity 

60# pertaining to distribution of the software without specific, written 

61# prior permission. 

62# 

63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 

64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 

65# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 

66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 

67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 

68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 

70# OF THIS SOFTWARE. 

71# -------------------------------------------------------------------- 

72 

# Names made available by a star-import of this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "indent", "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser", "XMLPullParser",
    "register_namespace",
    "canonicalize", "C14NWriterTarget",
    ]

VERSION = "1.3.0"

94 

95import sys 

96import re 

97import warnings 

98import io 

99import collections 

100import collections.abc 

101import contextlib 

102 

103from . import ElementPath 

104 

105 

class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the regular exception value, a ParseError carries
    two extra attributes:
        'code' - the specific exception code
        'position' - the line and column of the error

    """

116 

117# -------------------------------------------------------------------- 

118 

119 

def iselement(element):
    """Tell whether *element* looks like an Element, i.e. has a 'tag'."""
    try:
        element.tag
    except AttributeError:
        return False
    return True

123 

124 

class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements. That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name. *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement. This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag. This is either a string or the value None. Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        # The shared default dict is safe here: it is only read, and the
        # instance always gets a fresh merged copy below.
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        Deprecated: emits a DeprecationWarning; use copy.copy(elem) instead.

        """
        # stacklevel=2 attributes the warning to the caller's line, so it is
        # reported against the code that actually needs changing.
        warnings.warn(
            "elem.copy() is deprecated. Use copy.copy(elem) instead.",
            DeprecationWarning, stacklevel=2
            )
        return self.__copy__()

    def __copy__(self):
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0  # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        # Slice assignment receives an iterable of elements; validate each.
        if isinstance(index, slice):
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from a sequence.

        *elements* is a sequence with zero or more elements.

        Items are validated and appended one at a time, so items preceding
        an invalid one have already been added when TypeError is raised.

        """
        for element in elements:
            self._assert_is_element(element)
            self._children.append(element)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents. To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        # assert iselement(element)
        self._children.remove(subelement)

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found. Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently. *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently. *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is a dict keys view, not a
        list, and follows the ordering of the attribute dictionary.

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is a dict items view, not
        a list, and follows the ordering of the attribute dictionary.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included. To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements)

        Return an iterator containing all the matching elements.

        """
        # "*" is a wildcard and means the same as no filter at all.
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None contribute no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t

424 

425 

def SubElement(parent, tag, attrib={}, **extra):
    """Create an element through *parent* and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    Returns the newly created element.

    """
    # Keyword attributes take precedence over entries of *attrib*.
    child = parent.makeelement(tag, {**attrib, **extra})
    parent.append(child)
    return child

442 

443 

def Comment(text=None):
    """Build a comment element.

    The result is a special element, tagged with this very function, which
    the standard serializer writes out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node

456 

457 

def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    This function creates a special element which the standard serializer
    serializes as an XML processing instruction.

    *target* is a string containing the processing instruction target,
    *text* is a string containing the processing instruction contents,
    if any.

    The returned element stores "target" (or "target text" when *text* is
    non-empty) in its text attribute.

    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        element.text = element.text + " " + text
    return element

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction

475 

476 

class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value in an instance of this class to get
    proper namespace handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Instances compare, hash and convert to str like their 'text' string.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = ("{%s}%s" % (text_or_uri, tag)) if tag else text_or_uri

    @staticmethod
    def _operand(other):
        # Another QName is compared through its text; anything else as is.
        return other.text if isinstance(other, QName) else other

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __le__(self, other):
        return self.text <= self._operand(other)

    def __lt__(self, other):
        return self.text < self._operand(other)

    def __ge__(self, other):
        return self.text >= self._operand(other)

    def __gt__(self, other):
        return self.text > self._operand(other)

    def __eq__(self, other):
        return self.text == self._operand(other)

521 

522# -------------------------------------------------------------------- 

523 

524 

class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element  # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element. Use with care!

        """
        # assert iselement(element)
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        # Only close the source when it was opened here from a file name.
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        """
        # assert self._root is not None
        return self._root.iter(tag)

    @staticmethod
    def _relative_to_root(path):
        """Return *path*, prefixed with "." if it starts with "/".

        A FutureWarning is issued when the path had to be rewritten.

        """
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3: skip this helper and the public find* method so
            # the warning is reported against the caller of that method.
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        # assert self._root is not None
        path = self._relative_to_root(path)
        return self._root.find(path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path), which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the text content of the first matching element, or the
        default value if no element was found.

        """
        # assert self._root is not None
        path = self._relative_to_root(path)
        return self._root.findtext(path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        # assert self._root is not None
        path = self._relative_to_root(path)
        return self._root.findall(path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        # assert self._root is not None
        path = self._relative_to_root(path)
        return self._root.iterfind(path, namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII)

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        ValueError is raised for an unknown *method*.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        enc_lower = encoding.lower()
        with _get_writer(file_or_filename, enc_lower) as write:
            if method == "xml" and (xml_declaration or
                    (xml_declaration is None and
                     enc_lower not in ("utf-8", "us-ascii", "unicode"))):
                declared_encoding = encoding
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                serialize = _serialize[method]
                serialize(write, self._root, qnames, namespaces,
                          short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        # lxml.etree compatibility. use output method instead
        return self.write(file, method="c14n")

754 

755# -------------------------------------------------------------------- 

756# serialization support 

757 

758@contextlib.contextmanager 

759def _get_writer(file_or_filename, encoding): 

760 # returns text write method and release all resources after using 

761 try: 

762 write = file_or_filename.write 

763 except AttributeError: 

764 # file_or_filename is a file name 

765 if encoding == "unicode": 

766 file = open(file_or_filename, "w") 

767 else: 

768 file = open(file_or_filename, "w", encoding=encoding, 

769 errors="xmlcharrefreplace") 

770 with file: 

771 yield file.write 

772 else: 

773 # file_or_filename is a file-like object 

774 # encoding determines if it is a text or binary writer 

775 if encoding == "unicode": 

776 # use a text writer as is 

777 yield write 

778 else: 

779 # wrap a binary writer with TextIOWrapper 

780 with contextlib.ExitStack() as stack: 

781 if isinstance(file_or_filename, io.BufferedIOBase): 

782 file = file_or_filename 

783 elif isinstance(file_or_filename, io.RawIOBase): 

784 file = io.BufferedWriter(file_or_filename) 

785 # Keep the original file open when the BufferedWriter is 

786 # destroyed 

787 stack.callback(file.detach) 

788 else: 

789 # This is to handle passed objects that aren't in the 

790 # IOBase hierarchy, but just have a write method 

791 file = io.BufferedIOBase() 

792 file.writable = lambda: True 

793 file.write = write 

794 try: 

795 # TextIOWrapper uses this methods to determine 

796 # if BOM (for UTF-16, etc) should be added 

797 file.seekable = file_or_filename.seekable 

798 file.tell = file_or_filename.tell 

799 except AttributeError: 

800 pass 

801 file = io.TextIOWrapper(file, 

802 encoding=encoding, 

803 errors="xmlcharrefreplace", 

804 newline="\n") 

805 # Keep the original file open when the TextIOWrapper is 

806 # destroyed 

807 stack.callback(file.detach) 

808 yield file.write 

809 

def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used in the tree at *elem*.

    *default_namespace*, if given, is registered with the empty prefix.

    Returns a (qnames, namespaces) tuple: *qnames* maps each tag,
    attribute name and QName value found to its serialized form
    ("prefix:local" or the plain name); *namespaces* maps namespace URIs
    to prefixes.

    ValueError is raised when a non-qualified name is met while
    *default_namespace* is set.

    """
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                # "{uri}local" form: split at the last "}"
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # not seen in this tree yet: try the global registry,
                    # else generate a numbered prefix
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # the "xml" prefix is never recorded for declaration
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
        except TypeError:
            # qname was not sliceable/splittable as a str; the helper is
            # defined elsewhere in this module (presumably it raises)
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    for elem in elem.iter():
        tag = elem.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, str):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            # None, Comment and PI are the only non-string tags accepted
            _raise_serialization_error(tag)
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces

870 

def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* maps URIs to prefixes to declare on this element; the
    recursive calls pass None, so declarations are only written on the
    element this function is first called with.
    *short_empty_elements* selects "<tag />" over "<tag></tag>" for
    elements without text and children.

    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % text)
    elif tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag to emit: only the text and the children are written
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_xml(write, e, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + tag)
            items = list(elem.items())
            if items or namespaces:
                if namespaces:
                    # v is the namespace URI, k its prefix
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k,
                            _escape_attrib(v)
                            ))
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for e in elem:
                    _serialize_xml(write, e, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + tag + ">")
            else:
                write(" />")
    # the tail is written after the element, whatever its kind
    if elem.tail:
        write(_escape_cdata(elem.tail))

920 

# Lower-cased tags for which the HTML serializer writes no end tag.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}

928 

def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as HTML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* maps URIs to prefixes to declare on this element; the
    recursive calls pass None. Extra keyword arguments are accepted and
    ignored.

    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag to emit: only the text and the children are written
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None)
        else:
            write("<" + tag)
            items = list(elem.items())
            if items or namespaces:
                if namespaces:
                    # v is the namespace URI, k its prefix
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k,
                            _escape_attrib(v)
                            ))
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            ltag = tag.lower()
            if text:
                # script and style content is written without escaping
                if ltag == "script" or ltag == "style":
                    write(text)
                else:
                    write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None)
            # tags listed in HTML_EMPTY get no end tag
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    # the tail is written after the element, whatever its kind
    if elem.tail:
        write(_escape_cdata(elem.tail))

978 

979def _serialize_text(write, elem): 

980 for part in elem.itertext(): 

981 write(part) 

982 if elem.tail: 

983 write(elem.tail) 

984 

# Serializer functions keyed by output method name.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
    # The optional "c14n" method (_serialize_c14n) is not registered here;
    # it is imported at the end of the module.
)

992 

993 

def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global; any mapping that already uses the given
    prefix or the given namespace URI is dropped before the new one is
    stored.

    *prefix* is the namespace prefix and *uri* the namespace URI.  Tags and
    attributes in this namespace will be serialized with the prefix if
    possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting keys first so the dict is not mutated while
    # it is being iterated.
    stale_uris = [
        known_uri
        for known_uri, known_prefix in _namespace_map.items()
        if known_uri == uri or known_prefix == prefix
    ]
    for known_uri in stale_uris:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix

1012 

# Global namespace URI -> prefix registry, updated by register_namespace().
_namespace_map = {
    # commonly used namespaces
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # XML Schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# Expose the registry on the function object for tests and troubleshooting.
register_namespace._namespace_map = _namespace_map

1027 

1028def _raise_serialization_error(text): 

1029 raise TypeError( 

1030 "cannot serialize %r (type %s)" % (text, type(text).__name__) 

1031 ) 

1032 

1033def _escape_cdata(text): 

1034 # escape character data 

1035 try: 

1036 # it's worth avoiding do-nothing calls for strings that are 

1037 # shorter than 500 characters, or so. assume that's, by far, 

1038 # the most common case in most applications. 

1039 if "&" in text: 

1040 text = text.replace("&", "&amp;") 

1041 if "<" in text: 

1042 text = text.replace("<", "&lt;") 

1043 if ">" in text: 

1044 text = text.replace(">", "&gt;") 

1045 return text 

1046 except (TypeError, AttributeError): 

1047 _raise_serialization_error(text) 

1048 

1049def _escape_attrib(text): 

1050 # escape attribute value 

1051 try: 

1052 if "&" in text: 

1053 text = text.replace("&", "&amp;") 

1054 if "<" in text: 

1055 text = text.replace("<", "&lt;") 

1056 if ">" in text: 

1057 text = text.replace(">", "&gt;") 

1058 if "\"" in text: 

1059 text = text.replace("\"", "&quot;") 

1060 # Although section 2.11 of the XML specification states that CR or 

1061 # CR LN should be replaced with just LN, it applies only to EOLNs 

1062 # which take part of organizing file into lines. Within attributes, 

1063 # we are replacing these with entity numbers, so they do not count. 

1064 # http://www.w3.org/TR/REC-xml/#sec-line-ends 

1065 # The current solution, contained in following six lines, was 

1066 # discussed in issue 17582 and 39011. 

1067 if "\r" in text: 

1068 text = text.replace("\r", "&#13;") 

1069 if "\n" in text: 

1070 text = text.replace("\n", "&#10;") 

1071 if "\t" in text: 

1072 text = text.replace("\t", "&#09;") 

1073 return text 

1074 except (TypeError, AttributeError): 

1075 _raise_serialization_error(text) 

1076 

1077def _escape_attrib_html(text): 

1078 # escape attribute value 

1079 try: 

1080 if "&" in text: 

1081 text = text.replace("&", "&amp;") 

1082 if ">" in text: 

1083 text = text.replace(">", "&gt;") 

1084 if "\"" in text: 

1085 text = text.replace("\"", "&quot;") 

1086 return text 

1087 except (TypeError, AttributeError): 

1088 _raise_serialization_error(text) 

1089 

1090# -------------------------------------------------------------------- 

1091 

def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  A string is returned when encoding is
    "unicode"; otherwise the result is a bytestring.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output method
    which can be one of "xml" (default), "html", "text" or "c14n", and
    *default_namespace* sets the default XML namespace (for "xmlns").

    Returns an (optionally) encoded string containing the XML data.

    """
    # Text output is collected in a StringIO, encoded output in a BytesIO.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    document = ElementTree(element)
    document.write(buffer, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return buffer.getvalue()

1115 

1116class _ListDataStream(io.BufferedIOBase): 

1117 """An auxiliary stream accumulating into a list reference.""" 

1118 def __init__(self, lst): 

1119 self.lst = lst 

1120 

1121 def writable(self): 

1122 return True 

1123 

1124 def seekable(self): 

1125 return True 

1126 

1127 def write(self, b): 

1128 self.lst.append(b) 

1129 

1130 def tell(self): 

1131 return len(self.lst) 

1132 

def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Serialize *element* and return the written chunks as a list.

    The arguments are passed on to ElementTree.write(); each chunk that
    the writer emits is appended to the returned list in order.
    """
    chunks = []
    sink = _ListDataStream(chunks)
    document = ElementTree(element)
    document.write(sink, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return chunks

1144 

1145 

def dump(elem):
    """Write element tree or element structure to sys.stdout.

    Intended for debugging only.

    *elem* is either an ElementTree or a single Element.  The exact output
    format is implementation dependent; this version writes an ordinary
    XML file.

    """
    # Wrap a bare element so that both input kinds go through write().
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # Finish the output with a newline unless the root's tail supplied one.
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")

1163 

1164 

def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The (root) element
    itself will not be changed, but the tail text of all elements in its
    subtree will be adapted.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # Nothing to indent below an element without children.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # Index 0 holds the indentation of the initial *level*; deeper entries
    # are appended on demand, each one *space* longer than the previous.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # *level* here is an index into ``indentations`` (relative depth).
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        # Only whitespace-only (or missing) text is replaced.
        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)

1213 

1214 

1215# -------------------------------------------------------------------- 

1216# parsing 

1217 

1218 

def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data and *parser*
    is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document

1231 

1232 

def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    Parsing progress is reported to the caller according to *events*.
    The supported events are the strings "start", "end", "start-ns" and
    "end-ns" (the "ns" events are used to get detailed namespace
    information).  If *events* is omitted, only "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until the input has been consumed completely.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    # Anything without a read() method is treated as a path; a file opened
    # here is closed again when the generator below finishes.
    close_source = not hasattr(source, "read")
    if close_source:
        source = open(source, "rb")

    def iterator():
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                chunk = source.read(16 * 1024)
                if not chunk:
                    break
                pullparser.feed(chunk)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__

    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator
    return it

1279 

1280 

class XMLPullParser:
    """Feed-driven parser that queues parse events for the caller to pull."""

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting; "end" is the default event
        requested = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, requested)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error; read_events() raises it when it is reached.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element.  Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item

1332 

1333 

def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data and *parser* is an optional
    parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    active_parser = parser if parser else XMLParser(target=TreeBuilder())
    active_parser.feed(text)
    return active_parser.close()

1349 

1350 

def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data and *parser* is an optional
    parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the dict maps element
    ids to elements.

    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    elements_by_id = {}
    for elem in root.iter():
        elem_id = elem.get("id")
        # Elements with a missing or empty "id" are left out.
        if elem_id:
            elements_by_id[elem_id] = elem
    return root, elements_by_id

1371 

# fromstring() is an alias for XML(): parse an XML document from a
# string constant.
fromstring = XML

1374 

def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments and *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    active_parser = parser if parser else XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active_parser.feed(fragment)
    return active_parser.close()

1389 

1390# -------------------------------------------------------------------- 

1391 

1392 

class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    *comment_factory* is a factory to create comments to be used instead of
    the standard factory.  If *insert_comments* is false (the default),
    comments will not be inserted into the tree.

    *pi_factory* is a factory to create processing instructions to be used
    instead of the standard factory.  If *insert_pis* is false (the default),
    processing instructions will not be inserted into the tree.
    """

    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []  # text chunks collected since the last flush
        self._elem = []  # stack of currently open elements
        self._last = None  # most recently opened or closed element
        self._root = None  # first element started at top level
        self._tail = None  # true if we're after an end tag
        # Fall back to the standard factories where none was supplied.
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = Element if element_factory is None else element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the collected text to the last element: as its tail when
        # we are past its end tag, as its text otherwise.
        if not self._data:
            return
        last = self._last
        if last is not None:
            text = "".join(self._data)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = text
            else:
                assert last.text is None, "internal error (text)"
                last.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        open_elements = self._elem
        if open_elements:
            open_elements[-1].append(elem)
        elif self._root is None:
            self._root = elem
        open_elements.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        *text* is the text of the comment.
        """
        return self._handle_single(
            self._comment_factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        *target* is the target name of the processing instruction.
        *text* is the data of the processing instruction, or ''.
        """
        return self._handle_single(
            self._pi_factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Build a childless node; it is added to the open element (if
        # any) only when *insert* is true.
        node = factory(*args)
        if insert:
            self._flush()
            self._last = node
            if self._elem:
                self._elem[-1].append(node)
            self._tail = 1
        return node

1512 

1513 

1514# also see ElementTree and TreeBuilder 

class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, *, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace and local name in the names expat reports.
        expat_parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = expat_parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {}  # name memo cache
        # main callbacks: only hooked up for methods the target provides
        expat_parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            expat_parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            expat_parser.EndElementHandler = self._end
        if hasattr(target, 'start_ns'):
            expat_parser.StartNamespaceDeclHandler = self._start_ns
        if hasattr(target, 'end_ns'):
            expat_parser.EndNamespaceDeclHandler = self._end_ns
        if hasattr(target, 'data'):
            expat_parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            expat_parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            expat_parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        expat_parser.buffer_text = 1
        expat_parser.ordered_attributes = 1
        expat_parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor.
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each handler binds what it needs through default arguments, so
        # it keeps the values of the loop iteration that created it.
        expat_parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                expat_parser.ordered_attributes = 1
                expat_parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                expat_parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                expat_parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                expat_parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                expat_parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                expat_parser.CommentHandler = handler
            elif event_name == 'pi':
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                expat_parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError carrying code and position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        names = self._names
        if key in names:
            return names[key]
        # A name containing "}" came with a namespace; add the opening "{".
        name = "{" + key if "}" in key else key
        names[key] = name
        return name

    def _start_ns(self, prefix, uri):
        return self.target.start_ns(prefix or '', uri or '')

    def _end_ns(self, prefix):
        return self.target.end_ns(prefix or '')

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for idx in range(0, len(attr_list), 2):
                value = attr_list[idx + 1]
                attrib[fixname(attr_list[idx])] = value
        return self.target.start(tag, attrib)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        first = text[:1]
        if first == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                line = self.parser.ErrorLineNumber
                column = self.parser.ErrorColumnNumber
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, line, column)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif first == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if first == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            count = len(self._doctype)
            if count <= 2:
                return
            kind = self._doctype[1]
            if kind == "PUBLIC" and count == 4:
                name, kind, pubid, system = self._doctype
                if pubid:
                    pubid = pubid[1:-1]
            elif kind == "SYSTEM" and count == 3:
                name, kind, system = self._doctype
                pubid = None
            else:
                return
            if hasattr(self.target, "doctype"):
                self.target.doctype(name, pubid, system[1:-1])
            elif hasattr(self, "doctype"):
                warnings.warn(
                    "The doctype() method of XMLParser is ignored.  "
                    "Define doctype() method on the TreeBuilder target.",
                    RuntimeWarning)

            self._doctype = None

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, False)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse(b"", True)  # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target

1740 

1741 

1742# -------------------------------------------------------------------- 

1743# C14N 2.0 

1744 

def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its
    ``.write()`` method.  To write to a file, open it in text mode with
    encoding "utf-8".  If *out* is not provided, this function returns the
    output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input; ValueError is raised when
    both are missing.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
    # Collect the output in memory only when the caller gave no sink.
    capture = None
    if out is None:
        capture = out = io.StringIO()

    parser = XMLParser(target=C14NWriterTarget(out.write, **options))

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # The guard above ensures from_file is set on this path.
        parse(from_file, parser=parser)

    return None if capture is None else capture.getvalue()

1773 

1774 

1775_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match 

1776 

1777 

1778class C14NWriterTarget: 

1779 """ 

1780 Canonicalization writer target for the XMLParser. 

1781 

1782 Serialises parse events to XML C14N 2.0. 

1783 

1784 The *write* function is used for writing out the resulting data stream 

1785 as text (not bytes). To write to a file, open it in text mode with encoding 

1786 "utf-8" and pass its ``.write`` method. 

1787 

1788 Configuration options: 

1789 

1790 - *with_comments*: set to true to include comments 

1791 - *strip_text*: set to true to strip whitespace before and after text content 

1792 - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}" 

1793 - *qname_aware_tags*: a set of qname aware tag names in which prefixes 

1794 should be replaced in text content 

1795 - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes 

1796 should be replaced in text content 

1797 - *exclude_attrs*: a set of attribute names that should not be serialised 

1798 - *exclude_tags*: a set of tag names that should not be serialised 

1799 """ 

1800 def __init__(self, write, *, 

1801 with_comments=False, strip_text=False, rewrite_prefixes=False, 

1802 qname_aware_tags=None, qname_aware_attrs=None, 

1803 exclude_attrs=None, exclude_tags=None): 

1804 self._write = write 

1805 self._data = [] 

1806 self._with_comments = with_comments 

1807 self._strip_text = strip_text 

1808 self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None 

1809 self._exclude_tags = set(exclude_tags) if exclude_tags else None 

1810 

1811 self._rewrite_prefixes = rewrite_prefixes 

1812 if qname_aware_tags: 

1813 self._qname_aware_tags = set(qname_aware_tags) 

1814 else: 

1815 self._qname_aware_tags = None 

1816 if qname_aware_attrs: 

1817 self._find_qname_aware_attrs = set(qname_aware_attrs).intersection 

1818 else: 

1819 self._find_qname_aware_attrs = None 

1820 

1821 # Stack with globally and newly declared namespaces as (uri, prefix) pairs. 

1822 self._declared_ns_stack = [[ 

1823 ("http://www.w3.org/XML/1998/namespace", "xml"), 

1824 ]] 

1825 # Stack with user declared namespace prefixes as (uri, prefix) pairs. 

1826 self._ns_stack = [] 

1827 if not rewrite_prefixes: 

1828 self._ns_stack.append(list(_namespace_map.items())) 

1829 self._ns_stack.append([]) 

1830 self._prefix_map = {} 

1831 self._preserve_space = [False] 

1832 self._pending_start = None 

1833 self._root_seen = False 

1834 self._root_done = False 

1835 self._ignored_depth = 0 

1836 

1837 def _iter_namespaces(self, ns_stack, _reversed=reversed): 

1838 for namespaces in _reversed(ns_stack): 

1839 if namespaces: # almost no element declares new namespaces 

1840 yield from namespaces 

1841 

1842 def _resolve_prefix_name(self, prefixed_name): 

1843 prefix, name = prefixed_name.split(':', 1) 

1844 for uri, p in self._iter_namespaces(self._ns_stack): 

1845 if p == prefix: 

1846 return f'{{{uri}}}{name}' 

1847 raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope') 

1848 

1849 def _qname(self, qname, uri=None): 

1850 if uri is None: 

1851 uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname) 

1852 else: 

1853 tag = qname 

1854 

1855 prefixes_seen = set() 

1856 for u, prefix in self._iter_namespaces(self._declared_ns_stack): 

1857 if u == uri and prefix not in prefixes_seen: 

1858 return f'{prefix}:{tag}' if prefix else tag, tag, uri 

1859 prefixes_seen.add(prefix) 

1860 

1861 # Not declared yet => add new declaration. 

1862 if self._rewrite_prefixes: 

1863 if uri in self._prefix_map: 

1864 prefix = self._prefix_map[uri] 

1865 else: 

1866 prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}' 

1867 self._declared_ns_stack[-1].append((uri, prefix)) 

1868 return f'{prefix}:{tag}', tag, uri 

1869 

1870 if not uri and '' not in prefixes_seen: 

1871 # No default namespace declared => no prefix needed. 

1872 return tag, tag, uri 

1873 

1874 for u, prefix in self._iter_namespaces(self._ns_stack): 

1875 if u == uri: 

1876 self._declared_ns_stack[-1].append((uri, prefix)) 

1877 return f'{prefix}:{tag}' if prefix else tag, tag, uri 

1878 

1879 if not uri: 

1880 # As soon as a default namespace is defined, 

1881 # anything that has no namespace (and thus, no prefix) goes there. 

1882 return tag, tag, uri 

1883 

1884 raise ValueError(f'Namespace "{uri}" is not declared in scope') 

1885 

def data(self, data):
    """Buffer a chunk of character data unless inside an ignored subtree."""
    if self._ignored_depth:
        return
    self._data.append(data)

1889 

1890 def _flush(self, _join_text=''.join): 

1891 data = _join_text(self._data) 

1892 del self._data[:] 

1893 if self._strip_text and not self._preserve_space[-1]: 

1894 data = data.strip() 

1895 if self._pending_start is not None: 

1896 args, self._pending_start = self._pending_start, None 

1897 qname_text = data if data and _looks_like_prefix_name(data) else None 

1898 self._start(*args, qname_text) 

1899 if qname_text is not None: 

1900 return 

1901 if data and self._root_seen: 

1902 self._write(_escape_cdata_c14n(data)) 

1903 

def start_ns(self, prefix, uri):
    """Record a namespace declaration as a (uri, prefix) pair.

    Nothing happens inside an ignored subtree.  Buffered text is flushed
    first, because qnames in text content may have to be resolved before
    the new declaration takes effect.
    """
    if not self._ignored_depth:
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

1911 

def start(self, tag, attrs):
    """Handle an element start event.

    Excluded tags (and everything nested inside them) only bump the
    ignore counter.  Otherwise buffered text is flushed, a fresh list for
    this element's namespace declarations is pushed, and the tag is
    either written right away or parked in ``self._pending_start`` when
    it is one of the qname-aware tags.
    """
    excluded_tags = self._exclude_tags
    if excluded_tags is not None and (
            self._ignored_depth or tag in excluded_tags):
        self._ignored_depth += 1
        return
    if self._data:
        self._flush()

    declared_here = []
    self._declared_ns_stack.append(declared_here)

    qname_tags = self._qname_aware_tags
    if qname_tags is None or tag not in qname_tags:
        self._start(tag, attrs, declared_here)
    else:
        # Need to parse text first to see if it requires a prefix declaration.
        self._pending_start = (tag, attrs, declared_here)

1928 

1929 def _start(self, tag, attrs, new_namespaces, qname_text=None): 

1930 if self._exclude_attrs is not None and attrs: 

1931 attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs} 

1932 

1933 qnames = {tag, *attrs} 

1934 resolved_names = {} 

1935 

1936 # Resolve prefixes in attribute and tag text. 

1937 if qname_text is not None: 

1938 qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text) 

1939 qnames.add(qname) 

1940 if self._find_qname_aware_attrs is not None and attrs: 

1941 qattrs = self._find_qname_aware_attrs(attrs) 

1942 if qattrs: 

1943 for attr_name in qattrs: 

1944 value = attrs[attr_name] 

1945 if _looks_like_prefix_name(value): 

1946 qname = resolved_names[value] = self._resolve_prefix_name(value) 

1947 qnames.add(qname) 

1948 else: 

1949 qattrs = None 

1950 else: 

1951 qattrs = None 

1952 

1953 # Assign prefixes in lexicographical order of used URIs. 

1954 parse_qname = self._qname 

1955 parsed_qnames = {n: parse_qname(n) for n in sorted( 

1956 qnames, key=lambda n: n.split('}', 1))} 

1957 

1958 # Write namespace declarations in prefix order ... 

1959 if new_namespaces: 

1960 attr_list = [ 

1961 ('xmlns:' + prefix if prefix else 'xmlns', uri) 

1962 for uri, prefix in new_namespaces 

1963 ] 

1964 attr_list.sort() 

1965 else: 

1966 # almost always empty 

1967 attr_list = [] 

1968 

1969 # ... followed by attributes in URI+name order 

1970 if attrs: 

1971 for k, v in sorted(attrs.items()): 

1972 if qattrs is not None and k in qattrs and v in resolved_names: 

1973 v = parsed_qnames[resolved_names[v]][0] 

1974 attr_qname, attr_name, uri = parsed_qnames[k] 

1975 # No prefix for attributes in default ('') namespace. 

1976 attr_list.append((attr_qname if uri else attr_name, v)) 

1977 

1978 # Honour xml:space attributes. 

1979 space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space') 

1980 self._preserve_space.append( 

1981 space_behaviour == 'preserve' if space_behaviour 

1982 else self._preserve_space[-1]) 

1983 

1984 # Write the tag. 

1985 write = self._write 

1986 write('<' + parsed_qnames[tag][0]) 

1987 if attr_list: 

1988 write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list])) 

1989 write('>') 

1990 

1991 # Write the resolved qname text content. 

1992 if qname_text is not None: 

1993 write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0])) 

1994 

1995 self._root_seen = True 

1996 self._ns_stack.append([]) 

1997 

def end(self, tag):
    """Handle an element end event: write the closing tag and pop state.

    Inside an ignored subtree only the ignore counter is decremented.
    """
    if self._ignored_depth:
        self._ignored_depth -= 1
        return
    if self._data:
        self._flush()

    closing_name = self._qname(tag)[0]
    self._write(f'</{closing_name}>')

    space_stack = self._preserve_space
    space_stack.pop()
    # Only the initial entry is left once the outermost element has closed.
    self._root_done = len(space_stack) == 1
    self._declared_ns_stack.pop()
    self._ns_stack.pop()

2009 

def comment(self, text):
    """Write a comment, unless comments are disabled or currently ignored.

    A newline separates the comment from the root element: it is written
    before a comment that follows the root and after one that precedes
    it.  Inside the root, buffered text is flushed first.
    """
    if not self._with_comments or self._ignored_depth:
        return

    if self._root_done:
        self._write('\n')
    elif self._root_seen and self._data:
        self._flush()

    escaped = _escape_cdata_c14n(text)
    self._write(f'<!--{escaped}-->')

    if not self._root_seen:
        self._write('\n')

2022 

def pi(self, target, data):
    """Write a processing instruction unless inside an ignored subtree.

    A newline separates the instruction from the root element: it is
    written before an instruction that follows the root and after one
    that precedes it.  Inside the root, buffered text is flushed first.
    """
    if self._ignored_depth:
        return

    if self._root_done:
        self._write('\n')
    elif self._root_seen and self._data:
        self._flush()

    if data:
        markup = f'<?{target} {_escape_cdata_c14n(data)}?>'
    else:
        markup = f'<?{target}?>'
    self._write(markup)

    if not self._root_seen:
        self._write('\n')

2034 

2035 

2036def _escape_cdata_c14n(text): 

2037 # escape character data 

2038 try: 

2039 # it's worth avoiding do-nothing calls for strings that are 

2040 # shorter than 500 character, or so. assume that's, by far, 

2041 # the most common case in most applications. 

2042 if '&' in text: 

2043 text = text.replace('&', '&amp;') 

2044 if '<' in text: 

2045 text = text.replace('<', '&lt;') 

2046 if '>' in text: 

2047 text = text.replace('>', '&gt;') 

2048 if '\r' in text: 

2049 text = text.replace('\r', '&#xD;') 

2050 return text 

2051 except (TypeError, AttributeError): 

2052 _raise_serialization_error(text) 

2053 

2054 

2055def _escape_attrib_c14n(text): 

2056 # escape attribute value 

2057 try: 

2058 if '&' in text: 

2059 text = text.replace('&', '&amp;') 

2060 if '<' in text: 

2061 text = text.replace('<', '&lt;') 

2062 if '"' in text: 

2063 text = text.replace('"', '&quot;') 

2064 if '\t' in text: 

2065 text = text.replace('\t', '&#x9;') 

2066 if '\n' in text: 

2067 text = text.replace('\n', '&#xA;') 

2068 if '\r' in text: 

2069 text = text.replace('\r', '&#xD;') 

2070 return text 

2071 except (TypeError, AttributeError): 

2072 _raise_serialization_error(text) 

2073 

2074 

2075# -------------------------------------------------------------------- 

2076 

# Import the C accelerators

# Element is going to be shadowed by the C implementation.  Keep the Python
# version of it accessible under a private name for "creative" use by
# external code (see tests).
_Element_Py = Element

try:
    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # No accelerator module available: keep the pure-Python definitions.
    pass
else:
    _set_factories(Comment, ProcessingInstruction)