Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/xml/sax/xmlreader.py: 43%

161 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 06:13 +0000

1"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers 

2should be based on this code. """ 

3 

4from . import handler 

5 

6from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException 

7 

8 

9# ===== XMLREADER ===== 

10 

class XMLReader:
    """Callback-based interface for reading an XML document.

    A SAX2 driver for an XML parser implements this interface.  Through
    it an application can query and set parser features and properties,
    register the handlers that receive document events, and start a
    parse.

    SAX interfaces are synchronous: a parse method does not return until
    parsing has finished, and a reader waits for each event-handler
    callback to return before it reports the next event.
    """

    def __init__(self):
        # Install a freshly constructed instance of each handler type
        # from the handler module; the setters below replace them.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or InputSource.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    # --- handler accessors -------------------------------------------

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that will receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that will receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object used to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that will receive error-message events."""
        self._err_handler = handler

    # --- locale, features and properties -----------------------------

    def setLocale(self, locale):
        """Set the locale used for errors and warnings.

        SAX parsers need not localise errors and warnings, but a parser
        that cannot honour the requested locale must raise a SAX
        exception.  Applications may request a locale change in the
        middle of a parse.

        This base implementation always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base implementation recognises no feature and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognises no feature and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base implementation recognises no property and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognises no property and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

90 

class IncrementalParser(XMLReader):
    """XMLReader extension that supports incremental parsing.

    Three methods are added to the XMLReader interface: feed, close and
    reset.  Supporting this interface is optional, because not every
    underlying XML parser can parse incrementally.

    A newly instantiated parser can accept data through feed right
    away.  Once parsing has been finished with close, reset must be
    called before the parser accepts new data, whether through feed or
    through parse.

    These methods must _not_ be called while a parse is in progress,
    i.e. between the call to parse and its return.

    As a convenience to SAX 2.0 driver writers, parse is implemented
    here in terms of prepareParser, feed and close.
    """

    def __init__(self, bufsize=2**16):
        # bufsize is the size argument passed to each read() in parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Read the whole of source in chunks and feed it to the parser.

        source is first passed through saxutils.prepare_input_source.
        The character stream of the result is used when it is not None,
        otherwise the byte stream.  Chunks of up to self._bufsize are
        read and handed to feed until a read returns a falsy value,
        after which close is called.
        """
        # Imported here rather than at module level, as in the original.
        from . import saxutils

        source = saxutils.prepare_input_source(source)
        self.prepareParser(source)

        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()

        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand raw XML data to the parser and emit the resulting events.

        XML constructs may be split across several calls to feed.

        feed may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook called by parse so the SAX 2.0 driver can prepare itself.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Signal that the entire document has been passed to feed.

        This lets the parser run its final checks on the document and
        empty its internal data buffer.  The parser is not ready for
        another document until reset has been called.

        close may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready to parse a new document after close.

        The results of calling parse or feed after close without an
        intervening reset are undefined.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

162 

163# ===== LOCATOR ===== 

164 

class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator returns valid results only during calls to
    DocumentHandler methods; at any other time the results are
    unpredictable.

    Every method of this base class returns a fixed placeholder
    (-1 for numbers, None for identifiers).
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None

186 

187# ===== INPUTSOURCE ===== 

188 

class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry a public identifier, a system identifier, a
    byte stream (optionally with character-encoding information) and/or
    a character stream for an entity.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader must not
    modify an InputSource handed to it, though it may copy one and
    modify the copy.
    """

    def __init__(self, system_id=None):
        # Only the system identifier can be given up front; everything
        # else starts out as None and is filled in via the setters.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string that is acceptable in an XML
        encoding declaration (section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a Python file-like object that performs no
        byte-to-character conversion.

        The SAX parser ignores the byte stream when a character stream
        is also specified, but prefers it over opening a URI connection
        itself.

        An application that knows the character encoding of the byte
        stream should set it with setEncoding.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding returns the character encoding for this byte
        stream, or None if it is unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a file-like object that performs conversion
        to Unicode strings.

        When a character stream is specified, the SAX parser ignores
        any byte stream and does not try to open a URI connection to
        the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile

275 

276# ===== ATTRIBUTESIMPL ===== 

277 

class AttributesImpl:
    """Attribute collection that is not namespace-aware.

    Wraps a {name: value} mapping and exposes it both through the
    SAX-style get* methods and through a read-only mapping-like
    interface (len, indexing, "in", keys, items, values, get).
    Plain names and qualified names are the same thing here.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}.
        """
        # The mapping is stored as given; no copy is made.
        self._attrs = attrs

    # --- SAX-style accessors -----------------------------------------

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return "CDATA" regardless of the attribute name given."""
        return "CDATA"

    def getValue(self, name):
        """Return the value stored under name (lookup by subscription)."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value stored under the qualified name."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return name unchanged if it is present, else raise KeyError."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getQNameByName(self, name):
        """Return name unchanged if it is present, else raise KeyError."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getNames(self):
        """Return a new list of the attribute names."""
        return [*self._attrs.keys()]

    def getQNames(self):
        """Return a new list of the qualified attribute names."""
        return [*self._attrs.keys()]

    # --- mapping-like interface --------------------------------------

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a new list of the attribute names."""
        return [*self._attrs.keys()]

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value for name, or alternative if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class over the same mapping.

        The underlying attrs mapping is passed along as-is, not copied.
        """
        return self.__class__(self._attrs)

    def items(self):
        """Return a new list of (name, value) pairs."""
        return [*self._attrs.items()]

    def values(self):
        """Return a new list of the attribute values."""
        return [*self._attrs.values()]

337 

338# ===== ATTRIBUTESNSIMPL ===== 

339 

class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attribute values are keyed by (ns_uri, lname) tuples, and a second
    mapping with the same keys holds each attribute's qualified name.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        """
        # Both mappings are stored as given; no copies are made.
        self._attrs = attrs
        self._qnames = qnames

    def _nsname_for_qname(self, name):
        """Return the first key of the qnames mapping whose qname equals name.

        Raises KeyError(name) when no entry matches.
        """
        for candidate, qualified in self._qnames.items():
            if qualified == name:
                return candidate
        raise KeyError(name)

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name."""
        return self._attrs[self._nsname_for_qname(name)]

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is name."""
        return self._nsname_for_qname(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname) key."""
        return self._qnames[name]

    def getQNames(self):
        """Return a new list of all qualified names."""
        return [*self._qnames.values()]

    def copy(self):
        """Return a new instance of the same class over the same mappings.

        Neither the attrs nor the qnames mapping is copied.
        """
        return self.__class__(self._attrs, self._qnames)

372 

373 

def _test():
    """Smoke test: construct each class that takes no required arguments."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()


if __name__ == "__main__":
    _test()