/src/poco/XML/src/ParserEngine.cpp
Line | Count | Source |
1 | | // |
2 | | // ParserEngine.cpp |
3 | | // |
4 | | // Library: XML |
5 | | // Package: XML |
6 | | // Module: ParserEngine |
7 | | // |
8 | | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. |
9 | | // and Contributors. |
10 | | // |
11 | | // SPDX-License-Identifier: BSL-1.0 |
12 | | // |
13 | | |
14 | | |
15 | | #include "ParserEngine.h" |
16 | | #include "Poco/XML/NamespaceStrategy.h" |
17 | | #include "Poco/XML/XMLException.h" |
18 | | #include "Poco/SAX/EntityResolver.h" |
19 | | #include "Poco/SAX/EntityResolverImpl.h" |
20 | | #include "Poco/SAX/DTDHandler.h" |
21 | | #include "Poco/SAX/DeclHandler.h" |
22 | | #include "Poco/SAX/ContentHandler.h" |
23 | | #include "Poco/SAX/LexicalHandler.h" |
24 | | #include "Poco/SAX/ErrorHandler.h" |
25 | | #include "Poco/SAX/InputSource.h" |
26 | | #include "Poco/SAX/Locator.h" |
27 | | #include "Poco/SAX/LocatorImpl.h" |
28 | | #include "Poco/SAX/SAXException.h" |
29 | | #include "Poco/URI.h" |
30 | | #include <cstring> |
31 | | |
32 | | |
33 | | using Poco::URI; |
34 | | using Poco::TextEncoding; |
35 | | |
36 | | |
37 | | namespace Poco { |
38 | | namespace XML { |
39 | | |
40 | | |
41 | | class ContextLocator: public Locator |
42 | | { |
43 | | public: |
44 | | ContextLocator(XML_Parser parser, const XMLString& publicId, const XMLString& systemId): |
45 | 130k | _parser(parser), |
46 | 130k | _publicId(publicId), |
47 | 130k | _systemId(systemId) |
48 | 130k | { |
49 | 130k | } |
50 | | |
51 | | ~ContextLocator() |
52 | 130k | { |
53 | 130k | } |
54 | | |
55 | | XMLString getPublicId() const |
56 | 93.2k | { |
57 | 93.2k | return _publicId; |
58 | 93.2k | } |
59 | | |
60 | | XMLString getSystemId() const |
61 | 179k | { |
62 | 179k | return _systemId; |
63 | 179k | } |
64 | | |
65 | | int getLineNumber() const |
66 | 93.2k | { |
67 | 93.2k | return XML_GetCurrentLineNumber(_parser); |
68 | 93.2k | } |
69 | | |
70 | | int getColumnNumber() const |
71 | 93.2k | { |
72 | 93.2k | return XML_GetCurrentColumnNumber(_parser); |
73 | 93.2k | } |
74 | | |
75 | | private: |
76 | | XML_Parser _parser; |
77 | | XMLString _publicId; |
78 | | XMLString _systemId; |
79 | | }; |
80 | | |
81 | | |
82 | | const int ParserEngine::PARSE_BUFFER_SIZE = 4096; |
83 | | const XMLString ParserEngine::EMPTY_STRING; |
84 | | |
85 | | |
86 | | ParserEngine::ParserEngine(): |
87 | 52.1k | _parser(nullptr), |
88 | 52.1k | _pBuffer(nullptr), |
89 | 52.1k | _encodingSpecified(false), |
90 | 52.1k | _expandInternalEntities(true), |
91 | 52.1k | _externalGeneralEntities(false), |
92 | 52.1k | _externalParameterEntities(false), |
93 | 52.1k | _enablePartialReads(false), |
94 | 52.1k | _pNamespaceStrategy(new NoNamespacesStrategy()), |
95 | 52.1k | _pEntityResolver(nullptr), |
96 | 52.1k | _pDTDHandler(nullptr), |
97 | 52.1k | _pDeclHandler(nullptr), |
98 | 52.1k | _pContentHandler(nullptr), |
99 | 52.1k | _pLexicalHandler(nullptr), |
100 | 52.1k | _pErrorHandler(nullptr), |
101 | 52.1k | _maximumAmplificationFactor(0.0), |
102 | 52.1k | _activationThresholdBytes(0) |
103 | 52.1k | { |
104 | 52.1k | } |
105 | | |
106 | | |
107 | | ParserEngine::ParserEngine(const XMLString& encoding): |
108 | 0 | _parser(nullptr), |
109 | 0 | _pBuffer(nullptr), |
110 | 0 | _encodingSpecified(true), |
111 | 0 | _encoding(encoding), |
112 | 0 | _expandInternalEntities(true), |
113 | 0 | _externalGeneralEntities(false), |
114 | 0 | _externalParameterEntities(false), |
115 | 0 | _enablePartialReads(false), |
116 | 0 | _pNamespaceStrategy(new NoNamespacesStrategy()), |
117 | 0 | _pEntityResolver(nullptr), |
118 | 0 | _pDTDHandler(nullptr), |
119 | 0 | _pDeclHandler(nullptr), |
120 | 0 | _pContentHandler(nullptr), |
121 | 0 | _pLexicalHandler(nullptr), |
122 | 0 | _pErrorHandler(nullptr), |
123 | 0 | _maximumAmplificationFactor(0.0), |
124 | 0 | _activationThresholdBytes(0) |
125 | 0 | { |
126 | 0 | } |
127 | | |
128 | | |
129 | | ParserEngine::~ParserEngine() |
130 | 52.1k | { |
131 | 52.1k | resetContext(); |
132 | 52.1k | if (_parser) XML_ParserFree(_parser); |
133 | 52.1k | delete [] _pBuffer; |
134 | 52.1k | delete _pNamespaceStrategy; |
135 | 52.1k | } |
136 | | |
137 | | |
138 | | void ParserEngine::setEncoding(const XMLString& encoding) |
139 | 0 | { |
140 | 0 | _encoding = encoding; |
141 | 0 | _encodingSpecified = true; |
142 | 0 | } |
143 | | |
144 | | |
145 | | void ParserEngine::addEncoding(const XMLString& name, TextEncoding* pEncoding) |
146 | 0 | { |
147 | 0 | poco_check_ptr (pEncoding); |
148 | |
|
149 | 0 | if (_encodings.find(name) == _encodings.end()) |
150 | 0 | _encodings[name] = pEncoding; |
151 | 0 | else |
152 | 0 | throw XMLException("Encoding already defined"); |
153 | 0 | } |
154 | | |
155 | | |
156 | | void ParserEngine::setNamespaceStrategy(NamespaceStrategy* pStrategy) |
157 | 52.1k | { |
158 | 52.1k | poco_check_ptr (pStrategy); |
159 | | |
160 | 52.1k | delete _pNamespaceStrategy; |
161 | 52.1k | _pNamespaceStrategy = pStrategy; |
162 | 52.1k | } |
163 | | |
164 | | |
165 | | void ParserEngine::setExpandInternalEntities(bool flag) |
166 | 0 | { |
167 | 0 | _expandInternalEntities = flag; |
168 | 0 | } |
169 | | |
170 | | |
171 | | void ParserEngine::setExternalGeneralEntities(bool flag) |
172 | 52.1k | { |
173 | 52.1k | _externalGeneralEntities = flag; |
174 | 52.1k | } |
175 | | |
176 | | |
177 | | void ParserEngine::setExternalParameterEntities(bool flag) |
178 | 52.1k | { |
179 | 52.1k | _externalParameterEntities = flag; |
180 | 52.1k | } |
181 | | |
182 | | |
183 | | void ParserEngine::setEntityResolver(EntityResolver* pResolver) |
184 | 38.5k | { |
185 | 38.5k | _pEntityResolver = pResolver; |
186 | 38.5k | } |
187 | | |
188 | | |
189 | | void ParserEngine::setDTDHandler(DTDHandler* pDTDHandler) |
190 | 52.1k | { |
191 | 52.1k | _pDTDHandler = pDTDHandler; |
192 | 52.1k | } |
193 | | |
194 | | |
195 | | void ParserEngine::setDeclHandler(DeclHandler* pDeclHandler) |
196 | 0 | { |
197 | 0 | _pDeclHandler = pDeclHandler; |
198 | 0 | } |
199 | | |
200 | | |
201 | | void ParserEngine::setContentHandler(ContentHandler* pContentHandler) |
202 | 52.1k | { |
203 | 52.1k | _pContentHandler = pContentHandler; |
204 | 52.1k | } |
205 | | |
206 | | |
207 | | void ParserEngine::setLexicalHandler(LexicalHandler* pLexicalHandler) |
208 | 26.0k | { |
209 | 26.0k | _pLexicalHandler = pLexicalHandler; |
210 | 26.0k | } |
211 | | |
212 | | |
213 | | void ParserEngine::setErrorHandler(ErrorHandler* pErrorHandler) |
214 | 38.5k | { |
215 | 38.5k | _pErrorHandler = pErrorHandler; |
216 | 38.5k | } |
217 | | |
218 | | |
219 | | void ParserEngine::setEnablePartialReads(bool flag) |
220 | 26.0k | { |
221 | 26.0k | _enablePartialReads = flag; |
222 | 26.0k | } |
223 | | |
224 | | |
225 | | void ParserEngine::setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor) |
226 | 0 | { |
227 | 0 | _maximumAmplificationFactor = maximumAmplificationFactor; |
228 | 0 | } |
229 | | |
230 | | |
231 | | void ParserEngine::setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes) |
232 | 0 | { |
233 | 0 | _activationThresholdBytes = activationThresholdBytes; |
234 | 0 | } |
235 | | |
236 | | |
237 | | void ParserEngine::parse(InputSource* pInputSource) |
238 | 0 | { |
239 | 0 | init(); |
240 | 0 | resetContext(); |
241 | 0 | pushContext(_parser, pInputSource); |
242 | 0 | if (_pContentHandler) _pContentHandler->setDocumentLocator(this); |
243 | 0 | if (_pContentHandler) _pContentHandler->startDocument(); |
244 | 0 | if (pInputSource->getCharacterStream()) |
245 | 0 | parseCharInputStream(*pInputSource->getCharacterStream()); |
246 | 0 | else if (pInputSource->getByteStream()) |
247 | 0 | parseByteInputStream(*pInputSource->getByteStream()); |
248 | 0 | else throw XMLException("Input source has no stream"); |
249 | 0 | if (_pContentHandler) _pContentHandler->endDocument(); |
250 | 0 | popContext(); |
251 | 0 | } |
252 | | |
253 | | |
254 | | void ParserEngine::parse(const char* pBuffer, std::size_t size) |
255 | 52.1k | { |
256 | 52.1k | init(); |
257 | 52.1k | resetContext(); |
258 | 52.1k | InputSource src; |
259 | 52.1k | pushContext(_parser, &src); |
260 | 52.1k | if (_pContentHandler) _pContentHandler->setDocumentLocator(this); |
261 | 52.1k | if (_pContentHandler) _pContentHandler->startDocument(); |
262 | 52.1k | std::size_t processed = 0; |
263 | 310k | while (processed < size) |
264 | 258k | { |
265 | 258k | const int bufferSize = processed + PARSE_BUFFER_SIZE < size ? PARSE_BUFFER_SIZE : static_cast<int>(size - processed); |
266 | 258k | if (!XML_Parse(_parser, pBuffer + processed, bufferSize, 0)) |
267 | 24.4k | handleError(XML_GetErrorCode(_parser)); |
268 | 258k | processed += bufferSize; |
269 | 258k | } |
270 | 52.1k | if (!XML_Parse(_parser, pBuffer+processed, 0, 1)) |
271 | 21.3k | handleError(XML_GetErrorCode(_parser)); |
272 | 52.1k | if (_pContentHandler) _pContentHandler->endDocument(); |
273 | 52.1k | popContext(); |
274 | 52.1k | } |
275 | | |
276 | | |
277 | | void ParserEngine::parseByteInputStream(XMLByteInputStream& istr) |
278 | 0 | { |
279 | 0 | std::streamsize n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE); |
280 | 0 | while (n > 0) |
281 | 0 | { |
282 | 0 | if (!XML_Parse(_parser, _pBuffer, static_cast<int>(n), 0)) |
283 | 0 | handleError(XML_GetErrorCode(_parser)); |
284 | 0 | if (istr.good()) |
285 | 0 | n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE); |
286 | 0 | else |
287 | 0 | n = 0; |
288 | 0 | } |
289 | 0 | if (!XML_Parse(_parser, _pBuffer, 0, 1)) |
290 | 0 | handleError(XML_GetErrorCode(_parser)); |
291 | 0 | } |
292 | | |
293 | | |
294 | | void ParserEngine::parseCharInputStream(XMLCharInputStream& istr) |
295 | 0 | { |
296 | 0 | std::streamsize n = readChars(istr, reinterpret_cast<XMLChar*>(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
297 | 0 | while (n > 0) |
298 | 0 | { |
299 | 0 | if (!XML_Parse(_parser, _pBuffer, static_cast<int>(n*sizeof(XMLChar)), 0)) |
300 | 0 | handleError(XML_GetErrorCode(_parser)); |
301 | 0 | if (istr.good()) |
302 | 0 | n = readChars(istr, reinterpret_cast<XMLChar*>(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
303 | 0 | else |
304 | 0 | n = 0; |
305 | 0 | } |
306 | 0 | if (!XML_Parse(_parser, _pBuffer, 0, 1)) |
307 | 0 | handleError(XML_GetErrorCode(_parser)); |
308 | 0 | } |
309 | | |
310 | | |
311 | | void ParserEngine::parseExternal(XML_Parser extParser, InputSource* pInputSource) |
312 | 78.6k | { |
313 | 78.6k | pushContext(extParser, pInputSource); |
314 | 78.6k | if (pInputSource->getCharacterStream()) |
315 | 0 | parseExternalCharInputStream(extParser, *pInputSource->getCharacterStream()); |
316 | 78.6k | else if (pInputSource->getByteStream()) |
317 | 78.6k | parseExternalByteInputStream(extParser, *pInputSource->getByteStream()); |
318 | 0 | else throw XMLException("Input source has no stream"); |
319 | 78.6k | popContext(); |
320 | 78.6k | } |
321 | | |
322 | | |
323 | | void ParserEngine::parseExternalByteInputStream(XML_Parser extParser, XMLByteInputStream& istr) |
324 | 78.6k | { |
325 | 78.6k | char *pBuffer = new char[PARSE_BUFFER_SIZE]; |
326 | 78.6k | try |
327 | 78.6k | { |
328 | 78.6k | std::streamsize n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE); |
329 | 78.6k | while (n > 0) |
330 | 0 | { |
331 | 0 | if (!XML_Parse(extParser, pBuffer, static_cast<int>(n), 0)) |
332 | 0 | handleError(XML_GetErrorCode(extParser)); |
333 | 0 | if (istr.good()) |
334 | 0 | n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE); |
335 | 0 | else |
336 | 0 | n = 0; |
337 | 0 | } |
338 | 78.6k | if (!XML_Parse(extParser, pBuffer, 0, 1)) |
339 | 0 | handleError(XML_GetErrorCode(extParser)); |
340 | 78.6k | } |
341 | 78.6k | catch (...) |
342 | 78.6k | { |
343 | 0 | delete [] pBuffer; |
344 | 0 | throw; |
345 | 0 | } |
346 | 78.6k | delete [] pBuffer; |
347 | 78.6k | } |
348 | | |
349 | | |
350 | | void ParserEngine::parseExternalCharInputStream(XML_Parser extParser, XMLCharInputStream& istr) |
351 | 0 | { |
352 | 0 | XMLChar *pBuffer = new XMLChar[PARSE_BUFFER_SIZE/sizeof(XMLChar)]; |
353 | 0 | try |
354 | 0 | { |
355 | 0 | std::streamsize n = readChars(istr, pBuffer, PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
356 | 0 | while (n > 0) |
357 | 0 | { |
358 | 0 | if (!XML_Parse(extParser, reinterpret_cast<char*>(pBuffer), static_cast<int>(n*sizeof(XMLChar)), 0)) |
359 | 0 | handleError(XML_GetErrorCode(extParser)); |
360 | 0 | if (istr.good()) |
361 | 0 | n = readChars(istr, pBuffer, static_cast<int>(PARSE_BUFFER_SIZE/sizeof(XMLChar))); |
362 | 0 | else |
363 | 0 | n = 0; |
364 | 0 | } |
365 | 0 | if (!XML_Parse(extParser, reinterpret_cast<char*>(pBuffer), 0, 1)) |
366 | 0 | handleError(XML_GetErrorCode(extParser)); |
367 | 0 | } |
368 | 0 | catch (...) |
369 | 0 | { |
370 | 0 | delete [] pBuffer; |
371 | 0 | throw; |
372 | 0 | } |
373 | 0 | delete [] pBuffer; |
374 | 0 | } |
375 | | |
376 | | |
377 | | std::streamsize ParserEngine::readBytes(XMLByteInputStream& istr, char* pBuffer, std::streamsize bufferSize) |
378 | 78.6k | { |
379 | 78.6k | if (_enablePartialReads) |
380 | 16.1k | { |
381 | 16.1k | istr.read(pBuffer, 1); |
382 | 16.1k | if (istr.gcount() == 1) |
383 | 0 | { |
384 | 0 | std::streamsize n = istr.readsome(pBuffer + 1, bufferSize - 1); |
385 | 0 | return n + 1; |
386 | 0 | } |
387 | 16.1k | else return 0; |
388 | 16.1k | } |
389 | 62.5k | else |
390 | 62.5k | { |
391 | 62.5k | istr.read(pBuffer, bufferSize); |
392 | 62.5k | return istr.gcount(); |
393 | 62.5k | } |
394 | 78.6k | } |
395 | | |
396 | | |
397 | | std::streamsize ParserEngine::readChars(XMLCharInputStream& istr, XMLChar* pBuffer, std::streamsize bufferSize) |
398 | 0 | { |
399 | 0 | if (_enablePartialReads) |
400 | 0 | { |
401 | 0 | istr.read(pBuffer, 1); |
402 | 0 | if (istr.gcount() == 1) |
403 | 0 | { |
404 | 0 | std::streamsize n = istr.readsome(pBuffer + 1, bufferSize - 1); |
405 | 0 | return n + 1; |
406 | 0 | } |
407 | 0 | else return 0; |
408 | 0 | } |
409 | 0 | else |
410 | 0 | { |
411 | 0 | istr.read(pBuffer, bufferSize); |
412 | 0 | return istr.gcount(); |
413 | 0 | } |
414 | 0 | } |
415 | | |
416 | | |
417 | | XMLString ParserEngine::getPublicId() const |
418 | 0 | { |
419 | 0 | return locator().getPublicId(); |
420 | 0 | } |
421 | | |
422 | | |
423 | | XMLString ParserEngine::getSystemId() const |
424 | 0 | { |
425 | 0 | return locator().getSystemId(); |
426 | 0 | } |
427 | | |
428 | | |
429 | | int ParserEngine::getLineNumber() const |
430 | 0 | { |
431 | 0 | return locator().getLineNumber(); |
432 | 0 | } |
433 | | |
434 | | |
435 | | int ParserEngine::getColumnNumber() const |
436 | 0 | { |
437 | 0 | return locator().getColumnNumber(); |
438 | 0 | } |
439 | | |
440 | | |
441 | | namespace |
442 | | { |
443 | | static LocatorImpl nullLocator; |
444 | | } |
445 | | |
446 | | |
447 | | const Locator& ParserEngine::locator() const |
448 | 46.6k | { |
449 | 46.6k | if (_context.empty()) |
450 | 0 | return nullLocator; |
451 | 46.6k | else |
452 | 46.6k | return *_context.back(); |
453 | 46.6k | } |
454 | | |
455 | | |
456 | | void ParserEngine::init() |
457 | 52.1k | { |
458 | 52.1k | if (_parser) |
459 | 0 | XML_ParserFree(_parser); |
460 | | |
461 | 52.1k | if (!_pBuffer) |
462 | 52.1k | _pBuffer = new char[PARSE_BUFFER_SIZE]; |
463 | | |
464 | 52.1k | if (dynamic_cast<NoNamespacePrefixesStrategy*>(_pNamespaceStrategy)) |
465 | 6.61k | { |
466 | 6.61k | _parser = XML_ParserCreateNS(_encodingSpecified ? _encoding.c_str() : nullptr, '\t'); |
467 | 6.61k | if (_parser) |
468 | 6.61k | { |
469 | 6.61k | XML_SetNamespaceDeclHandler(_parser, handleStartNamespaceDecl, handleEndNamespaceDecl); |
470 | 6.61k | } |
471 | 6.61k | } |
472 | 45.4k | else if (dynamic_cast<NamespacePrefixesStrategy*>(_pNamespaceStrategy)) |
473 | 16.4k | { |
474 | 16.4k | _parser = XML_ParserCreateNS(_encodingSpecified ? _encoding.c_str() : nullptr, '\t'); |
475 | 16.4k | if (_parser) |
476 | 16.4k | { |
477 | 16.4k | XML_SetReturnNSTriplet(_parser, 1); |
478 | 16.4k | XML_SetNamespaceDeclHandler(_parser, handleStartNamespaceDecl, handleEndNamespaceDecl); |
479 | 16.4k | } |
480 | 16.4k | } |
481 | 29.0k | else |
482 | 29.0k | { |
483 | 29.0k | _parser = XML_ParserCreate(_encodingSpecified ? _encoding.c_str() : nullptr); |
484 | 29.0k | } |
485 | | |
486 | 52.1k | if (!_parser) throw XMLException("Cannot create Expat parser"); |
487 | | |
488 | 52.1k | XML_SetUserData(_parser, this); |
489 | 52.1k | XML_SetElementHandler(_parser, handleStartElement, handleEndElement); |
490 | 52.1k | XML_SetCharacterDataHandler(_parser, handleCharacterData); |
491 | 52.1k | XML_SetProcessingInstructionHandler(_parser, handleProcessingInstruction); |
492 | 52.1k | if (_expandInternalEntities) |
493 | 52.1k | XML_SetDefaultHandlerExpand(_parser, handleDefault); |
494 | 0 | else |
495 | 0 | XML_SetDefaultHandler(_parser, handleDefault); |
496 | 52.1k | XML_SetUnparsedEntityDeclHandler(_parser, handleUnparsedEntityDecl); |
497 | 52.1k | XML_SetNotationDeclHandler(_parser, handleNotationDecl); |
498 | 52.1k | XML_SetExternalEntityRefHandler(_parser, handleExternalEntityRef); |
499 | 52.1k | XML_SetCommentHandler(_parser, handleComment); |
500 | 52.1k | XML_SetCdataSectionHandler(_parser, handleStartCdataSection, handleEndCdataSection); |
501 | 52.1k | XML_SetDoctypeDeclHandler(_parser, handleStartDoctypeDecl, handleEndDoctypeDecl); |
502 | 52.1k | XML_SetEntityDeclHandler(_parser, handleEntityDecl); |
503 | 52.1k | XML_SetSkippedEntityHandler(_parser, handleSkippedEntity); |
504 | 52.1k | XML_SetParamEntityParsing(_parser, _externalParameterEntities ? XML_PARAM_ENTITY_PARSING_ALWAYS : XML_PARAM_ENTITY_PARSING_NEVER); |
505 | 52.1k | XML_SetUnknownEncodingHandler(_parser, handleUnknownEncoding, this); |
506 | | |
507 | 52.1k | #if defined(XML_DTD) && (XML_MAJOR_VERSION > 2 || (XML_MAJOR_VERSION == 2 && XML_MINOR_VERSION >= 4)) |
508 | 52.1k | if (_maximumAmplificationFactor > 1.0) |
509 | 0 | { |
510 | 0 | XML_SetBillionLaughsAttackProtectionMaximumAmplification(_parser, _maximumAmplificationFactor); |
511 | 0 | } |
512 | 52.1k | if (_activationThresholdBytes > 0) |
513 | 0 | { |
514 | 0 | XML_SetBillionLaughsAttackProtectionActivationThreshold(_parser, _activationThresholdBytes); |
515 | 0 | } |
516 | 52.1k | #endif |
517 | 52.1k | } |
518 | | |
519 | | |
520 | | void ParserEngine::handleError(int errorNo) |
521 | 45.7k | { |
522 | 45.7k | try |
523 | 45.7k | { |
524 | 45.7k | switch (errorNo) |
525 | 45.7k | { |
526 | 0 | case XML_ERROR_NO_MEMORY: |
527 | 0 | throw XMLException("No memory"); |
528 | 3.84k | case XML_ERROR_SYNTAX: |
529 | 3.84k | throw SAXParseException("Syntax error", locator()); |
530 | 8.47k | case XML_ERROR_NO_ELEMENTS: |
531 | 8.47k | throw SAXParseException("No element found", locator()); |
532 | 16.0k | case XML_ERROR_INVALID_TOKEN: |
533 | 16.0k | throw SAXParseException("Invalid token", locator()); |
534 | 7.12k | case XML_ERROR_UNCLOSED_TOKEN: |
535 | 7.12k | throw SAXParseException("Unclosed token", locator()); |
536 | 2.14k | case XML_ERROR_PARTIAL_CHAR: |
537 | 2.14k | throw SAXParseException("Partial character", locator()); |
538 | 372 | case XML_ERROR_TAG_MISMATCH: |
539 | 372 | throw SAXParseException("Tag mismatch", locator()); |
540 | 466 | case XML_ERROR_DUPLICATE_ATTRIBUTE: |
541 | 466 | throw SAXParseException("Duplicate attribute", locator()); |
542 | 620 | case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: |
543 | 620 | throw SAXParseException("Junk after document element", locator()); |
544 | 84 | case XML_ERROR_PARAM_ENTITY_REF: |
545 | 84 | throw SAXParseException("Illegal parameter entity reference", locator()); |
546 | 786 | case XML_ERROR_UNDEFINED_ENTITY: |
547 | 786 | throw SAXParseException("Undefined entity", locator()); |
548 | 38 | case XML_ERROR_RECURSIVE_ENTITY_REF: |
549 | 38 | throw SAXParseException("Recursive entity reference", locator()); |
550 | 533 | case XML_ERROR_ASYNC_ENTITY: |
551 | 533 | throw SAXParseException("Asynchronous entity", locator()); |
552 | 462 | case XML_ERROR_BAD_CHAR_REF: |
553 | 462 | throw SAXParseException("Reference to invalid character number", locator()); |
554 | 4 | case XML_ERROR_BINARY_ENTITY_REF: |
555 | 4 | throw SAXParseException("Reference to binary entity", locator()); |
556 | 11 | case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: |
557 | 11 | throw SAXParseException("Reference to external entity in attribute", locator()); |
558 | 30 | case XML_ERROR_MISPLACED_XML_PI: |
559 | 30 | throw SAXParseException("XML processing instruction not at start of external entity", locator()); |
560 | 172 | case XML_ERROR_UNKNOWN_ENCODING: |
561 | 172 | throw SAXParseException("Unknown encoding", locator()); |
562 | 8 | case XML_ERROR_INCORRECT_ENCODING: |
563 | 8 | throw SAXParseException("Encoding specified in XML declaration is incorrect", locator()); |
564 | 1.19k | case XML_ERROR_UNCLOSED_CDATA_SECTION: |
565 | 1.19k | throw SAXParseException("Unclosed CDATA section", locator()); |
566 | 2.26k | case XML_ERROR_EXTERNAL_ENTITY_HANDLING: |
567 | 2.26k | throw SAXParseException("Error in processing external entity reference", locator()); |
568 | 0 | case XML_ERROR_NOT_STANDALONE: |
569 | 0 | throw SAXParseException("Document is not standalone", locator()); |
570 | 0 | case XML_ERROR_UNEXPECTED_STATE: |
571 | 0 | throw SAXParseException("Unexpected parser state - please send a bug report", locator()); |
572 | 0 | case XML_ERROR_ENTITY_DECLARED_IN_PE: |
573 | 0 | throw SAXParseException("Entity declared in parameter entity", locator()); |
574 | 0 | case XML_ERROR_FEATURE_REQUIRES_XML_DTD: |
575 | 0 | throw SAXParseException("Requested feature requires XML_DTD support in Expat", locator()); |
576 | 0 | case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: |
577 | 0 | throw SAXParseException("Cannot change setting once parsing has begun", locator()); |
578 | 149 | case XML_ERROR_UNBOUND_PREFIX: |
579 | 149 | throw SAXParseException("Unbound prefix", locator()); |
580 | 9 | case XML_ERROR_UNDECLARING_PREFIX: |
581 | 9 | throw SAXParseException("Must not undeclare prefix", locator()); |
582 | 16 | case XML_ERROR_INCOMPLETE_PE: |
583 | 16 | throw SAXParseException("Incomplete markup in parameter entity", locator()); |
584 | 470 | case XML_ERROR_XML_DECL: |
585 | 470 | throw SAXParseException("XML declaration not well-formed", locator()); |
586 | 0 | case XML_ERROR_TEXT_DECL: |
587 | 0 | throw SAXParseException("Text declaration not well-formed", locator()); |
588 | 304 | case XML_ERROR_PUBLICID: |
589 | 304 | throw SAXParseException("Illegal character(s) in public identifier", locator()); |
590 | 0 | case XML_ERROR_SUSPENDED: |
591 | 0 | throw SAXParseException("Parser suspended", locator()); |
592 | 0 | case XML_ERROR_NOT_SUSPENDED: |
593 | 0 | throw SAXParseException("Parser not suspended", locator()); |
594 | 0 | case XML_ERROR_ABORTED: |
595 | 0 | throw SAXParseException("Parsing aborted", locator()); |
596 | 0 | case XML_ERROR_FINISHED: |
597 | 0 | throw SAXParseException("Parsing finished", locator()); |
598 | 0 | case XML_ERROR_SUSPEND_PE: |
599 | 0 | throw SAXParseException("Cannot suspend in external parameter entity", locator()); |
600 | 0 | #if XML_MAJOR_VERSION >= 2 |
601 | 27 | case XML_ERROR_RESERVED_PREFIX_XML: |
602 | 27 | throw SAXParseException("Reserved prefix 'xml' must not be undeclared or bound to another namespace name", locator()); |
603 | 5 | case XML_ERROR_RESERVED_PREFIX_XMLNS: |
604 | 5 | throw SAXParseException("Reserved prefix 'xmlns' must not be declared or undeclared", locator()); |
605 | 6 | case XML_ERROR_RESERVED_NAMESPACE_URI: |
606 | 6 | throw SAXParseException("Prefix must not be bound to one of the reserved namespace names", locator()); |
607 | 0 | #if XML_MAJOR_VERSION > 2 || (XML_MINOR_VERSION >= 2 && XML_MICRO_VERSION >= 1) |
608 | 0 | case XML_ERROR_INVALID_ARGUMENT: |
609 | 0 | throw SAXParseException("Invalid argument", locator()); |
610 | 0 | #endif |
611 | 0 | #if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 3 |
612 | 0 | case XML_ERROR_NO_BUFFER: |
613 | 0 | throw SAXParseException("Internal error: a successful prior call to function XML_GetBuffer is required", locator()); |
614 | 0 | #endif |
615 | 0 | #if XML_MAJOR_VERSION > 2 || XML_MINOR_VERSION >= 4 |
616 | 96 | case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: |
617 | 96 | throw SAXParseException("Limit on input amplification factor (from DTD and entities) breached", locator()); |
618 | 45.7k | #endif |
619 | 45.7k | #endif // XML_MAJOR_VERSION |
620 | 45.7k | } |
621 | 0 | throw XMLException("Unknown Expat error code"); |
622 | 45.7k | } |
623 | 45.7k | catch (SAXException& exc) |
624 | 45.7k | { |
625 | 45.7k | if (_pErrorHandler) _pErrorHandler->error(exc); |
626 | 45.7k | throw; |
627 | 45.7k | } |
628 | 45.7k | catch (Poco::Exception& exc) |
629 | 45.7k | { |
630 | 0 | if (_pErrorHandler) _pErrorHandler->fatalError(SAXParseException("Fatal error", locator(), exc)); |
631 | 0 | throw; |
632 | 0 | } |
633 | 45.7k | } |
634 | | |
635 | | |
636 | | void ParserEngine::pushContext(XML_Parser parser, InputSource* pInputSource) |
637 | 130k | { |
638 | 130k | ContextLocator* pLocator = new ContextLocator(parser, pInputSource->getPublicId(), pInputSource->getSystemId()); |
639 | 130k | _context.push_back(pLocator); |
640 | 130k | } |
641 | | |
642 | | |
643 | | void ParserEngine::popContext() |
644 | 78.8k | { |
645 | 78.8k | poco_assert (!_context.empty()); |
646 | 78.8k | delete _context.back(); |
647 | 78.8k | _context.pop_back(); |
648 | 78.8k | } |
649 | | |
650 | | |
651 | | void ParserEngine::resetContext() |
652 | 104k | { |
653 | 104k | for (auto p: _context) |
654 | 51.8k | { |
655 | 51.8k | delete p; |
656 | 51.8k | } |
657 | 104k | _context.clear(); |
658 | 104k | } |
659 | | |
660 | | |
661 | | void ParserEngine::handleStartElement(void* userData, const XML_Char* name, const XML_Char** atts) |
662 | 3.41M | { |
663 | 3.41M | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
664 | | |
665 | 3.41M | if (pThis->_pContentHandler) |
666 | 3.41M | { |
667 | 3.41M | try |
668 | 3.41M | { |
669 | 3.41M | pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler); |
670 | 3.41M | } |
671 | 3.41M | catch (XMLException& exc) |
672 | 3.41M | { |
673 | 842 | throw SAXParseException(exc.message(), pThis->locator()); |
674 | 842 | } |
675 | 3.41M | } |
676 | 3.41M | } |
677 | | |
678 | | |
679 | | void ParserEngine::handleEndElement(void* userData, const XML_Char* name) |
680 | 1.87M | { |
681 | 1.87M | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
682 | | |
683 | 1.87M | if (pThis->_pContentHandler) |
684 | 1.87M | { |
685 | 1.87M | try |
686 | 1.87M | { |
687 | 1.87M | pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler); |
688 | 1.87M | } |
689 | 1.87M | catch (XMLException& exc) |
690 | 1.87M | { |
691 | 0 | throw SAXParseException(exc.message(), pThis->locator()); |
692 | 0 | } |
693 | 1.87M | } |
694 | 1.87M | } |
695 | | |
696 | | |
697 | | void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int len) |
698 | 13.1M | { |
699 | 13.1M | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
700 | | |
701 | 13.1M | if (pThis->_pContentHandler) |
702 | 13.1M | pThis->_pContentHandler->characters(s, 0, len); |
703 | 13.1M | } |
704 | | |
705 | | |
706 | | void ParserEngine::handleProcessingInstruction(void* userData, const XML_Char* target, const XML_Char* data) |
707 | 297k | { |
708 | 297k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
709 | | |
710 | 297k | if (pThis->_pContentHandler) |
711 | 297k | pThis->_pContentHandler->processingInstruction(target, data); |
712 | 297k | } |
713 | | |
714 | | |
715 | | void ParserEngine::handleDefault(void* userData, const XML_Char* s, int len) |
716 | 12.3M | { |
717 | 12.3M | } |
718 | | |
719 | | |
720 | | void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) |
721 | 1.81k | { |
722 | 1.81k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
723 | | |
724 | 1.81k | XMLString pubId; |
725 | 1.81k | if (publicId) pubId.assign(publicId); |
726 | 1.81k | if (pThis->_pDTDHandler) |
727 | 1.81k | pThis->_pDTDHandler->unparsedEntityDecl(entityName, publicId ? &pubId : nullptr, systemId, notationName); |
728 | 1.81k | } |
729 | | |
730 | | |
731 | | void ParserEngine::handleNotationDecl(void* userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
732 | 59.5k | { |
733 | 59.5k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
734 | | |
735 | 59.5k | XMLString pubId; |
736 | 59.5k | if (publicId) pubId.assign(publicId); |
737 | 59.5k | XMLString sysId; |
738 | 59.5k | if (systemId) sysId.assign(systemId); |
739 | 59.5k | if (pThis->_pDTDHandler) |
740 | 59.5k | pThis->_pDTDHandler->notationDecl(notationName, publicId ? &pubId : nullptr, systemId ? &sysId : nullptr); |
741 | 59.5k | } |
742 | | |
743 | | |
744 | | int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* context, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
745 | 86.1k | { |
746 | 86.1k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(XML_GetUserData(parser)); |
747 | | |
748 | 86.1k | if (!context && !pThis->_externalParameterEntities) return XML_STATUS_ERROR; |
749 | 86.1k | if (context && !pThis->_externalGeneralEntities) return XML_STATUS_ERROR; |
750 | | |
751 | 86.0k | InputSource* pInputSource = nullptr; |
752 | 86.0k | EntityResolver* pEntityResolver = nullptr; |
753 | 86.0k | EntityResolverImpl defaultResolver; |
754 | | |
755 | 86.0k | XMLString sysId(systemId); |
756 | 86.0k | XMLString pubId; |
757 | 86.0k | if (publicId) pubId.assign(publicId); |
758 | | |
759 | 86.0k | URI uri(fromXMLString(pThis->_context.back()->getSystemId())); |
760 | 86.0k | uri.resolve(fromXMLString(sysId)); |
761 | | |
762 | 86.0k | if (pThis->_pEntityResolver) |
763 | 61.4k | { |
764 | 61.4k | pEntityResolver = pThis->_pEntityResolver; |
765 | 61.4k | pInputSource = pEntityResolver->resolveEntity(publicId ? &pubId : nullptr, toXMLString(uri.toString())); |
766 | 61.4k | } |
767 | 86.0k | if (!pInputSource && pThis->_externalGeneralEntities) |
768 | 83.4k | { |
769 | 83.4k | pEntityResolver = &defaultResolver; |
770 | 83.4k | pInputSource = pEntityResolver->resolveEntity(publicId ? &pubId : nullptr, toXMLString(uri.toString())); |
771 | 83.4k | } |
772 | | |
773 | 86.0k | if (pInputSource) |
774 | 78.6k | { |
775 | 78.6k | XML_Parser extParser = XML_ExternalEntityParserCreate(pThis->_parser, context, nullptr); |
776 | 78.6k | if (!extParser) throw XMLException("Cannot create external entity parser"); |
777 | | |
778 | 78.6k | try |
779 | 78.6k | { |
780 | 78.6k | pThis->parseExternal(extParser, pInputSource); |
781 | 78.6k | } |
782 | 78.6k | catch (XMLException&) |
783 | 78.6k | { |
784 | 0 | pEntityResolver->releaseInputSource(pInputSource); |
785 | 0 | XML_ParserFree(extParser); |
786 | 0 | throw; |
787 | 0 | } |
788 | 78.6k | pEntityResolver->releaseInputSource(pInputSource); |
789 | 78.6k | XML_ParserFree(extParser); |
790 | 78.6k | return XML_STATUS_OK; |
791 | 78.6k | } |
792 | 7.45k | else return XML_STATUS_ERROR; |
793 | 86.0k | } |
794 | | |
795 | | |
796 | | int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info) |
797 | 1.49k | { |
798 | 1.49k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(encodingHandlerData); |
799 | | |
800 | 1.49k | XMLString encoding(name); |
801 | 1.49k | TextEncoding* knownEncoding = nullptr; |
802 | | |
803 | 1.49k | EncodingMap::const_iterator it = pThis->_encodings.find(encoding); |
804 | 1.49k | if (it != pThis->_encodings.end()) |
805 | 0 | knownEncoding = it->second; |
806 | 1.49k | else |
807 | 1.49k | knownEncoding = Poco::TextEncoding::find(fromXMLString(encoding)); |
808 | | |
809 | 1.49k | if (knownEncoding) |
810 | 1.32k | { |
811 | 1.32k | const TextEncoding::CharacterMap& map = knownEncoding->characterMap(); |
812 | 340k | for (int i = 0; i < 256; ++i) |
813 | 339k | info->map[i] = map[i]; |
814 | | |
815 | 1.32k | info->data = knownEncoding; |
816 | 1.32k | info->convert = &ParserEngine::convert; |
817 | 1.32k | info->release = nullptr; |
818 | 1.32k | return XML_STATUS_OK; |
819 | 1.32k | } |
820 | 168 | else return XML_STATUS_ERROR; |
821 | 1.49k | } |
822 | | |
823 | | |
824 | | void ParserEngine::handleComment(void* userData, const XML_Char* data) |
825 | 882k | { |
826 | 882k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
827 | | |
828 | | #if defined(XML_UNICODE_WCHAR_T) |
829 | | if (pThis->_pLexicalHandler) |
830 | | pThis->_pLexicalHandler->comment(data, 0, (int) std::wcslen(data)); |
831 | | #else |
832 | 882k | if (pThis->_pLexicalHandler) |
833 | 441k | pThis->_pLexicalHandler->comment(data, 0, (int) std::strlen(data)); |
834 | 882k | #endif |
835 | 882k | } |
836 | | |
837 | | |
838 | | void ParserEngine::handleStartCdataSection(void* userData) |
839 | 1.10M | { |
840 | 1.10M | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
841 | | |
842 | 1.10M | if (pThis->_pLexicalHandler) |
843 | 553k | pThis->_pLexicalHandler->startCDATA(); |
844 | 1.10M | } |
845 | | |
846 | | |
847 | | void ParserEngine::handleEndCdataSection(void* userData) |
848 | 1.10M | { |
849 | 1.10M | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
850 | | |
851 | 1.10M | if (pThis->_pLexicalHandler) |
852 | 552k | pThis->_pLexicalHandler->endCDATA(); |
853 | 1.10M | } |
854 | | |
855 | | |
856 | | void ParserEngine::handleStartNamespaceDecl(void* userData, const XML_Char* prefix, const XML_Char* uri) |
857 | 86.4k | { |
858 | 86.4k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
859 | | |
860 | 86.4k | if (pThis->_pContentHandler) |
861 | 86.4k | pThis->_pContentHandler->startPrefixMapping((prefix ? XMLString(prefix) : EMPTY_STRING), (uri ? XMLString(uri) : EMPTY_STRING)); |
862 | 86.4k | } |
863 | | |
864 | | |
865 | | void ParserEngine::handleEndNamespaceDecl(void* userData, const XML_Char* prefix) |
866 | 48.1k | { |
867 | 48.1k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
868 | | |
869 | 48.1k | if (pThis->_pContentHandler) |
870 | 48.1k | pThis->_pContentHandler->endPrefixMapping(prefix ? XMLString(prefix) : EMPTY_STRING); |
871 | 48.1k | } |
872 | | |
873 | | |
874 | | void ParserEngine::handleStartDoctypeDecl(void* userData, const XML_Char* doctypeName, const XML_Char *systemId, const XML_Char* publicId, int hasInternalSubset) |
875 | 21.1k | { |
876 | 21.1k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
877 | | |
878 | 21.1k | if (pThis->_pLexicalHandler) |
879 | 10.5k | { |
880 | 10.5k | XMLString sysId = systemId ? XMLString(systemId) : EMPTY_STRING; |
881 | 10.5k | XMLString pubId = publicId ? XMLString(publicId) : EMPTY_STRING; |
882 | 10.5k | pThis->_pLexicalHandler->startDTD(doctypeName, pubId, sysId); |
883 | 10.5k | } |
884 | 21.1k | } |
885 | | |
886 | | |
887 | | void ParserEngine::handleEndDoctypeDecl(void* userData) |
888 | 8.80k | { |
889 | 8.80k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
890 | | |
891 | 8.80k | if (pThis->_pLexicalHandler) |
892 | 4.40k | pThis->_pLexicalHandler->endDTD(); |
893 | 8.80k | } |
894 | | |
895 | | |
896 | | void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, |
897 | | const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) |
898 | 21.0k | { |
899 | 21.0k | if (value) |
900 | 13.1k | handleInternalParsedEntityDecl(userData, entityName, value, valueLength); |
901 | 7.92k | else |
902 | 7.92k | handleExternalParsedEntityDecl(userData, entityName, base, systemId, publicId); |
903 | 21.0k | } |
904 | | |
905 | | |
906 | | void ParserEngine::handleExternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
907 | 7.92k | { |
908 | 7.92k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
909 | | |
910 | 7.92k | XMLString pubId; |
911 | 7.92k | if (publicId) pubId.assign(publicId); |
912 | 7.92k | if (pThis->_pDeclHandler) |
913 | 0 | pThis->_pDeclHandler->externalEntityDecl(entityName, publicId ? &pubId : nullptr, systemId); |
914 | 7.92k | } |
915 | | |
916 | | |
917 | | void ParserEngine::handleInternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* replacementText, int replacementTextLength) |
918 | 13.1k | { |
919 | 13.1k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
920 | | |
921 | 13.1k | XMLString replText(replacementText, replacementTextLength); |
922 | 13.1k | if (pThis->_pDeclHandler) |
923 | 0 | pThis->_pDeclHandler->internalEntityDecl(entityName, replText); |
924 | 13.1k | } |
925 | | |
926 | | |
927 | | void ParserEngine::handleSkippedEntity(void* userData, const XML_Char* entityName, int isParameterEntity) |
928 | 128k | { |
929 | 128k | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
930 | | |
931 | 128k | if (pThis->_pContentHandler) |
932 | 128k | pThis->_pContentHandler->skippedEntity(entityName); |
933 | 128k | } |
934 | | |
935 | | |
936 | | int ParserEngine::convert(void* data, const char* s) |
937 | 16.6k | { |
938 | 16.6k | TextEncoding* pEncoding = reinterpret_cast<TextEncoding*>(data); |
939 | 16.6k | return pEncoding->convert((const unsigned char*) s); |
940 | 16.6k | } |
941 | | |
942 | | |
943 | | } } // namespace Poco::XML |