/src/libreoffice/sax/source/fastparser/fastparser.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sax/fastparser.hxx> |
21 | | #include <sax/fastattribs.hxx> |
22 | | #include <utility> |
23 | | #include <xml2utf.hxx> |
24 | | |
25 | | #include <com/sun/star/io/XSeekable.hpp> |
26 | | #include <com/sun/star/lang/DisposedException.hpp> |
27 | | #include <com/sun/star/lang/IllegalArgumentException.hpp> |
28 | | #include <com/sun/star/uno/XComponentContext.hpp> |
29 | | #include <com/sun/star/container/XMap.hpp> |
30 | | #include <com/sun/star/xml/sax/FastToken.hpp> |
31 | | #include <com/sun/star/xml/sax/SAXParseException.hpp> |
32 | | #include <com/sun/star/xml/sax/XFastContextHandler.hpp> |
33 | | #include <cppuhelper/implbase.hxx> |
34 | | #include <cppuhelper/supportsservice.hxx> |
35 | | #include <cppuhelper/exc_hlp.hxx> |
36 | | #include <osl/conditn.hxx> |
37 | | #include <rtl/ref.hxx> |
38 | | #include <sal/log.hxx> |
39 | | #include <salhelper/thread.hxx> |
40 | | #include <comphelper/diagnose_ex.hxx> |
41 | | #include <comphelper/string.hxx> |
42 | | #include <o3tl/string_view.hxx> |
43 | | |
44 | | #include <queue> |
45 | | #include <memory> |
46 | | #include <mutex> |
47 | | #include <optional> |
48 | | #include <stack> |
49 | | #include <string_view> |
50 | | #include <unordered_map> |
51 | | #include <vector> |
52 | | #include <cassert> |
53 | | #include <cstring> |
54 | | #include <libxml/parser.h> |
55 | | |
56 | | // Inverse of libxml's BAD_CAST: reinterprets a libxml string pointer as const char*. |
57 | 132M | #define XML_CAST( str ) reinterpret_cast< const char* >( str ) |
58 | | |
59 | | using namespace ::osl; |
60 | | using namespace ::cppu; |
61 | | using namespace ::com::sun::star::uno; |
62 | | using namespace ::com::sun::star::lang; |
63 | | using namespace ::com::sun::star::xml::sax; |
64 | | using namespace ::com::sun::star::io; |
65 | | using namespace com::sun::star; |
66 | | using namespace sax_fastparser; |
67 | | |
68 | | static void NormalizeURI( OUString& rName ); |
69 | | |
70 | | namespace { |
71 | | |
72 | | struct Event; |
73 | | class FastLocatorImpl; |
74 | | struct NamespaceDefine; |
75 | | struct Entity; |
76 | | |
77 | | typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap; |
78 | | |
79 | | struct EventList |
80 | | { |
81 | | std::vector<Event> maEvents; |
82 | | bool mbIsAttributesEmpty; |
83 | | }; |
84 | | |
/// Kind of parser event stored in an Event.
enum class CallbackType
{
    START_ELEMENT,
    END_ELEMENT,
    CHARACTERS,
    PROCESSING_INSTRUCTION,
    DONE,
    EXCEPTION ///< queued by ParserThread when parse() throws
};
86 | | |
87 | | struct Event |
88 | | { |
89 | | CallbackType maType; |
90 | | sal_Int32 mnElementToken; |
91 | | OUString msNamespace; |
92 | | OUString msElementName; |
93 | | rtl::Reference< FastAttributeList > mxAttributes; |
94 | | rtl::Reference< FastAttributeList > mxDeclAttributes; |
95 | | OUString msChars; |
96 | | }; |
97 | | |
98 | | struct NameWithToken |
99 | | { |
100 | | OUString msName; |
101 | | sal_Int32 mnToken; |
102 | | |
103 | | NameWithToken(OUString sName, sal_Int32 nToken) : |
104 | 32.6M | msName(std::move(sName)), mnToken(nToken) {} |
105 | | }; |
106 | | |
107 | | struct SaxContext |
108 | | { |
109 | | Reference< XFastContextHandler > mxContext; |
110 | | sal_Int32 mnElementToken; |
111 | | std::optional<OUString> moNamespace; |
112 | | std::optional<OUString> moElementName; |
113 | | |
114 | | SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ): |
115 | 32.6M | mnElementToken(nElementToken) |
116 | 32.6M | { |
117 | 32.6M | if (nElementToken == FastToken::DONTKNOW) |
118 | 16.1M | { |
119 | 16.1M | moNamespace = aNamespace; |
120 | 16.1M | moElementName = aElementName; |
121 | 16.1M | } |
122 | 32.6M | } |
123 | | }; |
124 | | |
125 | | struct ParserData |
126 | | { |
127 | | css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler; |
128 | | rtl::Reference<FastTokenHandlerBase> mxTokenHandler; |
129 | | css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler; |
130 | | css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler; |
131 | | |
132 | | ParserData(); |
133 | | }; |
134 | | |
135 | | struct NamespaceDefine |
136 | | { |
137 | | OString maPrefix; |
138 | | sal_Int32 mnToken; |
139 | | OUString maNamespaceURL; |
140 | | |
141 | | NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL ) |
142 | 1.29M | : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {} |
143 | 12.7M | NamespaceDefine() : mnToken(-1) {} |
144 | | }; |
145 | | |
146 | | // Entity binds all state needed to parse a single file, i.e. a single call of parseStream() |
147 | | struct Entity : public ParserData |
148 | | { |
149 | | // Number of event slots allocated for one batch (see getEventList()): |
150 | | static const size_t mnEventListSize = 1000; |
151 | | |
152 | | // unique for each Entity instance: |
153 | | |
154 | | // Number of valid events in mxProducedEvents: |
155 | | size_t mnProducedEventsSize; |
156 | | std::optional<EventList> mxProducedEvents; |
157 | | std::queue<EventList> maPendingEvents; |
158 | | std::queue<EventList> maUsedEvents; |
159 | | std::mutex maEventProtector; |
160 | | /* NOTE(review): the water marks and conditions below presumably throttle producer vs. consumer; their use is not visible in this declaration */ |
161 | | static const size_t mnEventLowWater = 4; |
162 | | static const size_t mnEventHighWater = 8; |
163 | | osl::Condition maConsumeResume; |
164 | | osl::Condition maProduceResume; |
165 | | // Event returned by getEvent() when threading is disabled: |
166 | | Event maSharedEvent; |
167 | | |
168 | | // per-parse configuration (historic note: the copy constructor that copied these is now deleted): |
169 | | |
170 | | // Allows threading to be disabled for small documents: |
171 | | bool mbEnableThreads; |
172 | | css::xml::sax::InputSource maStructSource; |
173 | | xmlParserCtxtPtr mpParser; |
174 | | ::sax_expatwrap::XMLFile2UTFConverter maConverter; |
175 | | |
176 | | // Exceptions cannot be thrown through the C-XmlParser (possible |
177 | | // resource leaks), therefore any exception thrown by a UNO callback |
178 | | // must be saved somewhere until the C-XmlParser is stopped. |
179 | | css::uno::Any maSavedException; |
180 | | std::mutex maSavedExceptionMutex; |
181 | | void saveException( const Any & e ); |
182 | | // Thread-safe check whether maSavedException has a value |
183 | | bool hasException(); |
184 | | void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, |
185 | | bool mbDuringParse ); |
186 | | |
187 | | std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack; |
188 | | /* Context for main thread consuming events. |
189 | | * startElement() stores the data, which characters() and endElement() use |
190 | | */ |
191 | | std::stack< SaxContext, std::vector<SaxContext> > maContextStack; |
192 | | // Determines which elements of maNamespaceDefines are valid in current context |
193 | | std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount; |
194 | | std::vector< NamespaceDefine > maNamespaceDefines; |
195 | | |
196 | | explicit Entity( const ParserData& rData ); |
197 | | Entity( const Entity& rEntity ) = delete; |
198 | | Entity& operator=( const Entity& rEntity ) = delete; |
199 | | void startElement( Event const *pEvent ); |
200 | | void characters( const OUString& sChars ); |
201 | | void endElement(); |
202 | | void processingInstruction( const OUString& rTarget, const OUString& rData ); |
203 | | void transferUsedEvents(); |
204 | | EventList& getEventList(); |
205 | | Event& getEvent( CallbackType aType ); |
206 | | }; |
207 | | |
208 | | // Stuff for custom entity names |
209 | | struct ReplacementPair |
210 | | { |
211 | | OUString name; |
212 | | OUString replacement; |
213 | | }; |
214 | | inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs) |
215 | 0 | { |
216 | 0 | return lhs.name < rhs.name; |
217 | 0 | } |
218 | | inline bool operator<(const ReplacementPair& lhs, const char* rhs) |
219 | 0 | { |
220 | 0 | return lhs.name.compareToAscii(rhs) < 0; |
221 | 0 | } |
222 | | |
223 | | } // namespace |
224 | | |
225 | | namespace sax_fastparser { |
226 | | /* Parser implementation: keeps one Entity per running parseStream() call and receives the C parser callbacks. */ |
227 | | class FastSaxParserImpl |
228 | | { |
229 | | public: |
230 | | explicit FastSaxParserImpl(); |
231 | | ~FastSaxParserImpl(); |
232 | | |
233 | | private: |
234 | | std::vector<ReplacementPair> m_Replacements; |
235 | | std::vector<xmlEntityPtr> m_TemporalEntities; |
236 | | |
237 | | public: |
238 | | // XFastParser |
239 | | /// @throws css::xml::sax::SAXException |
240 | | /// @throws css::io::IOException |
241 | | /// @throws css::uno::RuntimeException |
242 | | void parseStream( const css::xml::sax::InputSource& aInputSource ); |
243 | | /// @throws css::uno::RuntimeException |
244 | | void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler ); |
245 | | /// @throws css::uno::RuntimeException |
246 | | void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler ); |
247 | | /// @throws css::lang::IllegalArgumentException |
248 | | /// @throws css::uno::RuntimeException |
249 | | void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ); |
250 | | /// @throws css::lang::IllegalArgumentException |
251 | | /// @throws css::uno::RuntimeException |
252 | | OUString const & getNamespaceURL( std::u16string_view rPrefix ); |
253 | | /// @throws css::uno::RuntimeException |
254 | | void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler ); |
255 | | /// @throws css::uno::RuntimeException |
256 | | void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler); |
257 | | // Fake DTD file |
258 | | void setCustomEntityNames( |
259 | | const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements); |
260 | | |
261 | | // called by the C (libxml2, xmlChar-based) SAX callbacks call_callback*() |
262 | | void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, |
263 | | int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes ); |
264 | | void callbackEndElement(); |
265 | | void callbackCharacters( const xmlChar* s, int nLen ); |
266 | | void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data ); |
267 | | xmlEntityPtr callbackGetEntity( const xmlChar *name ); |
268 | | |
269 | | void pushEntity(const ParserData&, xml::sax::InputSource const&); |
270 | | void popEntity(); |
271 | 131M | Entity& getEntity() { return *mpTop; } |
272 | | void parse(); |
273 | | void produce( bool bForceFlush = false ); |
274 | | bool m_bIgnoreMissingNSDecl; |
275 | | bool m_bDisableThreadedParser; |
276 | | css::uno::Reference<css::container::XMap> mxMap; ///< _ prefix string mapper for translation |
277 | | |
278 | | private: |
279 | | bool consume(EventList&); |
280 | | void deleteUsedEvents(); |
281 | | void sendPendingCharacters(); |
282 | | void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes); |
283 | | |
284 | | sal_Int32 GetToken( const xmlChar* pName ); |
285 | | /// @throws css::xml::sax::SAXException |
286 | | sal_Int32 GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName ); |
287 | | /// @throws css::xml::sax::SAXException |
288 | | OUString const & GetNamespaceURL( std::string_view rPrefix ); |
289 | | sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL ); |
290 | | sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName ); |
291 | | void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL ); |
292 | | |
293 | | private: |
294 | | std::mutex maMutex; ///< Protecting whole parseStream() execution |
295 | | ::rtl::Reference< FastLocatorImpl > mxDocumentLocator; |
296 | | NamespaceMap maNamespaceMap; |
297 | | |
298 | | ParserData maData; ///< Cached parser configuration for next call of parseStream(). |
299 | | |
300 | | Entity *mpTop; ///< std::stack::top() is amazingly slow => cache this. |
301 | | std::stack< Entity > maEntities; ///< Entity stack for each call of parseStream(). |
302 | | std::vector<char> pendingCharacters; ///< Data from characters() callback that needs to be sent. |
303 | | }; |
304 | | |
305 | | } // namespace sax_fastparser |
306 | | |
307 | | namespace { |
308 | | |
309 | | class ParserThread: public salhelper::Thread |
310 | | { |
311 | | FastSaxParserImpl *mpParser; |
312 | | public: |
313 | 0 | explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {} |
314 | | private: |
315 | | virtual void execute() override |
316 | 0 | { |
317 | 0 | try |
318 | 0 | { |
319 | 0 | mpParser->parse(); |
320 | 0 | } |
321 | 0 | catch (...) |
322 | 0 | { |
323 | 0 | Entity &rEntity = mpParser->getEntity(); |
324 | 0 | rEntity.getEvent( CallbackType::EXCEPTION ); |
325 | 0 | mpParser->produce( true ); |
326 | 0 | } |
327 | 0 | } |
328 | | }; |
329 | | |
330 | | extern "C" { |
331 | | |
332 | | static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, |
333 | | int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes) |
334 | 32.6M | { |
335 | 32.6M | FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); |
336 | 32.6M | pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes ); |
337 | 32.6M | } |
338 | | |
339 | | static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/) |
340 | 15.9M | { |
341 | 15.9M | FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); |
342 | 15.9M | pFastParser->callbackEndElement(); |
343 | 15.9M | } |
344 | | |
345 | | static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen ) |
346 | 11.9M | { |
347 | 11.9M | FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); |
348 | 11.9M | pFastParser->callbackCharacters( s, nLen ); |
349 | 11.9M | } |
350 | | |
351 | | static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data ) |
352 | 53.5k | { |
353 | 53.5k | FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); |
354 | 53.5k | pFastParser->callbackProcessingInstruction( target, data ); |
355 | 53.5k | } |
356 | | |
357 | | static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name) |
358 | 55.1k | { |
359 | 55.1k | FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); |
360 | 55.1k | return pFastParser->callbackGetEntity( name ); |
361 | 55.1k | } |
362 | | |
363 | | } |
364 | | |
365 | | class FastLocatorImpl : public WeakImplHelper< XLocator > |
366 | | { |
367 | | public: |
368 | 322k | explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {} |
369 | | |
370 | 322k | void dispose() { mpParser = nullptr; } |
371 | | /// @throws RuntimeException |
372 | 1.09M | void checkDispose() const { if( !mpParser ) throw DisposedException(); } |
373 | | |
374 | | //XLocator |
375 | | virtual sal_Int32 SAL_CALL getColumnNumber() override; |
376 | | virtual sal_Int32 SAL_CALL getLineNumber() override; |
377 | | virtual OUString SAL_CALL getPublicId() override; |
378 | | virtual OUString SAL_CALL getSystemId() override; |
379 | | |
380 | | private: |
381 | | FastSaxParserImpl *mpParser; |
382 | | }; |
383 | | |
384 | | sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber() |
385 | 228k | { |
386 | 228k | checkDispose(); |
387 | 228k | return xmlSAX2GetColumnNumber( mpParser->getEntity().mpParser ); |
388 | 228k | } |
389 | | |
390 | | sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber() |
391 | 318k | { |
392 | 318k | checkDispose(); |
393 | 318k | return xmlSAX2GetLineNumber( mpParser->getEntity().mpParser ); |
394 | 318k | } |
395 | | |
396 | | OUString SAL_CALL FastLocatorImpl::getPublicId() |
397 | 228k | { |
398 | 228k | checkDispose(); |
399 | 228k | return mpParser->getEntity().maStructSource.sPublicId; |
400 | 228k | } |
401 | | |
402 | | OUString SAL_CALL FastLocatorImpl::getSystemId() |
403 | 318k | { |
404 | 318k | checkDispose(); |
405 | 318k | return mpParser->getEntity().maStructSource.sSystemId; |
406 | 318k | } |
407 | | |
408 | | ParserData::ParserData() |
409 | 322k | {} |
410 | | |
411 | | Entity::Entity(const ParserData& rData) |
412 | 219k | : ParserData(rData) |
413 | 219k | , mnProducedEventsSize(0) |
414 | 219k | , mbEnableThreads(false) |
415 | 219k | , mpParser(nullptr) |
416 | 219k | { |
417 | 219k | } |
418 | | |
419 | | void Entity::startElement( Event const *pEvent ) |
420 | 32.6M | { |
421 | 32.6M | const sal_Int32& nElementToken = pEvent->mnElementToken; |
422 | 32.6M | const OUString& aNamespace = pEvent->msNamespace; |
423 | 32.6M | const OUString& aElementName = pEvent->msElementName; |
424 | | |
425 | | // Use un-wrapped pointers to avoid significant acquire/release overhead |
426 | 32.6M | XFastContextHandler *pParentContext = nullptr; |
427 | 32.6M | if( !maContextStack.empty() ) |
428 | 32.4M | { |
429 | 32.4M | pParentContext = maContextStack.top().mxContext.get(); |
430 | 32.4M | if( !pParentContext ) |
431 | 135k | { |
432 | 135k | maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) ); |
433 | 135k | return; |
434 | 135k | } |
435 | 32.4M | } |
436 | | |
437 | 32.5M | maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) ); |
438 | | |
439 | 32.5M | try |
440 | 32.5M | { |
441 | 32.5M | const Reference< XFastAttributeList > xAttr( pEvent->mxAttributes ); |
442 | 32.5M | Reference< XFastContextHandler > xContext; |
443 | | |
444 | 32.5M | if ( mxNamespaceHandler.is() ) |
445 | 20.8M | { |
446 | 20.8M | const Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes(); |
447 | 20.8M | for (const auto& rNSDeclAttrib : NSDeclAttribs) |
448 | 1.01M | { |
449 | 1.01M | mxNamespaceHandler->registerNamespace( rNSDeclAttrib.Name, rNSDeclAttrib.Value ); |
450 | 1.01M | } |
451 | 20.8M | } |
452 | | |
453 | 32.5M | if( nElementToken == FastToken::DONTKNOW ) |
454 | 16.1M | { |
455 | 16.1M | if( pParentContext ) |
456 | 16.0M | xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); |
457 | 37.9k | else if( mxDocumentHandler.is() ) |
458 | 37.9k | xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); |
459 | | |
460 | 16.1M | if( xContext.is() ) |
461 | 16.1M | { |
462 | 16.1M | xContext->startUnknownElement( aNamespace, aElementName, xAttr ); |
463 | 16.1M | } |
464 | 16.1M | } |
465 | 16.4M | else |
466 | 16.4M | { |
467 | 16.4M | if( pParentContext ) |
468 | 16.2M | xContext = pParentContext->createFastChildContext( nElementToken, xAttr ); |
469 | 159k | else if( mxDocumentHandler.is() ) |
470 | 162k | xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); |
471 | | |
472 | 16.4M | if( xContext.is() ) |
473 | 15.1M | xContext->startFastElement( nElementToken, xAttr ); |
474 | 16.4M | } |
475 | | // swap the reference we own in to avoid referencing thrash. |
476 | 32.5M | maContextStack.top().mxContext = std::move( xContext ); |
477 | 32.5M | } |
478 | 32.5M | catch (...) |
479 | 32.5M | { |
480 | 6.42k | saveException( ::cppu::getCaughtException() ); |
481 | 6.42k | } |
482 | 32.5M | } |
483 | | |
484 | | void Entity::characters( const OUString& sChars ) |
485 | 10.6M | { |
486 | 10.6M | if (maContextStack.empty()) |
487 | 368 | { |
488 | | // Malformed XML stream !? |
489 | 368 | return; |
490 | 368 | } |
491 | | |
492 | 10.6M | XFastContextHandler * pContext( maContextStack.top().mxContext.get() ); |
493 | 10.6M | if( pContext ) try |
494 | 10.5M | { |
495 | 10.5M | pContext->characters( sChars ); |
496 | 10.5M | } |
497 | 10.5M | catch (...) |
498 | 10.5M | { |
499 | 0 | saveException( ::cppu::getCaughtException() ); |
500 | 0 | } |
501 | 10.6M | } |
502 | | |
503 | | void Entity::endElement() |
504 | 15.9M | { |
505 | 15.9M | if (maContextStack.empty()) |
506 | 2.11k | { |
507 | | // Malformed XML stream !? |
508 | 2.11k | return; |
509 | 2.11k | } |
510 | | |
511 | 15.9M | const SaxContext& aContext = maContextStack.top(); |
512 | 15.9M | XFastContextHandler* pContext( aContext.mxContext.get() ); |
513 | 15.9M | if( pContext ) |
514 | 14.5M | try |
515 | 14.5M | { |
516 | 14.5M | sal_Int32 nElementToken = aContext.mnElementToken; |
517 | 14.5M | if( nElementToken != FastToken::DONTKNOW ) |
518 | 13.5M | pContext->endFastElement( nElementToken ); |
519 | 925k | else |
520 | 925k | pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName ); |
521 | 14.5M | } |
522 | 14.5M | catch (...) |
523 | 14.5M | { |
524 | 54.4k | saveException( ::cppu::getCaughtException() ); |
525 | 54.4k | } |
526 | 15.9M | maContextStack.pop(); |
527 | 15.9M | } |
528 | | |
529 | | void Entity::processingInstruction( const OUString& rTarget, const OUString& rData ) |
530 | 53.5k | { |
531 | 53.5k | if( mxDocumentHandler.is() ) try |
532 | 53.5k | { |
533 | 53.5k | mxDocumentHandler->processingInstruction( rTarget, rData ); |
534 | 53.5k | } |
535 | 53.5k | catch (...) |
536 | 53.5k | { |
537 | 0 | saveException( ::cppu::getCaughtException() ); |
538 | 0 | } |
539 | 53.5k | } |
540 | | |
541 | | void Entity::transferUsedEvents() |
542 | 0 | { |
543 | 0 | std::unique_lock aGuard(maEventProtector); |
544 | 0 | if (!maUsedEvents.empty()) |
545 | 0 | { |
546 | 0 | mxProducedEvents = std::move(maUsedEvents.front()); |
547 | 0 | maUsedEvents.pop(); |
548 | 0 | aGuard.unlock(); // unlock |
549 | 0 | mnProducedEventsSize = 0; |
550 | 0 | } |
551 | 0 | } |
552 | | |
553 | | EventList& Entity::getEventList() |
554 | 0 | { |
555 | 0 | if (!mxProducedEvents) |
556 | 0 | { |
557 | 0 | transferUsedEvents(); |
558 | 0 | if (!mxProducedEvents) |
559 | 0 | { |
560 | 0 | mxProducedEvents.emplace(); |
561 | 0 | mxProducedEvents->maEvents.resize(mnEventListSize); |
562 | 0 | mxProducedEvents->mbIsAttributesEmpty = false; |
563 | 0 | mnProducedEventsSize = 0; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | return *mxProducedEvents; |
567 | 0 | } |
568 | | |
569 | | Event& Entity::getEvent( CallbackType aType ) |
570 | 48.7M | { |
571 | 48.7M | if (!mbEnableThreads) |
572 | 48.7M | return maSharedEvent; |
573 | | |
574 | 18.4E | EventList& rEventList = getEventList(); |
575 | 18.4E | if (mnProducedEventsSize == rEventList.maEvents.size()) |
576 | 0 | { |
577 | 0 | SAL_WARN_IF(!maSavedException.hasValue(), "sax", |
578 | 0 | "Event vector should only exceed " << mnEventListSize << |
579 | 0 | " temporarily while an exception is pending"); |
580 | 0 | rEventList.maEvents.resize(mnProducedEventsSize + 1); |
581 | 0 | } |
582 | 18.4E | Event& rEvent = rEventList.maEvents[mnProducedEventsSize++]; |
583 | 18.4E | rEvent.maType = aType; |
584 | 18.4E | return rEvent; |
585 | 18.4E | } |
586 | | |
587 | | OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine ) |
588 | 90.4k | { |
589 | 90.4k | const char* pMessage; |
590 | 90.4k | const xmlError* error = xmlCtxtGetLastError( ctxt ); |
591 | 90.4k | if( error && error->message ) |
592 | 89.2k | pMessage = error->message; |
593 | 1.17k | else |
594 | 1.17k | pMessage = "unknown error"; |
595 | 90.4k | return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " + |
596 | 90.4k | OUString(pMessage, strlen(pMessage), RTL_TEXTENCODING_ASCII_US); |
597 | 90.4k | } |
598 | | |
599 | | // throw an exception, but avoid callback if |
600 | | // during a threaded produce |
601 | | void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, |
602 | | bool mbDuringParse ) |
603 | 90.4k | { |
604 | | // Error during parsing ! |
605 | 90.4k | Any savedException; |
606 | 90.4k | { |
607 | 90.4k | std::scoped_lock g(maSavedExceptionMutex); |
608 | 90.4k | if (maSavedException.hasValue()) |
609 | 4.12k | { |
610 | 4.12k | savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get()); |
611 | 4.12k | } |
612 | 90.4k | } |
613 | 90.4k | SAXParseException aExcept( |
614 | 90.4k | lclGetErrorMessage( mpParser, |
615 | 90.4k | xDocumentLocator->getSystemId(), |
616 | 90.4k | xDocumentLocator->getLineNumber() ), |
617 | 90.4k | Reference< XInterface >(), |
618 | 90.4k | savedException, |
619 | 90.4k | xDocumentLocator->getPublicId(), |
620 | 90.4k | xDocumentLocator->getSystemId(), |
621 | 90.4k | xDocumentLocator->getLineNumber(), |
622 | 90.4k | xDocumentLocator->getColumnNumber() |
623 | 90.4k | ); |
624 | | |
625 | | // error handler is set, it may throw the exception |
626 | 90.4k | if( !mbDuringParse || !mbEnableThreads ) |
627 | 90.4k | { |
628 | 90.4k | if (mxErrorHandler.is() ) |
629 | 0 | mxErrorHandler->fatalError( Any( aExcept ) ); |
630 | 90.4k | } |
631 | | |
632 | | // error handler has not thrown, but parsing must stop => throw ourselves |
633 | 90.4k | throw aExcept; |
634 | 90.4k | } |
635 | | |
636 | | // In the single threaded case we emit events via our C |
637 | | // callbacks, so any exception caught must be queued up until |
638 | | // we can safely re-throw it from our C++ parent of parse() |
639 | | |
640 | | // If multi-threaded, we need to push an EXCEPTION event, at |
641 | | // which point we transfer ownership of maSavedException to |
642 | | // the consuming thread. |
643 | | void Entity::saveException( const Any & e ) |
644 | 63.3k | { |
645 | | // fdo#81214 - allow the parser to run on after an exception, |
646 | | // unexpectedly some 'startElements' produce a UNO_QUERY_THROW |
647 | | // for XComponent; and yet expect to continue parsing. |
648 | 63.3k | SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e)); |
649 | 63.3k | std::scoped_lock g(maSavedExceptionMutex); |
650 | 63.3k | if (maSavedException.hasValue()) |
651 | 59.2k | { |
652 | 59.2k | SAL_INFO("sax.fastparser", "discarding exception, already have one"); |
653 | 59.2k | } |
654 | 4.12k | else |
655 | 4.12k | { |
656 | 4.12k | maSavedException = e; |
657 | 4.12k | } |
658 | 63.3k | } |
659 | | |
660 | | bool Entity::hasException() |
661 | 361k | { |
662 | 361k | std::scoped_lock g(maSavedExceptionMutex); |
663 | 361k | return maSavedException.hasValue(); |
664 | 361k | } |
665 | | |
666 | | } // namespace |
667 | | |
668 | | namespace sax_fastparser { |
669 | | |
670 | | FastSaxParserImpl::FastSaxParserImpl() : |
671 | 322k | m_bIgnoreMissingNSDecl(false), |
672 | 322k | m_bDisableThreadedParser(false), |
673 | 322k | mpTop(nullptr) |
674 | 322k | { |
675 | 322k | mxDocumentLocator.set( new FastLocatorImpl( this ) ); |
676 | 322k | } |
677 | | |
678 | | FastSaxParserImpl::~FastSaxParserImpl() |
679 | 322k | { |
680 | 322k | if( mxDocumentLocator.is() ) |
681 | 322k | mxDocumentLocator->dispose(); |
682 | 322k | for (auto& entity : m_TemporalEntities) |
683 | 0 | { |
684 | 0 | if (!entity) |
685 | 0 | continue; |
686 | 0 | xmlNodePtr pPtr = reinterpret_cast<xmlNodePtr>(entity); |
687 | 0 | xmlUnlinkNode(pPtr); |
688 | 0 | xmlFreeNode(pPtr); |
689 | 0 | } |
690 | 322k | } |
691 | | |
692 | | void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL ) |
693 | 1.29M | { |
694 | 1.29M | Entity& rEntity = getEntity(); |
695 | 1.29M | assert(!rEntity.maNamespaceCount.empty()); // need a context! |
696 | | |
697 | 1.29M | sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++; |
698 | 1.29M | if( rEntity.maNamespaceDefines.size() <= nOffset ) |
699 | 198k | rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); |
700 | | |
701 | 1.29M | rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL ); |
702 | 1.29M | } |
703 | | |
704 | | sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName) |
705 | 50.8M | { |
706 | 50.8M | return FastTokenHandlerBase::getTokenFromChars( getEntity(). mxTokenHandler.get(), |
707 | 50.8M | XML_CAST( pName ) ); // uses utf-8 |
708 | 50.8M | } |
709 | | |
710 | | sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName ) |
711 | 18.6M | { |
712 | 18.6M | Entity& rEntity = getEntity(); |
713 | 18.6M | if (rEntity.maNamespaceCount.empty()) |
714 | 0 | return FastToken::DONTKNOW; |
715 | | |
716 | 18.6M | sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); |
717 | 182M | while( nNamespace-- ) |
718 | 180M | { |
719 | 180M | const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace]; |
720 | 180M | if( rNamespaceDefine.maPrefix == sPrefix ) |
721 | 17.6M | return GetTokenWithContextNamespace(rNamespaceDefine.mnToken, pName); |
722 | 180M | } |
723 | | |
724 | 1.04M | if (!m_bIgnoreMissingNSDecl) |
725 | 2.49k | throw SAXException("No namespace defined for " + OStringToOUString(sPrefix, |
726 | 2.49k | RTL_TEXTENCODING_UTF8), {}, {}); |
727 | | |
728 | 1.04M | return FastToken::DONTKNOW; |
729 | 1.04M | } |
730 | | |
731 | | sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL ) |
732 | 21.0M | { |
733 | 21.0M | NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); |
734 | 21.0M | if( aIter != maNamespaceMap.end() ) |
735 | 1.00M | return (*aIter).second; |
736 | 20.0M | else |
737 | 20.0M | return FastToken::DONTKNOW; |
738 | 21.0M | } |
739 | | |
740 | | OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix ) |
741 | 0 | { |
742 | 0 | Entity& rEntity = getEntity(); |
743 | 0 | if( !rEntity.maNamespaceCount.empty() ) |
744 | 0 | { |
745 | 0 | sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); |
746 | 0 | while( nNamespace-- ) |
747 | 0 | if( rEntity.maNamespaceDefines[nNamespace].maPrefix == rPrefix ) |
748 | 0 | return rEntity.maNamespaceDefines[nNamespace].maNamespaceURL; |
749 | 0 | } |
750 | | |
751 | 0 | throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix), |
752 | 0 | Reference< XInterface >(), Any()); |
753 | 0 | } |
754 | | |
755 | | sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName ) |
756 | 23.0M | { |
757 | 23.0M | if( nNamespaceToken != FastToken::DONTKNOW ) |
758 | 22.2M | { |
759 | 22.2M | sal_Int32 nNameToken = GetToken( pName ); |
760 | 22.2M | if( nNameToken != FastToken::DONTKNOW ) |
761 | 21.1M | return nNamespaceToken | nNameToken; |
762 | 22.2M | } |
763 | | |
764 | 1.89M | return FastToken::DONTKNOW; |
765 | 23.0M | } |
766 | | |
767 | | namespace |
768 | | { |
769 | | class ParserCleanup |
770 | | { |
771 | | private: |
772 | | FastSaxParserImpl& m_rParser; |
773 | | Entity& m_rEntity; |
774 | | rtl::Reference<ParserThread> m_xParser; |
775 | | public: |
776 | | ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity) |
777 | 219k | : m_rParser(rParser) |
778 | 219k | , m_rEntity(rEntity) |
779 | 219k | { |
780 | 219k | } |
781 | | ~ParserCleanup() |
782 | 219k | { |
783 | 219k | if (m_rEntity.mpParser) |
784 | 213k | { |
785 | 213k | if (m_rEntity.mpParser->myDoc) |
786 | 1.57k | xmlFreeDoc(m_rEntity.mpParser->myDoc); |
787 | 213k | xmlFreeParserCtxt(m_rEntity.mpParser); |
788 | 213k | } |
789 | 219k | joinThread(); |
790 | 219k | m_rParser.popEntity(); |
791 | 219k | } |
792 | | void setThread(const rtl::Reference<ParserThread> &xParser) |
793 | 0 | { |
794 | 0 | m_xParser = xParser; |
795 | 0 | } |
796 | | void joinThread() |
797 | 219k | { |
798 | 219k | if (m_xParser.is()) |
799 | 0 | { |
800 | 0 | rtl::Reference<ParserThread> xToJoin = m_xParser; |
801 | 0 | m_xParser.clear(); |
802 | 0 | xToJoin->join(); |
803 | 0 | } |
804 | 219k | } |
805 | | }; |
806 | | } |
807 | | /*************** |
808 | | * |
809 | | * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does |
810 | | * the file-specific initialization work. (During a parser run, external files may be opened) |
811 | | * |
812 | | ****************/ |
813 | | void FastSaxParserImpl::parseStream(const InputSource& rStructSource) |
814 | 235k | { |
815 | 235k | xmlInitParser(); |
816 | | |
817 | | // Only one text at one time |
818 | 235k | std::unique_lock guard( maMutex ); |
819 | | |
820 | 235k | pushEntity(maData, rStructSource); |
821 | 235k | Entity& rEntity = getEntity(); |
822 | 235k | ParserCleanup aEnsureFree(*this, rEntity); |
823 | | |
824 | | // start the document |
825 | 235k | if( rEntity.mxDocumentHandler.is() ) |
826 | 219k | { |
827 | 219k | rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator ); |
828 | 219k | rEntity.mxDocumentHandler->startDocument(); |
829 | 219k | } |
830 | | |
831 | | #ifdef EMSCRIPTEN |
832 | | rEntity.mbEnableThreads = false; |
833 | | #else |
834 | 235k | if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser) |
835 | 0 | { |
836 | 0 | Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY); |
837 | | // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams |
838 | 0 | rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000) |
839 | 0 | || (rEntity.maStructSource.aInputStream->available() > 10000); |
840 | 0 | } |
841 | 235k | #endif |
842 | | |
843 | 235k | if (rEntity.mbEnableThreads) |
844 | 0 | { |
845 | 0 | rtl::Reference<ParserThread> xParser = new ParserThread(this); |
846 | 0 | xParser->launch(); |
847 | 0 | aEnsureFree.setThread(xParser); |
848 | 0 | bool done = false; |
849 | 0 | do { |
850 | 0 | rEntity.maConsumeResume.wait(); |
851 | 0 | rEntity.maConsumeResume.reset(); |
852 | |
|
853 | 0 | std::unique_lock aGuard(rEntity.maEventProtector); |
854 | 0 | while (!rEntity.maPendingEvents.empty()) |
855 | 0 | { |
856 | 0 | if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater) |
857 | 0 | rEntity.maProduceResume.set(); // start producer again |
858 | |
|
859 | 0 | EventList aEventList = std::move(rEntity.maPendingEvents.front()); |
860 | 0 | rEntity.maPendingEvents.pop(); |
861 | 0 | aGuard.unlock(); // unlock |
862 | |
|
863 | 0 | if (!consume(aEventList)) |
864 | 0 | done = true; |
865 | |
|
866 | 0 | aGuard.lock(); // lock |
867 | |
|
868 | 0 | if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater ) |
869 | 0 | { |
870 | 0 | aGuard.unlock(); |
871 | 0 | for (auto& rEvent : aEventList.maEvents) |
872 | 0 | { |
873 | 0 | if (rEvent.mxAttributes.is()) |
874 | 0 | { |
875 | 0 | rEvent.mxAttributes->clear(); |
876 | 0 | if( rEntity.mxNamespaceHandler.is() ) |
877 | 0 | rEvent.mxDeclAttributes->clear(); |
878 | 0 | } |
879 | 0 | aEventList.mbIsAttributesEmpty = true; |
880 | 0 | } |
881 | 0 | aGuard.lock(); |
882 | 0 | } |
883 | |
|
884 | 0 | rEntity.maUsedEvents.push(std::move(aEventList)); |
885 | 0 | } |
886 | 0 | } while (!done); |
887 | 0 | aEnsureFree.joinThread(); |
888 | 0 | deleteUsedEvents(); |
889 | | |
890 | | // callbacks used inside XML_Parse may have caught an exception No need |
891 | | // to lock maSavedExceptionMutex here because parser thread is joined. |
892 | | // coverity[missing_lock : SUPPRESS] 2024.6.1 |
893 | 0 | if( rEntity.maSavedException.hasValue() ) |
894 | 0 | rEntity.throwException( mxDocumentLocator, true ); |
895 | 0 | } |
896 | 235k | else |
897 | 235k | { |
898 | 235k | parse(); |
899 | 235k | } |
900 | | |
901 | | // finish document |
902 | 235k | if( rEntity.mxDocumentHandler.is() ) |
903 | 123k | { |
904 | 123k | rEntity.mxDocumentHandler->endDocument(); |
905 | 123k | } |
906 | 235k | } |
907 | | |
908 | | void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) |
909 | 397k | { |
910 | 397k | maData.mxDocumentHandler = Handler; |
911 | 397k | } |
912 | | |
913 | | void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler ) |
914 | 322k | { |
915 | 322k | assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" ); |
916 | 322k | maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ); |
917 | 322k | } |
918 | | |
919 | | void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) |
920 | 18.6M | { |
921 | 18.6M | if( NamespaceToken < FastToken::NAMESPACE ) |
922 | 0 | throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0); |
923 | | |
924 | 18.6M | if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW ) |
925 | 18.6M | { |
926 | 18.6M | maNamespaceMap[ NamespaceURL ] = NamespaceToken; |
927 | 18.6M | return; |
928 | 18.6M | } |
929 | 0 | throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0); |
930 | 18.6M | } |
931 | | |
932 | | OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix ) |
933 | 0 | { |
934 | 0 | try |
935 | 0 | { |
936 | 0 | return GetNamespaceURL( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) ); |
937 | 0 | } |
938 | 0 | catch (const Exception&) |
939 | 0 | { |
940 | 0 | } |
941 | 0 | throw IllegalArgumentException(); |
942 | 0 | } |
943 | | |
944 | | void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler) |
945 | 0 | { |
946 | 0 | maData.mxErrorHandler = Handler; |
947 | 0 | } |
948 | | |
949 | | void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler ) |
950 | 117k | { |
951 | 117k | maData.mxNamespaceHandler = Handler; |
952 | 117k | } |
953 | | |
954 | | void FastSaxParserImpl::setCustomEntityNames( |
955 | | const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements) |
956 | 0 | { |
957 | 0 | m_Replacements.resize(replacements.size()); |
958 | 0 | for (size_t i = 0; i < replacements.size(); ++i) |
959 | 0 | { |
960 | 0 | m_Replacements[i].name = replacements[i].First; |
961 | 0 | m_Replacements[i].replacement = replacements[i].Second; |
962 | 0 | } |
963 | 0 | if (m_Replacements.size() > 1) |
964 | 0 | std::sort(m_Replacements.begin(), m_Replacements.end()); |
965 | 0 | } |
966 | | |
967 | | void FastSaxParserImpl::deleteUsedEvents() |
968 | 0 | { |
969 | 0 | Entity& rEntity = getEntity(); |
970 | 0 | std::unique_lock aGuard(rEntity.maEventProtector); |
971 | |
|
972 | 0 | while (!rEntity.maUsedEvents.empty()) |
973 | 0 | { |
974 | 0 | { // the block makes sure that aEventList is destructed outside the lock |
975 | 0 | EventList aEventList = std::move(rEntity.maUsedEvents.front()); |
976 | 0 | rEntity.maUsedEvents.pop(); |
977 | |
|
978 | 0 | aGuard.unlock(); // unlock |
979 | 0 | } |
980 | |
|
981 | 0 | aGuard.lock(); // lock |
982 | 0 | } |
983 | 0 | } |
984 | | |
985 | | void FastSaxParserImpl::produce( bool bForceFlush ) |
986 | 0 | { |
987 | 0 | Entity& rEntity = getEntity(); |
988 | 0 | if (!(bForceFlush || |
989 | 0 | rEntity.mnProducedEventsSize >= Entity::mnEventListSize)) |
990 | 0 | return; |
991 | | |
992 | 0 | std::unique_lock aGuard(rEntity.maEventProtector); |
993 | |
|
994 | 0 | while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater) |
995 | 0 | { // pause parsing for a bit |
996 | 0 | aGuard.unlock(); // unlock |
997 | 0 | rEntity.maProduceResume.wait(); |
998 | 0 | rEntity.maProduceResume.reset(); |
999 | 0 | aGuard.lock(); // lock |
1000 | 0 | } |
1001 | |
|
1002 | 0 | rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents)); |
1003 | |
|
1004 | 0 | aGuard.unlock(); // unlock |
1005 | |
|
1006 | 0 | rEntity.mxProducedEvents.reset(); |
1007 | 0 | assert(!rEntity.mxProducedEvents); |
1008 | |
|
1009 | 0 | rEntity.maConsumeResume.set(); |
1010 | 0 | } |
1011 | | |
1012 | | bool FastSaxParserImpl::consume(EventList& rEventList) |
1013 | 0 | { |
1014 | 0 | Entity& rEntity = getEntity(); |
1015 | 0 | rEventList.mbIsAttributesEmpty = false; |
1016 | 0 | for (auto& rEvent : rEventList.maEvents) |
1017 | 0 | { |
1018 | 0 | switch (rEvent.maType) |
1019 | 0 | { |
1020 | 0 | case CallbackType::START_ELEMENT: |
1021 | 0 | rEntity.startElement( &rEvent ); |
1022 | 0 | break; |
1023 | 0 | case CallbackType::END_ELEMENT: |
1024 | 0 | rEntity.endElement(); |
1025 | 0 | break; |
1026 | 0 | case CallbackType::CHARACTERS: |
1027 | 0 | rEntity.characters( rEvent.msChars ); |
1028 | 0 | break; |
1029 | 0 | case CallbackType::PROCESSING_INSTRUCTION: |
1030 | 0 | rEntity.processingInstruction( |
1031 | 0 | rEvent.msNamespace, rEvent.msElementName ); // ( target, data ) |
1032 | 0 | break; |
1033 | 0 | case CallbackType::DONE: |
1034 | 0 | return false; |
1035 | 0 | case CallbackType::EXCEPTION: |
1036 | 0 | rEntity.throwException( mxDocumentLocator, false ); |
1037 | 0 | [[fallthrough]]; // avoid unreachable code warning with some compilers |
1038 | 0 | default: |
1039 | 0 | assert(false); |
1040 | 0 | return false; |
1041 | 0 | } |
1042 | 0 | } |
1043 | 0 | return true; |
1044 | 0 | } |
1045 | | |
1046 | | void FastSaxParserImpl::pushEntity(const ParserData& rEntityData, |
1047 | | xml::sax::InputSource const& rSource) |
1048 | 235k | { |
1049 | 235k | if (!rSource.aInputStream.is()) |
1050 | 16.6k | throw SAXException(u"No input source"_ustr, Reference<XInterface>(), Any()); |
1051 | | |
1052 | 219k | maEntities.emplace(rEntityData); |
1053 | 219k | mpTop = &maEntities.top(); |
1054 | | |
1055 | 219k | mpTop->maStructSource = rSource; |
1056 | | |
1057 | 219k | mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream); |
1058 | 219k | if (!mpTop->maStructSource.sEncoding.isEmpty()) |
1059 | 0 | { |
1060 | 0 | mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US)); |
1061 | 0 | } |
1062 | 219k | } |
1063 | | |
1064 | | void FastSaxParserImpl::popEntity() |
1065 | 219k | { |
1066 | 219k | maEntities.pop(); |
1067 | 219k | mpTop = !maEntities.empty() ? &maEntities.top() : nullptr; |
1068 | 219k | } |
1069 | | |
1070 | | // starts parsing with actual parser ! |
1071 | | void FastSaxParserImpl::parse() |
1072 | 219k | { |
1073 | 219k | const int BUFFER_SIZE = 16 * 1024; |
1074 | 219k | Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); |
1075 | | |
1076 | 219k | Entity& rEntity = getEntity(); |
1077 | | |
1078 | | // set all necessary C-Callbacks |
1079 | 219k | static xmlSAXHandler callbacks; |
1080 | 219k | callbacks.startElementNs = call_callbackStartElement; |
1081 | 219k | callbacks.endElementNs = call_callbackEndElement; |
1082 | 219k | callbacks.characters = call_callbackCharacters; |
1083 | 219k | callbacks.processingInstruction = call_callbackProcessingInstruction; |
1084 | 219k | callbacks.getEntity = call_callbackGetEntity; |
1085 | 219k | callbacks.initialized = XML_SAX2_MAGIC; |
1086 | 219k | int nRead = 0; |
1087 | 219k | do |
1088 | 456k | { |
1089 | 456k | nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); |
1090 | 456k | if( nRead <= 0 ) |
1091 | 203k | { |
1092 | 203k | if( rEntity.mpParser != nullptr ) |
1093 | 203k | { |
1094 | 203k | if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK ) |
1095 | 79.2k | rEntity.throwException( mxDocumentLocator, true ); |
1096 | 203k | if (rEntity.hasException()) |
1097 | 847 | rEntity.throwException(mxDocumentLocator, true); |
1098 | 203k | } |
1099 | 203k | break; |
1100 | 203k | } |
1101 | | |
1102 | 252k | bool bContinue = true; |
1103 | 252k | if( rEntity.mpParser == nullptr ) |
1104 | 213k | { |
1105 | | // create parser with proper encoding (needs the first chunk of data) |
1106 | 213k | rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this, |
1107 | 213k | reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr ); |
1108 | 213k | if( !rEntity.mpParser ) |
1109 | 0 | throw SAXException(u"Couldn't create parser"_ustr, Reference< XInterface >(), Any() ); |
1110 | | |
1111 | | // Tell libxml2 parser to decode entities in attribute values. |
1112 | | // Also allow XML attribute values which are larger than 10MB, because this used to work |
1113 | | // with expat. |
1114 | | // coverity[unsafe_xml_parse_config] - entity support is required |
1115 | 213k | xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE); |
1116 | 213k | } |
1117 | 39.2k | else |
1118 | 39.2k | { |
1119 | 39.2k | bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 ) |
1120 | 39.2k | == XML_ERR_OK; |
1121 | 39.2k | } |
1122 | | |
1123 | | // callbacks used inside XML_Parse may have caught an exception |
1124 | 252k | if (!bContinue) |
1125 | 9.82k | { |
1126 | 9.82k | rEntity.throwException( mxDocumentLocator, true ); |
1127 | 9.82k | } |
1128 | 252k | if (rEntity.hasException()) |
1129 | 475 | { |
1130 | 475 | rEntity.throwException( mxDocumentLocator, true ); |
1131 | 475 | } |
1132 | 252k | } while( nRead > 0 ); |
1133 | 219k | rEntity.getEvent( CallbackType::DONE ); |
1134 | 219k | if( rEntity.mbEnableThreads ) |
1135 | 0 | produce( true ); |
1136 | 219k | } |
1137 | | |
1138 | | // The C-Callbacks |
1139 | | void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, |
1140 | | int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes) |
1141 | 32.6M | { |
1142 | 32.6M | if (!pendingCharacters.empty()) |
1143 | 7.70M | sendPendingCharacters(); |
1144 | 32.6M | Entity& rEntity = getEntity(); |
1145 | 32.6M | if( rEntity.maNamespaceCount.empty() ) |
1146 | 197k | { |
1147 | 197k | rEntity.maNamespaceCount.push(0); |
1148 | 197k | DefineNamespace( "xml"_ostr, u"http://www.w3.org/XML/1998/namespace"_ustr); |
1149 | 197k | } |
1150 | 32.4M | else |
1151 | 32.4M | { |
1152 | 32.4M | rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() ); |
1153 | 32.4M | } |
1154 | | |
1155 | | // create attribute map and process namespace instructions |
1156 | 32.6M | Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT ); |
1157 | 32.6M | bool bIsAttributesEmpty = false; |
1158 | 32.6M | if ( rEntity.mbEnableThreads ) |
1159 | 0 | bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty; |
1160 | | |
1161 | 32.6M | if (rEvent.mxAttributes.is()) |
1162 | 32.4M | { |
1163 | 32.4M | if( !bIsAttributesEmpty ) |
1164 | 32.4M | rEvent.mxAttributes->clear(); |
1165 | 32.4M | } |
1166 | 197k | else |
1167 | 197k | rEvent.mxAttributes.set( |
1168 | 197k | new FastAttributeList( rEntity.mxTokenHandler.get() ) ); |
1169 | | |
1170 | 32.6M | if( rEntity.mxNamespaceHandler.is() ) |
1171 | 20.8M | { |
1172 | 20.8M | if (rEvent.mxDeclAttributes.is()) |
1173 | 20.7M | { |
1174 | 20.7M | if( !bIsAttributesEmpty ) |
1175 | 20.7M | rEvent.mxDeclAttributes->clear(); |
1176 | 20.7M | } |
1177 | 88.1k | else |
1178 | 88.1k | rEvent.mxDeclAttributes.set( |
1179 | 88.1k | new FastAttributeList( rEntity.mxTokenHandler.get() ) ); |
1180 | 20.8M | } |
1181 | | |
1182 | 32.6M | OUString sNamespace; |
1183 | 32.6M | sal_Int32 nNamespaceToken = FastToken::DONTKNOW; |
1184 | 32.6M | if (!rEntity.maNamespaceStack.empty()) |
1185 | 32.4M | { |
1186 | 32.4M | sNamespace = rEntity.maNamespaceStack.top().msName; |
1187 | 32.4M | nNamespaceToken = rEntity.maNamespaceStack.top().mnToken; |
1188 | 32.4M | } |
1189 | | |
1190 | 32.6M | try |
1191 | 32.6M | { |
1192 | | /* #158414# Each element may define new namespaces, also for attributes. |
1193 | | First, process all namespaces, second, process the attributes after namespaces |
1194 | | have been initialized. */ |
1195 | | |
1196 | 32.6M | std::string_view sPrefix; // convert to string_view so we only do strlen() once. |
1197 | 32.6M | if (prefix != nullptr) |
1198 | 11.2M | sPrefix = XML_CAST(prefix); |
1199 | | // #158414# first: get namespaces |
1200 | 33.9M | for (int i = 0; i < numNamespaces * 2; i += 2) |
1201 | 1.32M | { |
1202 | | // namespaces[] is (prefix/URI) |
1203 | 1.32M | if( namespaces[ i ] != nullptr ) |
1204 | 1.10M | { |
1205 | 1.10M | OString aPrefix( XML_CAST( namespaces[ i ] )); |
1206 | 1.10M | OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); |
1207 | 1.10M | NormalizeURI( namespaceURL ); |
1208 | 1.10M | DefineNamespace(aPrefix, namespaceURL); |
1209 | 1.10M | if( rEntity.mxNamespaceHandler.is() ) |
1210 | 857k | rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); |
1211 | 1.10M | } |
1212 | 225k | else |
1213 | 225k | { |
1214 | | // default namespace |
1215 | 225k | sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); |
1216 | 225k | NormalizeURI( sNamespace ); |
1217 | 225k | nNamespaceToken = GetNamespaceToken( sNamespace ); |
1218 | 225k | if( rEntity.mxNamespaceHandler.is() ) |
1219 | 159k | rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) ); |
1220 | 225k | } |
1221 | 1.32M | } |
1222 | | |
1223 | 32.6M | if ( rEntity.mxTokenHandler.is() ) |
1224 | 32.6M | { |
1225 | | // #158414# second: fill attribute list with other attributes |
1226 | 32.6M | rEvent.mxAttributes->reserve( numAttributes ); |
1227 | 52.6M | for (int i = 0; i < numAttributes * 5; i += 5) |
1228 | 20.0M | { |
1229 | | // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd ) |
1230 | 20.0M | if( attributes[ i + 1 ] != nullptr ) |
1231 | 7.40M | { |
1232 | 7.40M | sal_Int32 nAttributeToken = GetTokenWithPrefix(XML_CAST(attributes[ i + 1 ]), attributes[ i ]); |
1233 | 7.40M | if( nAttributeToken != FastToken::DONTKNOW ) |
1234 | 6.28M | rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) ); |
1235 | 1.12M | else |
1236 | 1.12M | addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes); |
1237 | 7.40M | } |
1238 | 12.6M | else |
1239 | 12.6M | { |
1240 | 12.6M | sal_Int32 nAttributeToken = GetToken(attributes[ i ]); |
1241 | 12.6M | if( nAttributeToken != FastToken::DONTKNOW ) |
1242 | 12.2M | rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) ); |
1243 | 416k | else |
1244 | 416k | { |
1245 | 416k | SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" << |
1246 | 416k | OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); |
1247 | 416k | rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), |
1248 | 416k | OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); |
1249 | 416k | } |
1250 | 12.6M | } |
1251 | 20.0M | } |
1252 | | |
1253 | 32.6M | if( !sPrefix.empty() ) |
1254 | 11.2M | rEvent.mnElementToken = GetTokenWithPrefix(sPrefix, localName); |
1255 | 21.3M | else if( !sNamespace.isEmpty() ) |
1256 | 5.42M | rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName); |
1257 | 15.9M | else |
1258 | 15.9M | rEvent.mnElementToken = GetToken(localName); |
1259 | 32.6M | } |
1260 | 18.4E | else |
1261 | 18.4E | { |
1262 | 18.4E | for (int i = 0; i < numAttributes * 5; i += 5) |
1263 | 0 | { |
1264 | 0 | if( attributes[ i + 1 ] != nullptr ) |
1265 | 0 | addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes); |
1266 | 0 | else |
1267 | 0 | rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), |
1268 | 0 | OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); |
1269 | 0 | } |
1270 | | |
1271 | 18.4E | rEvent.mnElementToken = FastToken::DONTKNOW; |
1272 | 18.4E | } |
1273 | | |
1274 | 32.6M | if( rEvent.mnElementToken == FastToken::DONTKNOW ) |
1275 | 16.1M | { |
1276 | 16.1M | OUString aElementPrefix; |
1277 | 16.1M | if( !sPrefix.empty() ) |
1278 | 835k | { |
1279 | 835k | aElementPrefix = OUString( sPrefix.data(), sPrefix.size(), RTL_TEXTENCODING_UTF8 ); |
1280 | 835k | if ( URI != nullptr ) |
1281 | 286k | sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 ); |
1282 | 549k | else if ( m_bIgnoreMissingNSDecl ) |
1283 | 549k | sNamespace.clear(); |
1284 | 0 | else |
1285 | 0 | throw SAXException("No namespace defined for " + aElementPrefix, {}, {}); |
1286 | 835k | nNamespaceToken = GetNamespaceToken( sNamespace ); |
1287 | 835k | } |
1288 | 16.1M | OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 ); |
1289 | 16.1M | rEvent.msNamespace = sNamespace; |
1290 | 16.1M | if( aElementPrefix.isEmpty() ) |
1291 | 15.2M | rEvent.msElementName = std::move(aElementLocalName); |
1292 | 835k | else |
1293 | 835k | rEvent.msElementName = aElementPrefix + ":" + aElementLocalName; |
1294 | 16.1M | } |
1295 | 16.5M | else // token is always preferred. |
1296 | 16.5M | rEvent.msElementName.clear(); |
1297 | | |
1298 | 32.6M | rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) ); |
1299 | 32.6M | if (rEntity.mbEnableThreads) |
1300 | 0 | produce(); |
1301 | 32.6M | else |
1302 | 32.6M | { |
1303 | 32.6M | SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName); |
1304 | 32.6M | rEntity.startElement( &rEvent ); |
1305 | 32.6M | } |
1306 | 32.6M | } |
1307 | 32.6M | catch (...) |
1308 | 32.6M | { |
1309 | 2.49k | rEntity.saveException( ::cppu::getCaughtException() ); |
1310 | 2.49k | } |
1311 | 32.6M | } |
1312 | | |
1313 | | void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes) |
1314 | 1.11M | { |
1315 | 1.11M | OUString aNamespaceURI; |
1316 | 1.11M | if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr ) |
1317 | 628k | aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 ); |
1318 | 1.11M | const OString aPrefix( XML_CAST( attributes[ i + 1 ] )); |
1319 | 1.11M | const OString aLocalName( XML_CAST( attributes[ i ] )); |
1320 | 1.11M | OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName; |
1321 | 1.11M | xAttributes->addUnknown( aNamespaceURI, aQualifiedName, |
1322 | 1.11M | OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); |
1323 | 1.11M | SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI); |
1324 | 1.11M | } |
1325 | | |
1326 | | void FastSaxParserImpl::callbackEndElement() |
1327 | 15.9M | { |
1328 | 15.9M | if (!pendingCharacters.empty()) |
1329 | 2.89M | sendPendingCharacters(); |
1330 | 15.9M | Entity& rEntity = getEntity(); |
1331 | 15.9M | SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount"); |
1332 | 15.9M | if( !rEntity.maNamespaceCount.empty() ) |
1333 | 15.9M | rEntity.maNamespaceCount.pop(); |
1334 | | |
1335 | 15.9M | SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack"); |
1336 | 15.9M | if( !rEntity.maNamespaceStack.empty() ) |
1337 | 15.9M | rEntity.maNamespaceStack.pop(); |
1338 | | |
1339 | 15.9M | rEntity.getEvent( CallbackType::END_ELEMENT ); |
1340 | 15.9M | if (rEntity.mbEnableThreads) |
1341 | 0 | produce(); |
1342 | 15.9M | else |
1343 | 15.9M | rEntity.endElement(); |
1344 | 15.9M | } |
1345 | | |
1346 | | void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen ) |
1347 | 11.9M | { |
1348 | | // SAX interface allows that the characters callback splits content of one XML node |
1349 | | // (e.g. because there's an entity that needs decoding), however for consumers it's |
1350 | | // simpler FastSaxParser's character callback provides the whole string at once, |
1351 | | // so merge data from possible multiple calls and send them at once (before the element |
1352 | | // ends or another one starts). |
1353 | | // |
1354 | | // We use a std::vector<char> to avoid calling into the OUString constructor more than once when |
1355 | | // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly |
1356 | | // often in writer documents. |
1357 | 11.9M | int nOriginalLen = pendingCharacters.size(); |
1358 | 11.9M | pendingCharacters.resize(nOriginalLen + nLen); |
1359 | 11.9M | memcpy(pendingCharacters.data() + nOriginalLen, s, nLen); |
1360 | 11.9M | } |
1361 | | |
1362 | | void FastSaxParserImpl::sendPendingCharacters() |
1363 | 10.6M | { |
1364 | 10.6M | Entity& rEntity = getEntity(); |
1365 | 10.6M | OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 ); |
1366 | | |
1367 | 10.6M | if (sChars[0] == '_' && mxMap) |
1368 | 0 | mxMap->get(uno::Any(sChars)) >>= sChars; |
1369 | | |
1370 | 10.6M | if (rEntity.mbEnableThreads) |
1371 | 0 | { |
1372 | 0 | Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS ); |
1373 | 0 | rEvent.msChars = std::move(sChars); |
1374 | 0 | produce(); |
1375 | 0 | } |
1376 | 10.6M | else |
1377 | 10.6M | rEntity.characters( sChars ); |
1378 | 10.6M | pendingCharacters.resize(0); |
1379 | 10.6M | } |
1380 | | |
1381 | | void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data ) |
1382 | 53.5k | { |
1383 | 53.5k | if (!pendingCharacters.empty()) |
1384 | 6.66k | sendPendingCharacters(); |
1385 | 53.5k | Entity& rEntity = getEntity(); |
1386 | 53.5k | Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION ); |
1387 | | |
1388 | | // This event is very rare, so no need to waste extra space for this |
1389 | | // Using namespace and element strings to be target and data in that order. |
1390 | 53.5k | rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 ); |
1391 | 53.5k | if ( data != nullptr ) |
1392 | 47.2k | rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 ); |
1393 | 6.33k | else |
1394 | 6.33k | rEvent.msElementName.clear(); |
1395 | | |
1396 | 53.5k | if (rEntity.mbEnableThreads) |
1397 | 0 | produce(); |
1398 | 53.5k | else |
1399 | 53.5k | rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName ); |
1400 | 53.5k | } |
1401 | | |
1402 | | xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name ) |
1403 | 55.1k | { |
1404 | 55.1k | if( !name ) |
1405 | 0 | return xmlGetPredefinedEntity(name); |
1406 | 55.1k | const char* dname = XML_CAST(name); |
1407 | 55.1k | int lname = strlen(dname); |
1408 | 55.1k | if( lname == 0 ) |
1409 | 0 | return xmlGetPredefinedEntity(name); |
1410 | 55.1k | if (m_Replacements.size() > 0) |
1411 | 0 | { |
1412 | 0 | auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname); |
1413 | 0 | if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0) |
1414 | 0 | { |
1415 | 0 | xmlEntityPtr entpt = xmlNewEntity( |
1416 | 0 | nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, |
1417 | 0 | BAD_CAST(OUStringToOString(it->replacement, RTL_TEXTENCODING_UTF8).getStr())); |
1418 | 0 | m_TemporalEntities.push_back(entpt); |
1419 | 0 | return entpt; |
1420 | 0 | } |
1421 | 0 | } |
1422 | 55.1k | if( lname < 2 ) |
1423 | 18.2k | return xmlGetPredefinedEntity(name); |
1424 | 36.9k | if ( dname[0] == '#' ) |
1425 | 0 | { |
1426 | 0 | sal_uInt32 cval = 0; |
1427 | 0 | if( dname[1] == 'x' || dname[1] == 'X' ) |
1428 | 0 | { |
1429 | 0 | if( lname < 3 ) |
1430 | 0 | return xmlGetPredefinedEntity(name); |
1431 | 0 | cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 16 ) ); |
1432 | 0 | if( cval == 0 ) |
1433 | 0 | return xmlGetPredefinedEntity(name); |
1434 | 0 | OUString vname( &cval, 1 ); |
1435 | 0 | xmlEntityPtr entpt |
1436 | 0 | = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, |
1437 | 0 | BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr())); |
1438 | 0 | m_TemporalEntities.push_back(entpt); |
1439 | 0 | return entpt; |
1440 | 0 | } |
1441 | 0 | else |
1442 | 0 | { |
1443 | 0 | cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 10 ) ); |
1444 | 0 | if( cval == 0 ) |
1445 | 0 | return xmlGetPredefinedEntity(name); |
1446 | 0 | OUString vname(&cval, 1); |
1447 | 0 | xmlEntityPtr entpt |
1448 | 0 | = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, |
1449 | 0 | BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr())); |
1450 | 0 | m_TemporalEntities.push_back(entpt); |
1451 | 0 | return entpt; |
1452 | 0 | } |
1453 | 0 | } |
1454 | 36.9k | return xmlGetPredefinedEntity(name); |
1455 | 36.9k | } |
1456 | | |
1457 | 322k | FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl) {} |
1458 | | |
1459 | | FastSaxParser::~FastSaxParser() |
1460 | 322k | { |
1461 | 322k | } |
1462 | | |
1463 | | void SAL_CALL |
1464 | | FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments) |
1465 | 130k | { |
1466 | 130k | if (!rArguments.hasElements()) |
1467 | 0 | return; |
1468 | | |
1469 | 130k | OUString str; |
1470 | 130k | if ( !(rArguments[0] >>= str) ) |
1471 | 0 | throw IllegalArgumentException(); |
1472 | | |
1473 | 130k | auto opts = comphelper::string::split(str, ','); |
1474 | 130k | for (auto &s : opts) |
1475 | 130k | { |
1476 | 130k | if ( s == "IgnoreMissingNSDecl" ) |
1477 | 101k | mpImpl->m_bIgnoreMissingNSDecl = true; |
1478 | 28.7k | else if ( s == "DoSmeplease" ) |
1479 | 0 | ; //just ignore as this is already immune to billion laughs |
1480 | 28.7k | else if ( s == "DisableThreadedParser" ) |
1481 | 28.7k | mpImpl->m_bDisableThreadedParser = true; |
1482 | 0 | else |
1483 | 0 | throw IllegalArgumentException(); |
1484 | 130k | } |
1485 | | |
1486 | 130k | if (rArguments.size() > 1) |
1487 | 101k | rArguments[1] >>= mpImpl->mxMap; |
1488 | 130k | } |
1489 | | |
1490 | | void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource ) |
1491 | 235k | { |
1492 | 235k | mpImpl->parseStream(aInputSource); |
1493 | 235k | } |
1494 | | |
1495 | | void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler ) |
1496 | 397k | { |
1497 | 397k | mpImpl->setFastDocumentHandler(Handler); |
1498 | 397k | } |
1499 | | |
1500 | | void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler ) |
1501 | 322k | { |
1502 | 322k | mpImpl->setTokenHandler(Handler); |
1503 | 322k | } |
1504 | | |
1505 | | void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) |
1506 | 18.6M | { |
1507 | 18.6M | mpImpl->registerNamespace(NamespaceURL, NamespaceToken); |
1508 | 18.6M | } |
1509 | | |
1510 | | OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix ) |
1511 | 0 | { |
1512 | 0 | return mpImpl->getNamespaceURL(rPrefix); |
1513 | 0 | } |
1514 | | |
1515 | | void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler ) |
1516 | 0 | { |
1517 | 0 | mpImpl->setErrorHandler(Handler); |
1518 | 0 | } |
1519 | | |
1520 | | void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& ) |
1521 | 0 | { |
1522 | | // not implemented |
1523 | 0 | } |
1524 | | |
1525 | | void FastSaxParser::setLocale( const lang::Locale& ) |
1526 | 0 | { |
1527 | | // not implemented |
1528 | 0 | } |
1529 | | |
1530 | | void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler) |
1531 | 117k | { |
1532 | 117k | mpImpl->setNamespaceHandler(Handler); |
1533 | 117k | } |
1534 | | |
1535 | | OUString FastSaxParser::getImplementationName() |
1536 | 0 | { |
1537 | 0 | return u"com.sun.star.comp.extensions.xml.sax.FastParser"_ustr; |
1538 | 0 | } |
1539 | | |
1540 | | void FastSaxParser::setCustomEntityNames( |
1541 | | const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements) |
1542 | 0 | { |
1543 | 0 | mpImpl->setCustomEntityNames(replacements); |
1544 | 0 | } |
1545 | | |
1546 | | sal_Bool FastSaxParser::supportsService( const OUString& ServiceName ) |
1547 | 0 | { |
1548 | 0 | return cppu::supportsService(this, ServiceName); |
1549 | 0 | } |
1550 | | |
1551 | | uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames() |
1552 | 0 | { |
1553 | 0 | return { u"com.sun.star.xml.sax.FastParser"_ustr }; |
1554 | 0 | } |
1555 | | |
1556 | | } // namespace sax_fastparser |
1557 | | |
1558 | | extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * |
1559 | | com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation( |
1560 | | css::uno::XComponentContext *, |
1561 | | css::uno::Sequence<css::uno::Any> const &) |
1562 | 151k | { |
1563 | 151k | return cppu::acquire(new FastSaxParser); |
1564 | 151k | } |
1565 | | |
1566 | | // ---------------------------------------------------------- |
1567 | | // copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases |
1568 | | // for various dodgy namespace decls in the wild. |
1569 | | |
1570 | | static bool NormalizeW3URI( OUString& rName ); |
1571 | | static bool NormalizeOasisURN( OUString& rName ); |
1572 | | |
1573 | | static void NormalizeURI( OUString& rName ) |
1574 | 1.32M | { |
1575 | | // try OASIS + W3 URI normalization |
1576 | 1.32M | bool bSuccess = NormalizeOasisURN( rName ); |
1577 | 1.32M | if( ! bSuccess ) |
1578 | 1.07M | NormalizeW3URI( rName ); |
1579 | 1.32M | } |
1580 | | |
1581 | | constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/"); |
1582 | | constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms"); |
1583 | | constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms"); |
1584 | | constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg"); |
1585 | | constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"); |
1586 | | constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format"); |
1587 | | constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"); |
1588 | | constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/"); |
1589 | | constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20"); |
1590 | | constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"); |
1591 | | constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc"); |
1592 | | constexpr OUStringLiteral XML_XMLNS(u"xmlns"); |
1593 | | constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument"); |
1594 | | constexpr OUStringLiteral XML_1_0(u"1.0"); |
1595 | | |
1596 | | static bool NormalizeW3URI( OUString& rName ) |
1597 | 1.07M | { |
1598 | | // check if URI matches: |
1599 | | // http://www.w3.org/[0-9]*/[:letter:]* |
1600 | | // (year)/(WG name) |
1601 | | // For the following WG/standards names: |
1602 | | // - xforms |
1603 | | |
1604 | 1.07M | bool bSuccess = false; |
1605 | 1.07M | const OUString sURIPrefix = XML_URI_W3_PREFIX; |
1606 | 1.07M | if( rName.startsWith( sURIPrefix ) ) |
1607 | 106k | { |
1608 | 106k | const OUString sURISuffix = XML_URI_XFORMS_SUFFIX ; |
1609 | 106k | sal_Int32 nCompareFrom = rName.getLength() - sURISuffix.getLength(); |
1610 | 106k | if( rName.subView( nCompareFrom ) == sURISuffix ) |
1611 | 8.58k | { |
1612 | | // found W3 prefix, and xforms suffix |
1613 | 8.58k | rName = XML_N_XFORMS_1_0; |
1614 | 8.58k | bSuccess = true; |
1615 | 8.58k | } |
1616 | 106k | } |
1617 | 1.07M | return bSuccess; |
1618 | 1.07M | } |
1619 | | |
1620 | | static bool NormalizeOasisURN( OUString& rName ) |
1621 | 1.32M | { |
1622 | | // #i38644# |
1623 | | // we exported the wrong namespace for smil, so we correct this here on load |
1624 | | // for older documents |
1625 | 1.32M | if( rName == XML_N_SVG ) |
1626 | 124 | { |
1627 | 124 | rName = XML_N_SVG_COMPAT; |
1628 | 124 | return true; |
1629 | 124 | } |
1630 | 1.32M | else if( rName == XML_N_FO ) |
1631 | 5 | { |
1632 | 5 | rName = XML_N_FO_COMPAT; |
1633 | 5 | return true; |
1634 | 5 | } |
1635 | 1.32M | else if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD ) |
1636 | 0 | { |
1637 | 0 | rName = XML_N_SMIL_COMPAT; |
1638 | 0 | return true; |
1639 | 0 | } |
1640 | | |
1641 | | |
1642 | | // Check if URN matches |
1643 | | // :urn:oasis:names:tc:[^:]*:xmlns:[^:]*:1.[^:]* |
1644 | | // |---| |---| |-----| |
1645 | | // TC-Id Sub-Id Version |
1646 | | |
1647 | 1.32M | sal_Int32 nNameLen = rName.getLength(); |
1648 | | // :urn:oasis:names:tc.* |
1649 | 1.32M | const OUString aOasisURN = XML_URN_OASIS_NAMES_TC; |
1650 | 1.32M | if( !rName.startsWith( aOasisURN ) ) |
1651 | 1.02M | return false; |
1652 | | |
1653 | | // :urn:oasis:names:tc:.* |
1654 | 305k | sal_Int32 nPos = aOasisURN.getLength(); |
1655 | 305k | if( nPos >= nNameLen || rName[nPos] != ':' ) |
1656 | 3.78k | return false; |
1657 | | |
1658 | | // :urn:oasis:names:tc:[^:]:.* |
1659 | 301k | sal_Int32 nTCIdStart = nPos+1; |
1660 | 301k | sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart ); |
1661 | 301k | if( -1 == nTCIdEnd ) |
1662 | 1.93k | return false; |
1663 | | |
1664 | | // :urn:oasis:names:tc:[^:]:xmlns.* |
1665 | 299k | nPos = nTCIdEnd + 1; |
1666 | 299k | std::u16string_view sTmp( rName.subView( nPos ) ); |
1667 | 299k | const OUString aXMLNS = XML_XMLNS; |
1668 | 299k | if( !o3tl::starts_with(sTmp, aXMLNS ) ) |
1669 | 25.2k | return false; |
1670 | | |
1671 | | // :urn:oasis:names:tc:[^:]:xmlns:.* |
1672 | 274k | nPos += aXMLNS.getLength(); |
1673 | 274k | if( nPos >= nNameLen || rName[nPos] != ':' ) |
1674 | 2.76k | return false; |
1675 | | |
1676 | | // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:.* |
1677 | 271k | nPos = rName.indexOf( ':', nPos+1 ); |
1678 | 271k | if( -1 == nPos ) |
1679 | 1.72k | return false; |
1680 | | |
1681 | | // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:[^:][^:][^:][^:]* |
1682 | 269k | sal_Int32 nVersionStart = nPos+1; |
1683 | 269k | if( nVersionStart+2 >= nNameLen || |
1684 | 269k | -1 != rName.indexOf( ':', nVersionStart ) ) |
1685 | 4.05k | return false; |
1686 | | |
1687 | | // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:1\.[^:][^:]* |
1688 | 265k | if( rName[nVersionStart] != '1' || rName[nVersionStart+1] != '.' ) |
1689 | 16.5k | return false; |
1690 | | |
1691 | | // replace [tcid] with current TCID and version with current version. |
1692 | | |
1693 | 249k | rName = rName.subView( 0, nTCIdStart ) + |
1694 | 249k | XML_OPENDOCUMENT + |
1695 | 249k | rName.subView( nTCIdEnd, nVersionStart-nTCIdEnd ) + |
1696 | 249k | XML_1_0; |
1697 | | |
1698 | 249k | return true; |
1699 | 265k | } |
1700 | | |
1701 | | |
1702 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |