Coverage Report

Created: 2026-04-09 11:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/sax/source/fastparser/fastparser.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sax/fastparser.hxx>
21
#include <sax/fastattribs.hxx>
22
#include <utility>
23
#include <xml2utf.hxx>
24
25
#include <com/sun/star/io/XSeekable.hpp>
26
#include <com/sun/star/lang/DisposedException.hpp>
27
#include <com/sun/star/lang/IllegalArgumentException.hpp>
28
#include <com/sun/star/uno/XComponentContext.hpp>
29
#include <com/sun/star/container/XMap.hpp>
30
#include <com/sun/star/xml/sax/FastToken.hpp>
31
#include <com/sun/star/xml/sax/SAXParseException.hpp>
32
#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
33
#include <cppuhelper/implbase.hxx>
34
#include <cppuhelper/supportsservice.hxx>
35
#include <cppuhelper/exc_hlp.hxx>
36
#include <osl/conditn.hxx>
37
#include <rtl/ref.hxx>
38
#include <sal/log.hxx>
39
#include <salhelper/thread.hxx>
40
#include <comphelper/diagnose_ex.hxx>
41
#include <comphelper/string.hxx>
42
#include <o3tl/string_view.hxx>
43
44
#include <queue>
45
#include <memory>
46
#include <mutex>
47
#include <optional>
48
#include <stack>
49
#include <string_view>
50
#include <unordered_map>
51
#include <vector>
52
#include <cassert>
53
#include <cstring>
54
#include <libxml/parser.h>
55
56
// Inverse of libxml's BAD_CAST.
57
132M
#define XML_CAST( str ) reinterpret_cast< const char* >( str )
58
59
using namespace ::osl;
60
using namespace ::cppu;
61
using namespace ::com::sun::star::uno;
62
using namespace ::com::sun::star::lang;
63
using namespace ::com::sun::star::xml::sax;
64
using namespace ::com::sun::star::io;
65
using namespace com::sun::star;
66
using namespace sax_fastparser;
67
68
static void NormalizeURI( OUString& rName );
69
70
namespace {
71
72
struct Event;
73
class FastLocatorImpl;
74
struct NamespaceDefine;
75
struct Entity;
76
77
typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap;
78
79
struct EventList
80
{
81
    std::vector<Event> maEvents;
82
    bool mbIsAttributesEmpty;
83
};
84
85
/// Kind of record stored in Event::maType.
enum class CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, PROCESSING_INSTRUCTION, DONE, EXCEPTION };
86
87
struct Event
88
{
89
    CallbackType maType;
90
    sal_Int32 mnElementToken;
91
    OUString msNamespace;
92
    OUString msElementName;
93
    rtl::Reference< FastAttributeList > mxAttributes;
94
    rtl::Reference< FastAttributeList > mxDeclAttributes;
95
    OUString msChars;
96
};
97
98
struct NameWithToken
99
{
100
    OUString msName;
101
    sal_Int32 mnToken;
102
103
    NameWithToken(OUString sName, sal_Int32 nToken) :
104
32.6M
        msName(std::move(sName)), mnToken(nToken) {}
105
};
106
107
struct SaxContext
108
{
109
    Reference< XFastContextHandler > mxContext;
110
    sal_Int32 mnElementToken;
111
    std::optional<OUString>  moNamespace;
112
    std::optional<OUString> moElementName;
113
114
    SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
115
32.6M
            mnElementToken(nElementToken)
116
32.6M
    {
117
32.6M
        if (nElementToken == FastToken::DONTKNOW)
118
16.1M
        {
119
16.1M
            moNamespace = aNamespace;
120
16.1M
            moElementName = aElementName;
121
16.1M
        }
122
32.6M
    }
123
};
124
125
struct ParserData
126
{
127
    css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
128
    rtl::Reference<FastTokenHandlerBase>                       mxTokenHandler;
129
    css::uno::Reference< css::xml::sax::XErrorHandler >        mxErrorHandler;
130
    css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
131
132
    ParserData();
133
};
134
135
struct NamespaceDefine
136
{
137
    OString     maPrefix;
138
    sal_Int32   mnToken;
139
    OUString    maNamespaceURL;
140
141
    NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL )
142
1.29M
        : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {}
143
12.7M
    NamespaceDefine() : mnToken(-1) {}
144
};
145
146
// Entity binds all information needed for a single file | single call of parseStream
147
struct Entity : public ParserData
148
{
149
    // Amount of work producer sends to consumer in one iteration:
150
    static const size_t mnEventListSize = 1000;
151
152
    // unique for each Entity instance:
153
154
    // Number of valid events in mxProducedEvents:
155
    size_t mnProducedEventsSize;
156
    std::optional<EventList> mxProducedEvents;
157
    std::queue<EventList> maPendingEvents;
158
    std::queue<EventList> maUsedEvents;
159
    std::mutex maEventProtector;
160
161
    static const size_t mnEventLowWater = 4;
162
    static const size_t mnEventHighWater = 8;
163
    osl::Condition maConsumeResume;
164
    osl::Condition maProduceResume;
165
    // Event we use to store data if threading is disabled:
166
    Event maSharedEvent;
167
168
    // copied in copy constructor:
169
170
    // Allow to disable threading for small documents:
171
    bool                                    mbEnableThreads;
172
    css::xml::sax::InputSource              maStructSource;
173
    xmlParserCtxtPtr                        mpParser;
174
    ::sax_expatwrap::XMLFile2UTFConverter   maConverter;
175
176
    // Exceptions cannot be thrown through the C-XmlParser (possible
177
    // resource leaks), therefore any exception thrown by a UNO callback
178
    // must be saved somewhere until the C-XmlParser is stopped.
179
    css::uno::Any                           maSavedException;
180
    std::mutex                              maSavedExceptionMutex;
181
    void saveException( const Any & e );
182
    // Thread-safe check if maSavedException has value
183
    bool hasException();
184
    void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
185
                         bool mbDuringParse );
186
187
    std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
188
    /* Context for main thread consuming events.
189
     * startElement() stores the data, which characters() and endElement() uses
190
     */
191
    std::stack< SaxContext, std::vector<SaxContext> >  maContextStack;
192
    // Determines which elements of maNamespaceDefines are valid in current context
193
    std::stack< sal_uInt32, std::vector<sal_uInt32> >  maNamespaceCount;
194
    std::vector< NamespaceDefine >                     maNamespaceDefines;
195
196
    explicit Entity( const ParserData& rData );
197
    Entity( const Entity& rEntity ) = delete;
198
    Entity& operator=( const Entity& rEntity ) = delete;
199
    void startElement( Event const *pEvent );
200
    void characters( const OUString& sChars );
201
    void endElement();
202
    void processingInstruction( const OUString& rTarget, const OUString& rData );
203
    void transferUsedEvents();
204
    EventList& getEventList();
205
    Event& getEvent( CallbackType aType );
206
};
207
208
// Stuff for custom entity names
209
struct ReplacementPair
210
{
211
    OUString name;
212
    OUString replacement;
213
};
214
inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
215
0
{
216
0
    return lhs.name < rhs.name;
217
0
}
218
inline bool operator<(const ReplacementPair& lhs, const char* rhs)
219
0
{
220
0
    return lhs.name.compareToAscii(rhs) < 0;
221
0
}
222
223
} // namespace
224
225
namespace sax_fastparser {
226
227
class FastSaxParserImpl
228
{
229
public:
230
    explicit FastSaxParserImpl();
231
    ~FastSaxParserImpl();
232
233
private:
234
    std::vector<ReplacementPair> m_Replacements;
235
    std::vector<xmlEntityPtr> m_TemporalEntities;
236
237
public:
238
    // XFastParser
239
    /// @throws css::xml::sax::SAXException
240
    /// @throws css::io::IOException
241
    /// @throws css::uno::RuntimeException
242
    void parseStream( const css::xml::sax::InputSource& aInputSource );
243
    /// @throws css::uno::RuntimeException
244
    void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
245
    /// @throws css::uno::RuntimeException
246
    void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
247
    /// @throws css::lang::IllegalArgumentException
248
    /// @throws css::uno::RuntimeException
249
    void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
250
    /// @throws css::lang::IllegalArgumentException
251
    /// @throws css::uno::RuntimeException
252
    OUString const & getNamespaceURL( std::u16string_view rPrefix );
253
    /// @throws css::uno::RuntimeException
254
    void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
255
    /// @throws css::uno::RuntimeException
256
    void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
257
    // Fake DTD file
258
    void setCustomEntityNames(
259
       const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements);
260
261
    // called by the C callbacks of the expat parser
262
    void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
263
        int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
264
    void callbackEndElement();
265
    void callbackCharacters( const xmlChar* s, int nLen );
266
    void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
267
    xmlEntityPtr callbackGetEntity( const xmlChar *name );
268
269
    void pushEntity(const ParserData&, xml::sax::InputSource const&);
270
    void popEntity();
271
131M
    Entity& getEntity()             { return *mpTop; }
272
    void parse();
273
    void produce( bool bForceFlush = false );
274
    bool m_bIgnoreMissingNSDecl;
275
    bool m_bDisableThreadedParser;
276
    css::uno::Reference<css::container::XMap> mxMap; /// _ prefix string mapper for translation
277
278
private:
279
    bool consume(EventList&);
280
    void deleteUsedEvents();
281
    void sendPendingCharacters();
282
    void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
283
284
    sal_Int32 GetToken( const xmlChar* pName );
285
    /// @throws css::xml::sax::SAXException
286
    sal_Int32 GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName );
287
    /// @throws css::xml::sax::SAXException
288
    OUString const & GetNamespaceURL( std::string_view rPrefix );
289
    sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
290
    sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName );
291
    void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
292
293
private:
294
    std::mutex maMutex; ///< Protecting whole parseStream() execution
295
    ::rtl::Reference< FastLocatorImpl >     mxDocumentLocator;
296
    NamespaceMap                            maNamespaceMap;
297
298
    ParserData maData;                      /// Cached parser configuration for next call of parseStream().
299
300
    Entity *mpTop;                          /// std::stack::top() is amazingly slow => cache this.
301
    std::stack< Entity > maEntities;        /// Entity stack for each call of parseStream().
302
    std::vector<char> pendingCharacters;    /// Data from characters() callback that needs to be sent.
303
};
304
305
} // namespace sax_fastparser
306
307
namespace {
308
309
class ParserThread: public salhelper::Thread
310
{
311
    FastSaxParserImpl *mpParser;
312
public:
313
0
    explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {}
314
private:
315
    virtual void execute() override
316
0
    {
317
0
        try
318
0
        {
319
0
            mpParser->parse();
320
0
        }
321
0
        catch (...)
322
0
        {
323
0
            Entity &rEntity = mpParser->getEntity();
324
0
            rEntity.getEvent( CallbackType::EXCEPTION );
325
0
            mpParser->produce( true );
326
0
        }
327
0
    }
328
};
329
330
extern "C" {
331
332
static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
333
    int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
334
32.6M
{
335
32.6M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
336
32.6M
    pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
337
32.6M
}
338
339
static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
340
15.9M
{
341
15.9M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
342
15.9M
    pFastParser->callbackEndElement();
343
15.9M
}
344
345
static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
346
11.9M
{
347
11.9M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
348
11.9M
    pFastParser->callbackCharacters( s, nLen );
349
11.9M
}
350
351
static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
352
53.5k
{
353
53.5k
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
354
53.5k
    pFastParser->callbackProcessingInstruction( target, data );
355
53.5k
}
356
357
/// C trampoline: asks the FastSaxParserImpl passed as userData for the
/// entity with the given name and returns its answer.
static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name)
{
    return static_cast<FastSaxParserImpl*>( userData )->callbackGetEntity( name );
}
362
363
}
364
365
/// XLocator implementation that answers from the parser's current entity.
class FastLocatorImpl : public WeakImplHelper< XLocator >
{
public:
    explicit FastLocatorImpl(FastSaxParserImpl *p)
        : mpParser(p)
    {
    }

    /// Detaches the locator from its parser; later queries throw.
    void dispose()
    {
        mpParser = nullptr;
    }

    /// @throws RuntimeException (a DisposedException once dispose() has been called)
    void checkDispose() const
    {
        if( mpParser )
            return;
        throw DisposedException();
    }

    //XLocator
    virtual sal_Int32 SAL_CALL getColumnNumber() override;
    virtual sal_Int32 SAL_CALL getLineNumber() override;
    virtual OUString SAL_CALL getPublicId() override;
    virtual OUString SAL_CALL getSystemId() override;

private:
    FastSaxParserImpl *mpParser;
};
383
384
/// Column number reported by libxml2 for the current entity's parser context.
/// Throws DisposedException after dispose().
sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
{
    checkDispose();
    Entity& rCurrent = mpParser->getEntity();
    return xmlSAX2GetColumnNumber( rCurrent.mpParser );
}
389
390
/// Line number reported by libxml2 for the current entity's parser context.
/// Throws DisposedException after dispose().
sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
{
    checkDispose();
    Entity& rCurrent = mpParser->getEntity();
    return xmlSAX2GetLineNumber( rCurrent.mpParser );
}
395
396
/// Public id of the current entity's input source.
/// Throws DisposedException after dispose().
OUString SAL_CALL FastLocatorImpl::getPublicId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sPublicId;
}
401
402
/// System id of the current entity's input source.
/// Throws DisposedException after dispose().
OUString SAL_CALL FastLocatorImpl::getSystemId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sSystemId;
}
407
408
/// Leaves all handler references empty.
ParserData::ParserData() = default;
410
411
/// Copies the handler configuration and starts with no produced events,
/// threading disabled and no parser context.
Entity::Entity(const ParserData& rData)
    : ParserData(rData),
      mnProducedEventsSize(0),
      mbEnableThreads(false),
      mpParser(nullptr)
{
}
418
419
/// Pushes a context entry for the element described by pEvent and asks the
/// parent context (or, at the root, the document handler) for a child
/// context, which is then told about the element start.
///
/// If the parent entry exists but has no handler, only the context entry is
/// pushed. Exceptions thrown by handlers are not propagated; they are stored
/// through saveException().
void Entity::startElement( Event const *pEvent )
{
    const Event& rEvent = *pEvent;

    // Use a plain pointer to avoid significant acquire/release overhead.
    // It has to be fetched before the push below changes the stack top.
    const bool bHasParentEntry = !maContextStack.empty();
    XFastContextHandler* const pParentContext
        = bHasParentEntry ? maContextStack.top().mxContext.get() : nullptr;

    maContextStack.push(
        SaxContext( rEvent.mnElementToken, rEvent.msNamespace, rEvent.msElementName ) );

    // A parent entry without a handler: nothing further to do for the child.
    if( bHasParentEntry && !pParentContext )
        return;

    try
    {
        const Reference< XFastAttributeList > xAttr( rEvent.mxAttributes );
        Reference< XFastContextHandler > xContext;

        if ( mxNamespaceHandler.is() )
        {
            const Sequence< xml::Attribute > aDeclarations
                = rEvent.mxDeclAttributes->getUnknownAttributes();
            for (const auto& rDeclaration : aDeclarations)
                mxNamespaceHandler->registerNamespace( rDeclaration.Name, rDeclaration.Value );
        }

        if( rEvent.mnElementToken != FastToken::DONTKNOW )
        {
            if( pParentContext )
                xContext = pParentContext->createFastChildContext( rEvent.mnElementToken, xAttr );
            else if( mxDocumentHandler.is() )
                xContext = mxDocumentHandler->createFastChildContext( rEvent.mnElementToken, xAttr );

            if( xContext.is() )
                xContext->startFastElement( rEvent.mnElementToken, xAttr );
        }
        else
        {
            if( pParentContext )
                xContext = pParentContext->createUnknownChildContext(
                    rEvent.msNamespace, rEvent.msElementName, xAttr );
            else if( mxDocumentHandler.is() )
                xContext = mxDocumentHandler->createUnknownChildContext(
                    rEvent.msNamespace, rEvent.msElementName, xAttr );

            if( xContext.is() )
                xContext->startUnknownElement( rEvent.msNamespace, rEvent.msElementName, xAttr );
        }

        // Move the reference we own into the stack entry to avoid refcount thrash.
        maContextStack.top().mxContext = std::move( xContext );
    }
    catch (...)
    {
        saveException( ::cppu::getCaughtException() );
    }
}
483
484
void Entity::characters( const OUString& sChars )
485
10.6M
{
486
10.6M
    if (maContextStack.empty())
487
368
    {
488
        // Malformed XML stream !?
489
368
        return;
490
368
    }
491
492
10.6M
    XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
493
10.6M
    if( pContext ) try
494
10.5M
    {
495
10.5M
        pContext->characters( sChars );
496
10.5M
    }
497
10.5M
    catch (...)
498
10.5M
    {
499
0
        saveException( ::cppu::getCaughtException() );
500
0
    }
501
10.6M
}
502
503
void Entity::endElement()
504
15.9M
{
505
15.9M
    if (maContextStack.empty())
506
2.11k
    {
507
        // Malformed XML stream !?
508
2.11k
        return;
509
2.11k
    }
510
511
15.9M
    const SaxContext& aContext = maContextStack.top();
512
15.9M
    XFastContextHandler* pContext( aContext.mxContext.get() );
513
15.9M
    if( pContext )
514
14.5M
        try
515
14.5M
        {
516
14.5M
            sal_Int32 nElementToken = aContext.mnElementToken;
517
14.5M
            if( nElementToken != FastToken::DONTKNOW )
518
13.5M
                pContext->endFastElement( nElementToken );
519
925k
            else
520
925k
                pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName );
521
14.5M
        }
522
14.5M
        catch (...)
523
14.5M
        {
524
54.4k
            saveException( ::cppu::getCaughtException() );
525
54.4k
        }
526
15.9M
    maContextStack.pop();
527
15.9M
}
528
529
void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
530
53.5k
{
531
53.5k
    if( mxDocumentHandler.is() ) try
532
53.5k
    {
533
53.5k
        mxDocumentHandler->processingInstruction( rTarget, rData );
534
53.5k
    }
535
53.5k
    catch (...)
536
53.5k
    {
537
0
        saveException( ::cppu::getCaughtException() );
538
0
    }
539
53.5k
}
540
541
void Entity::transferUsedEvents()
542
0
{
543
0
    std::unique_lock aGuard(maEventProtector);
544
0
    if (!maUsedEvents.empty())
545
0
    {
546
0
        mxProducedEvents = std::move(maUsedEvents.front());
547
0
        maUsedEvents.pop();
548
0
        aGuard.unlock(); // unlock
549
0
        mnProducedEventsSize = 0;
550
0
    }
551
0
}
552
553
/// Returns the event list currently being filled. If there is none, a used
/// list is recycled via transferUsedEvents(); failing that, a fresh list with
/// mnEventListSize slots is created.
EventList& Entity::getEventList()
{
    if (mxProducedEvents)
        return *mxProducedEvents;

    transferUsedEvents();
    if (mxProducedEvents)
        return *mxProducedEvents;

    EventList& rFresh = mxProducedEvents.emplace();
    rFresh.maEvents.resize(mnEventListSize);
    rFresh.mbIsAttributesEmpty = false;
    mnProducedEventsSize = 0;
    return rFresh;
}
568
569
/// Returns the Event slot the caller should fill next.
///
/// With threading disabled this is always maSharedEvent, and aType is not
/// stored. With threading enabled the next free slot of the current event
/// list is handed out and tagged with aType; the list grows by one slot when
/// it is already full.
///
/// @param aType kind of event being produced
/// @return reference to the slot; it stays owned by this Entity
Event& Entity::getEvent( CallbackType aType )
{
    if (!mbEnableThreads)
        return maSharedEvent;

    EventList& rEventList = getEventList();
    if (mnProducedEventsSize == rEventList.maEvents.size())
    {
        // maSavedException is written under maSavedExceptionMutex by
        // saveException(), so query it through the locking accessor instead
        // of reading the member directly.
        SAL_WARN_IF(!hasException(), "sax",
            "Event vector should only exceed " << mnEventListSize <<
            " temporarily while an exception is pending");
        rEventList.maEvents.resize(mnProducedEventsSize + 1);
    }
    Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
    rEvent.maType = aType;
    return rEvent;
}
586
587
/// Builds "[<system id> line <n>]: <message>" from the last libxml2 error of
/// the given context; the message is "unknown error" when libxml2 reports
/// none.
OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine )
{
    const xmlError* const pLastError = xmlCtxtGetLastError( ctxt );
    const char* const pMessage
        = (pLastError && pLastError->message) ? pLastError->message : "unknown error";

    const OUString aMessage(pMessage, strlen(pMessage), RTL_TEXTENCODING_ASCII_US);
    return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " +
           aMessage;
}
598
599
// throw an exception, but avoid callback if
600
// during a threaded produce
601
/// Builds a SAXParseException from the parser's last error and the locator's
/// position, offers it to the error handler unless this is a threaded parse
/// still in progress, and then always throws it.
///
/// @param xDocumentLocator source of system/public id and line/column
/// @param mbDuringParse    true when called while parsing is under way
/// @throws SAXParseException always (unless the error handler throws first)
void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
                             bool mbDuringParse )
{
    // Error during parsing: pick up a previously saved exception, if any.
    Any savedException;
    {
        std::scoped_lock aLock(maSavedExceptionMutex);
        if (maSavedException.hasValue())
            savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
    }

    SAXParseException aExcept(
        lclGetErrorMessage( mpParser,
                            xDocumentLocator->getSystemId(),
                            xDocumentLocator->getLineNumber() ),
        Reference< XInterface >(),
        savedException,
        xDocumentLocator->getPublicId(),
        xDocumentLocator->getSystemId(),
        xDocumentLocator->getLineNumber(),
        xDocumentLocator->getColumnNumber()
    );

    // If an error handler is set it may throw the exception itself; it is
    // skipped only while a threaded parse is running.
    const bool bThreadedParseRunning = mbDuringParse && mbEnableThreads;
    if( !bThreadedParseRunning && mxErrorHandler.is() )
        mxErrorHandler->fatalError( Any( aExcept ) );

    // The error handler has not thrown, but parsing must stop => throw ourselves.
    throw aExcept;
}
635
636
// In the single threaded case we emit events via our C
637
// callbacks, so any exception caught must be queued up until
638
// we can safely re-throw it from our C++ parent of parse()
639
640
// If multi-threaded, we need to push an EXCEPTION event, at
641
// which point we transfer ownership of maSavedException to
642
// the consuming thread.
643
void Entity::saveException( const Any & e )
644
63.3k
{
645
    // fdo#81214 - allow the parser to run on after an exception,
646
    // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
647
    // for XComponent; and yet expect to continue parsing.
648
63.3k
    SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));
649
63.3k
    std::scoped_lock g(maSavedExceptionMutex);
650
63.3k
    if (maSavedException.hasValue())
651
59.2k
    {
652
59.2k
        SAL_INFO("sax.fastparser", "discarding exception, already have one");
653
59.2k
    }
654
4.12k
    else
655
4.12k
    {
656
4.12k
        maSavedException = e;
657
4.12k
    }
658
63.3k
}
659
660
bool Entity::hasException()
661
361k
{
662
361k
    std::scoped_lock g(maSavedExceptionMutex);
663
361k
    return maSavedException.hasValue();
664
361k
}
665
666
} // namespace
667
668
namespace sax_fastparser {
669
670
/// Starts with both option flags off and no current entity, and creates the
/// document locator bound to this parser.
FastSaxParserImpl::FastSaxParserImpl()
    : m_bIgnoreMissingNSDecl(false)
    , m_bDisableThreadedParser(false)
    , mpTop(nullptr)
{
    mxDocumentLocator.set( new FastLocatorImpl( this ) );
}
677
678
/// Disposes the document locator and unlinks and frees every non-null entry
/// of m_TemporalEntities.
FastSaxParserImpl::~FastSaxParserImpl()
{
    if( mxDocumentLocator.is() )
        mxDocumentLocator->dispose();

    for (auto& rpEntity : m_TemporalEntities)
    {
        if (rpEntity)
        {
            // The entities are released through the libxml2 node API.
            xmlNodePtr pNode = reinterpret_cast<xmlNodePtr>(rpEntity);
            xmlUnlinkNode(pNode);
            xmlFreeNode(pNode);
        }
    }
}
691
692
void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
693
1.29M
{
694
1.29M
    Entity& rEntity = getEntity();
695
1.29M
    assert(!rEntity.maNamespaceCount.empty()); // need a context!
696
697
1.29M
    sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
698
1.29M
    if( rEntity.maNamespaceDefines.size() <= nOffset )
699
198k
        rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
700
701
1.29M
    rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
702
1.29M
}
703
704
/// Looks up the token for a UTF-8 name using the current entity's token handler.
sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName)
{
    FastTokenHandlerBase* const pTokenHandler = getEntity().mxTokenHandler.get();
    const char* const pUtf8Name = XML_CAST( pName ); // uses utf-8
    return FastTokenHandlerBase::getTokenFromChars( pTokenHandler, pUtf8Name );
}
709
710
/// Resolves a prefixed name to a token by searching the currently valid
/// namespace definitions from the most recent one backwards.
///
/// Returns FastToken::DONTKNOW when there is no namespace context, or when
/// the prefix is unknown and m_bIgnoreMissingNSDecl is set.
/// @throws css::xml::sax::SAXException for an unknown prefix otherwise
sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName )
{
    Entity& rEntity = getEntity();
    if (rEntity.maNamespaceCount.empty())
        return FastToken::DONTKNOW;

    for (sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining)
    {
        const auto& rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
        if( rDefine.maPrefix == sPrefix )
            return GetTokenWithContextNamespace(rDefine.mnToken, pName);
    }

    if (m_bIgnoreMissingNSDecl)
        return FastToken::DONTKNOW;

    throw SAXException("No namespace defined for " + OStringToOUString(sPrefix,
        RTL_TEXTENCODING_UTF8), {}, {});
}
730
731
/// Returns the token stored in maNamespaceMap for the URL, or
/// FastToken::DONTKNOW when the URL is not in the map.
sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
{
    const auto aFound = maNamespaceMap.find( rNamespaceURL );
    if( aFound == maNamespaceMap.end() )
        return FastToken::DONTKNOW;
    return aFound->second;
}
739
740
/// Returns the namespace URL of the most recent currently valid definition
/// of the given prefix.
/// @throws css::xml::sax::SAXException when no such definition exists
OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix )
{
    Entity& rEntity = getEntity();
    if( !rEntity.maNamespaceCount.empty() )
    {
        for (sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining)
        {
            const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
            if( rDefine.maPrefix == rPrefix )
                return rDefine.maNamespaceURL;
        }
    }

    throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
            Reference< XInterface >(), Any());
}
754
755
/// Combines a namespace token with the token of the given name (bitwise OR).
/// Returns FastToken::DONTKNOW when either part is unknown.
sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName )
{
    if( nNamespaceToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    const sal_Int32 nNameToken = GetToken( pName );
    if( nNameToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    return nNamespaceToken | nNameToken;
}
766
767
namespace
768
{
769
    class ParserCleanup
770
    {
771
    private:
772
        FastSaxParserImpl& m_rParser;
773
        Entity& m_rEntity;
774
        rtl::Reference<ParserThread> m_xParser;
775
    public:
776
        ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
777
219k
            : m_rParser(rParser)
778
219k
            , m_rEntity(rEntity)
779
219k
        {
780
219k
        }
781
        ~ParserCleanup()
782
219k
        {
783
219k
            if (m_rEntity.mpParser)
784
213k
            {
785
213k
                if (m_rEntity.mpParser->myDoc)
786
1.57k
                    xmlFreeDoc(m_rEntity.mpParser->myDoc);
787
213k
                xmlFreeParserCtxt(m_rEntity.mpParser);
788
213k
            }
789
219k
            joinThread();
790
219k
            m_rParser.popEntity();
791
219k
        }
792
        void setThread(const rtl::Reference<ParserThread> &xParser)
793
0
        {
794
0
            m_xParser = xParser;
795
0
        }
796
        void joinThread()
797
219k
        {
798
219k
            if (m_xParser.is())
799
0
            {
800
0
                rtl::Reference<ParserThread> xToJoin = m_xParser;
801
0
                m_xParser.clear();
802
0
                xToJoin->join();
803
0
            }
804
219k
        }
805
    };
806
}
807
/***************
808
*
809
* parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
810
* the file-specific initialization work. (During a parser run, external files may be opened)
811
*
812
****************/
813
void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
814
235k
{
815
235k
    xmlInitParser();
816
817
    // Only one text at one time
818
235k
    std::unique_lock guard( maMutex );
819
820
235k
    pushEntity(maData, rStructSource);
821
235k
    Entity& rEntity = getEntity();
822
235k
    ParserCleanup aEnsureFree(*this, rEntity);
823
824
    // start the document
825
235k
    if( rEntity.mxDocumentHandler.is() )
826
219k
    {
827
219k
        rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator );
828
219k
        rEntity.mxDocumentHandler->startDocument();
829
219k
    }
830
831
#ifdef EMSCRIPTEN
832
    rEntity.mbEnableThreads = false;
833
#else
834
235k
    if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
835
0
    {
836
0
        Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
837
        // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
838
0
        rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
839
0
                || (rEntity.maStructSource.aInputStream->available() > 10000);
840
0
    }
841
235k
#endif
842
843
235k
    if (rEntity.mbEnableThreads)
844
0
    {
845
0
        rtl::Reference<ParserThread> xParser = new ParserThread(this);
846
0
        xParser->launch();
847
0
        aEnsureFree.setThread(xParser);
848
0
        bool done = false;
849
0
        do {
850
0
            rEntity.maConsumeResume.wait();
851
0
            rEntity.maConsumeResume.reset();
852
853
0
            std::unique_lock aGuard(rEntity.maEventProtector);
854
0
            while (!rEntity.maPendingEvents.empty())
855
0
            {
856
0
                if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
857
0
                    rEntity.maProduceResume.set(); // start producer again
858
859
0
                EventList aEventList = std::move(rEntity.maPendingEvents.front());
860
0
                rEntity.maPendingEvents.pop();
861
0
                aGuard.unlock(); // unlock
862
863
0
                if (!consume(aEventList))
864
0
                    done = true;
865
866
0
                aGuard.lock(); // lock
867
868
0
                if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
869
0
                {
870
0
                    aGuard.unlock();
871
0
                    for (auto& rEvent : aEventList.maEvents)
872
0
                    {
873
0
                        if (rEvent.mxAttributes.is())
874
0
                        {
875
0
                            rEvent.mxAttributes->clear();
876
0
                            if( rEntity.mxNamespaceHandler.is() )
877
0
                                rEvent.mxDeclAttributes->clear();
878
0
                        }
879
0
                        aEventList.mbIsAttributesEmpty = true;
880
0
                    }
881
0
                    aGuard.lock();
882
0
                }
883
884
0
                rEntity.maUsedEvents.push(std::move(aEventList));
885
0
            }
886
0
        } while (!done);
887
0
        aEnsureFree.joinThread();
888
0
        deleteUsedEvents();
889
890
        // callbacks used inside XML_Parse may have caught an exception No need
891
        // to lock maSavedExceptionMutex here because parser thread is joined.
892
        // coverity[missing_lock : SUPPRESS] 2024.6.1
893
0
        if( rEntity.maSavedException.hasValue() )
894
0
            rEntity.throwException( mxDocumentLocator, true );
895
0
    }
896
235k
    else
897
235k
    {
898
235k
        parse();
899
235k
    }
900
901
    // finish document
902
235k
    if( rEntity.mxDocumentHandler.is() )
903
123k
    {
904
123k
        rEntity.mxDocumentHandler->endDocument();
905
123k
    }
906
235k
}
907
908
void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
909
397k
{
910
397k
    maData.mxDocumentHandler = Handler;
911
397k
}
912
913
void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
914
322k
{
915
322k
    assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" );
916
322k
    maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
917
322k
}
918
919
void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
920
18.6M
{
921
18.6M
    if( NamespaceToken < FastToken::NAMESPACE )
922
0
        throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);
923
924
18.6M
    if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
925
18.6M
    {
926
18.6M
        maNamespaceMap[ NamespaceURL ] = NamespaceToken;
927
18.6M
        return;
928
18.6M
    }
929
0
    throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);
930
18.6M
}
931
932
/**
 * Looks up the namespace URL for rPrefix (converted to UTF-8).
 *
 * @throws IllegalArgumentException if the lookup raises any UNO Exception.
 */
OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
{
    try
    {
        const OString aUtf8Prefix = OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 );
        return GetNamespaceURL( aUtf8Prefix );
    }
    catch (const Exception&)
    {
        // reported uniformly as an illegal argument below
    }
    throw IllegalArgumentException();
}
943
944
void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
945
0
{
946
0
    maData.mxErrorHandler = Handler;
947
0
}
948
949
void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
950
117k
{
951
117k
    maData.mxNamespaceHandler = Handler;
952
117k
}
953
954
void FastSaxParserImpl::setCustomEntityNames(
955
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
956
0
{
957
0
    m_Replacements.resize(replacements.size());
958
0
    for (size_t i = 0; i < replacements.size(); ++i)
959
0
    {
960
0
        m_Replacements[i].name = replacements[i].First;
961
0
        m_Replacements[i].replacement = replacements[i].Second;
962
0
    }
963
0
    if (m_Replacements.size() > 1)
964
0
        std::sort(m_Replacements.begin(), m_Replacements.end());
965
0
}
966
967
void FastSaxParserImpl::deleteUsedEvents()
968
0
{
969
0
    Entity& rEntity = getEntity();
970
0
    std::unique_lock aGuard(rEntity.maEventProtector);
971
972
0
    while (!rEntity.maUsedEvents.empty())
973
0
    {
974
0
        { // the block makes sure that aEventList is destructed outside the lock
975
0
            EventList aEventList = std::move(rEntity.maUsedEvents.front());
976
0
            rEntity.maUsedEvents.pop();
977
978
0
            aGuard.unlock(); // unlock
979
0
        }
980
981
0
        aGuard.lock(); // lock
982
0
    }
983
0
}
984
985
void FastSaxParserImpl::produce( bool bForceFlush )
986
0
{
987
0
    Entity& rEntity = getEntity();
988
0
    if (!(bForceFlush ||
989
0
        rEntity.mnProducedEventsSize >= Entity::mnEventListSize))
990
0
        return;
991
992
0
    std::unique_lock aGuard(rEntity.maEventProtector);
993
994
0
    while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
995
0
    { // pause parsing for a bit
996
0
        aGuard.unlock(); // unlock
997
0
        rEntity.maProduceResume.wait();
998
0
        rEntity.maProduceResume.reset();
999
0
        aGuard.lock(); // lock
1000
0
    }
1001
1002
0
    rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
1003
1004
0
    aGuard.unlock(); // unlock
1005
1006
0
    rEntity.mxProducedEvents.reset();
1007
0
    assert(!rEntity.mxProducedEvents);
1008
1009
0
    rEntity.maConsumeResume.set();
1010
0
}
1011
1012
/**
 * Replays one batch of queued parser events against the current entity.
 *
 * @return false once a DONE event is reached (or an unexpected event type is
 *         seen), true if the whole batch was dispatched.
 */
bool FastSaxParserImpl::consume(EventList& rEventList)
{
    Entity& rEntity = getEntity();
    rEventList.mbIsAttributesEmpty = false;
    for (auto& rQueued : rEventList.maEvents)
    {
        switch (rQueued.maType)
        {
            case CallbackType::DONE:
                return false;
            case CallbackType::START_ELEMENT:
                rEntity.startElement( &rQueued );
                break;
            case CallbackType::END_ELEMENT:
                rEntity.endElement();
                break;
            case CallbackType::CHARACTERS:
                rEntity.characters( rQueued.msChars );
                break;
            case CallbackType::PROCESSING_INSTRUCTION:
                // msNamespace / msElementName carry ( target, data ) for this event type
                rEntity.processingInstruction(
                    rQueued.msNamespace, rQueued.msElementName );
                break;
            case CallbackType::EXCEPTION:
                rEntity.throwException( mxDocumentLocator, false );
                [[fallthrough]]; // avoid unreachable code warning with some compilers
            default:
                assert(false);
                return false;
        }
    }
    return true;
}
1045
1046
void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
1047
        xml::sax::InputSource const& rSource)
1048
235k
{
1049
235k
    if (!rSource.aInputStream.is())
1050
16.6k
        throw SAXException(u"No input source"_ustr, Reference<XInterface>(), Any());
1051
1052
219k
    maEntities.emplace(rEntityData);
1053
219k
    mpTop = &maEntities.top();
1054
1055
219k
    mpTop->maStructSource = rSource;
1056
1057
219k
    mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
1058
219k
    if (!mpTop->maStructSource.sEncoding.isEmpty())
1059
0
    {
1060
0
        mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
1061
0
    }
1062
219k
}
1063
1064
void FastSaxParserImpl::popEntity()
1065
219k
{
1066
219k
    maEntities.pop();
1067
219k
    mpTop = !maEntities.empty() ? &maEntities.top() : nullptr;
1068
219k
}
1069
1070
// starts parsing with actual parser !
1071
void FastSaxParserImpl::parse()
1072
219k
{
1073
219k
    const int BUFFER_SIZE = 16 * 1024;
1074
219k
    Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
1075
1076
219k
    Entity& rEntity = getEntity();
1077
1078
    // set all necessary C-Callbacks
1079
219k
    static xmlSAXHandler callbacks;
1080
219k
    callbacks.startElementNs = call_callbackStartElement;
1081
219k
    callbacks.endElementNs = call_callbackEndElement;
1082
219k
    callbacks.characters = call_callbackCharacters;
1083
219k
    callbacks.processingInstruction = call_callbackProcessingInstruction;
1084
219k
    callbacks.getEntity = call_callbackGetEntity;
1085
219k
    callbacks.initialized = XML_SAX2_MAGIC;
1086
219k
    int nRead = 0;
1087
219k
    do
1088
456k
    {
1089
456k
        nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
1090
456k
        if( nRead <= 0 )
1091
203k
        {
1092
203k
            if( rEntity.mpParser != nullptr )
1093
203k
            {
1094
203k
                if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
1095
79.2k
                    rEntity.throwException( mxDocumentLocator, true );
1096
203k
                if (rEntity.hasException())
1097
847
                    rEntity.throwException(mxDocumentLocator, true);
1098
203k
            }
1099
203k
            break;
1100
203k
        }
1101
1102
252k
        bool bContinue = true;
1103
252k
        if( rEntity.mpParser == nullptr )
1104
213k
        {
1105
            // create parser with proper encoding (needs the first chunk of data)
1106
213k
            rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
1107
213k
                reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
1108
213k
            if( !rEntity.mpParser )
1109
0
                throw SAXException(u"Couldn't create parser"_ustr, Reference< XInterface >(), Any() );
1110
1111
            // Tell libxml2 parser to decode entities in attribute values.
1112
            // Also allow XML attribute values which are larger than 10MB, because this used to work
1113
            // with expat.
1114
            // coverity[unsafe_xml_parse_config] - entity support is required
1115
213k
            xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE);
1116
213k
        }
1117
39.2k
        else
1118
39.2k
        {
1119
39.2k
            bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
1120
39.2k
                            == XML_ERR_OK;
1121
39.2k
        }
1122
1123
        // callbacks used inside XML_Parse may have caught an exception
1124
252k
        if (!bContinue)
1125
9.82k
        {
1126
9.82k
            rEntity.throwException( mxDocumentLocator, true );
1127
9.82k
        }
1128
252k
        if (rEntity.hasException())
1129
475
        {
1130
475
            rEntity.throwException( mxDocumentLocator, true );
1131
475
        }
1132
252k
    } while( nRead > 0 );
1133
219k
    rEntity.getEvent( CallbackType::DONE );
1134
219k
    if( rEntity.mbEnableThreads )
1135
0
        produce( true );
1136
219k
}
1137
1138
// The C-Callbacks
1139
void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
1140
    int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
1141
32.6M
{
1142
32.6M
    if (!pendingCharacters.empty())
1143
7.70M
        sendPendingCharacters();
1144
32.6M
    Entity& rEntity = getEntity();
1145
32.6M
    if( rEntity.maNamespaceCount.empty() )
1146
197k
    {
1147
197k
        rEntity.maNamespaceCount.push(0);
1148
197k
        DefineNamespace( "xml"_ostr, u"http://www.w3.org/XML/1998/namespace"_ustr);
1149
197k
    }
1150
32.4M
    else
1151
32.4M
    {
1152
32.4M
        rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
1153
32.4M
    }
1154
1155
    // create attribute map and process namespace instructions
1156
32.6M
    Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
1157
32.6M
    bool bIsAttributesEmpty = false;
1158
32.6M
    if ( rEntity.mbEnableThreads )
1159
0
        bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
1160
1161
32.6M
    if (rEvent.mxAttributes.is())
1162
32.4M
    {
1163
32.4M
        if( !bIsAttributesEmpty )
1164
32.4M
            rEvent.mxAttributes->clear();
1165
32.4M
    }
1166
197k
    else
1167
197k
        rEvent.mxAttributes.set(
1168
197k
                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
1169
1170
32.6M
    if( rEntity.mxNamespaceHandler.is() )
1171
20.8M
    {
1172
20.8M
        if (rEvent.mxDeclAttributes.is())
1173
20.7M
        {
1174
20.7M
            if( !bIsAttributesEmpty )
1175
20.7M
                rEvent.mxDeclAttributes->clear();
1176
20.7M
        }
1177
88.1k
        else
1178
88.1k
            rEvent.mxDeclAttributes.set(
1179
88.1k
                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
1180
20.8M
    }
1181
1182
32.6M
    OUString sNamespace;
1183
32.6M
    sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
1184
32.6M
    if (!rEntity.maNamespaceStack.empty())
1185
32.4M
    {
1186
32.4M
        sNamespace = rEntity.maNamespaceStack.top().msName;
1187
32.4M
        nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
1188
32.4M
    }
1189
1190
32.6M
    try
1191
32.6M
    {
1192
        /*  #158414# Each element may define new namespaces, also for attributes.
1193
            First, process all namespaces, second, process the attributes after namespaces
1194
            have been initialized. */
1195
1196
32.6M
        std::string_view sPrefix; // convert to string_view so we only do strlen() once.
1197
32.6M
        if (prefix != nullptr)
1198
11.2M
            sPrefix = XML_CAST(prefix);
1199
        // #158414# first: get namespaces
1200
33.9M
        for (int i = 0; i < numNamespaces * 2; i += 2)
1201
1.32M
        {
1202
            // namespaces[] is (prefix/URI)
1203
1.32M
            if( namespaces[ i ] != nullptr )
1204
1.10M
            {
1205
1.10M
                OString aPrefix( XML_CAST( namespaces[ i ] ));
1206
1.10M
                OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1207
1.10M
                NormalizeURI( namespaceURL );
1208
1.10M
                DefineNamespace(aPrefix, namespaceURL);
1209
1.10M
                if( rEntity.mxNamespaceHandler.is() )
1210
857k
                    rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1211
1.10M
            }
1212
225k
            else
1213
225k
            {
1214
                // default namespace
1215
225k
                sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1216
225k
                NormalizeURI( sNamespace );
1217
225k
                nNamespaceToken = GetNamespaceToken( sNamespace );
1218
225k
                if( rEntity.mxNamespaceHandler.is() )
1219
159k
                    rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1220
225k
            }
1221
1.32M
        }
1222
1223
32.6M
        if ( rEntity.mxTokenHandler.is() )
1224
32.6M
        {
1225
            // #158414# second: fill attribute list with other attributes
1226
32.6M
            rEvent.mxAttributes->reserve( numAttributes );
1227
52.6M
            for (int i = 0; i < numAttributes * 5; i += 5)
1228
20.0M
            {
1229
                // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1230
20.0M
                if( attributes[ i + 1 ] != nullptr )
1231
7.40M
                {
1232
7.40M
                    sal_Int32 nAttributeToken = GetTokenWithPrefix(XML_CAST(attributes[ i + 1 ]), attributes[ i ]);
1233
7.40M
                    if( nAttributeToken != FastToken::DONTKNOW )
1234
6.28M
                        rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1235
1.12M
                    else
1236
1.12M
                        addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1237
7.40M
                }
1238
12.6M
                else
1239
12.6M
                {
1240
12.6M
                    sal_Int32 nAttributeToken = GetToken(attributes[ i ]);
1241
12.6M
                    if( nAttributeToken != FastToken::DONTKNOW )
1242
12.2M
                        rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1243
416k
                    else
1244
416k
                    {
1245
416k
                        SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" <<
1246
416k
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1247
416k
                        rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1248
416k
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1249
416k
                    }
1250
12.6M
                }
1251
20.0M
            }
1252
1253
32.6M
            if( !sPrefix.empty() )
1254
11.2M
                rEvent.mnElementToken = GetTokenWithPrefix(sPrefix, localName);
1255
21.3M
            else if( !sNamespace.isEmpty() )
1256
5.42M
                rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName);
1257
15.9M
            else
1258
15.9M
                rEvent.mnElementToken = GetToken(localName);
1259
32.6M
        }
1260
18.4E
        else
1261
18.4E
        {
1262
18.4E
            for (int i = 0; i < numAttributes * 5; i += 5)
1263
0
            {
1264
0
                if( attributes[ i + 1 ] != nullptr )
1265
0
                    addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1266
0
                else
1267
0
                    rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1268
0
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1269
0
            }
1270
1271
18.4E
            rEvent.mnElementToken = FastToken::DONTKNOW;
1272
18.4E
        }
1273
1274
32.6M
        if( rEvent.mnElementToken == FastToken::DONTKNOW )
1275
16.1M
        {
1276
16.1M
            OUString aElementPrefix;
1277
16.1M
            if( !sPrefix.empty() )
1278
835k
            {
1279
835k
                aElementPrefix = OUString( sPrefix.data(), sPrefix.size(), RTL_TEXTENCODING_UTF8 );
1280
835k
                if ( URI != nullptr )
1281
286k
                    sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
1282
549k
                else if ( m_bIgnoreMissingNSDecl )
1283
549k
                    sNamespace.clear();
1284
0
                else
1285
0
                    throw SAXException("No namespace defined for " + aElementPrefix, {}, {});
1286
835k
                nNamespaceToken = GetNamespaceToken( sNamespace );
1287
835k
            }
1288
16.1M
            OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
1289
16.1M
            rEvent.msNamespace = sNamespace;
1290
16.1M
            if( aElementPrefix.isEmpty() )
1291
15.2M
                rEvent.msElementName = std::move(aElementLocalName);
1292
835k
            else
1293
835k
                rEvent.msElementName = aElementPrefix + ":" + aElementLocalName;
1294
16.1M
        }
1295
16.5M
        else // token is always preferred.
1296
16.5M
            rEvent.msElementName.clear();
1297
1298
32.6M
        rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
1299
32.6M
        if (rEntity.mbEnableThreads)
1300
0
            produce();
1301
32.6M
        else
1302
32.6M
        {
1303
32.6M
            SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
1304
32.6M
            rEntity.startElement( &rEvent );
1305
32.6M
        }
1306
32.6M
    }
1307
32.6M
    catch (...)
1308
32.6M
    {
1309
2.49k
        rEntity.saveException( ::cppu::getCaughtException() );
1310
2.49k
    }
1311
32.6M
}
1312
1313
void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
1314
1.11M
{
1315
1.11M
    OUString aNamespaceURI;
1316
1.11M
    if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr )
1317
628k
        aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 );
1318
1.11M
    const OString aPrefix( XML_CAST( attributes[ i + 1 ] ));
1319
1.11M
    const OString aLocalName( XML_CAST( attributes[ i ] ));
1320
1.11M
    OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName;
1321
1.11M
    xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
1322
1.11M
        OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1323
1.11M
    SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI);
1324
1.11M
}
1325
1326
void FastSaxParserImpl::callbackEndElement()
1327
15.9M
{
1328
15.9M
    if (!pendingCharacters.empty())
1329
2.89M
        sendPendingCharacters();
1330
15.9M
    Entity& rEntity = getEntity();
1331
15.9M
    SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
1332
15.9M
    if( !rEntity.maNamespaceCount.empty() )
1333
15.9M
        rEntity.maNamespaceCount.pop();
1334
1335
15.9M
    SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack");
1336
15.9M
    if( !rEntity.maNamespaceStack.empty() )
1337
15.9M
        rEntity.maNamespaceStack.pop();
1338
1339
15.9M
    rEntity.getEvent( CallbackType::END_ELEMENT );
1340
15.9M
    if (rEntity.mbEnableThreads)
1341
0
        produce();
1342
15.9M
    else
1343
15.9M
        rEntity.endElement();
1344
15.9M
}
1345
1346
void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
1347
11.9M
{
1348
    // SAX interface allows that the characters callback splits content of one XML node
1349
    // (e.g. because there's an entity that needs decoding), however for consumers it's
1350
    // simpler FastSaxParser's character callback provides the whole string at once,
1351
    // so merge data from possible multiple calls and send them at once (before the element
1352
    // ends or another one starts).
1353
    //
1354
    // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1355
    // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1356
    // often in writer documents.
1357
11.9M
    int nOriginalLen = pendingCharacters.size();
1358
11.9M
    pendingCharacters.resize(nOriginalLen + nLen);
1359
11.9M
    memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
1360
11.9M
}
1361
1362
void FastSaxParserImpl::sendPendingCharacters()
1363
10.6M
{
1364
10.6M
    Entity& rEntity = getEntity();
1365
10.6M
    OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
1366
1367
10.6M
    if (sChars[0] == '_' && mxMap)
1368
0
        mxMap->get(uno::Any(sChars)) >>= sChars;
1369
1370
10.6M
    if (rEntity.mbEnableThreads)
1371
0
    {
1372
0
        Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
1373
0
        rEvent.msChars = std::move(sChars);
1374
0
        produce();
1375
0
    }
1376
10.6M
    else
1377
10.6M
        rEntity.characters( sChars );
1378
10.6M
    pendingCharacters.resize(0);
1379
10.6M
}
1380
1381
void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
1382
53.5k
{
1383
53.5k
    if (!pendingCharacters.empty())
1384
6.66k
        sendPendingCharacters();
1385
53.5k
    Entity& rEntity = getEntity();
1386
53.5k
    Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
1387
1388
    // This event is very rare, so no need to waste extra space for this
1389
    // Using namespace and element strings to be target and data in that order.
1390
53.5k
    rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
1391
53.5k
    if ( data != nullptr )
1392
47.2k
        rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
1393
6.33k
    else
1394
6.33k
        rEvent.msElementName.clear();
1395
1396
53.5k
    if (rEntity.mbEnableThreads)
1397
0
        produce();
1398
53.5k
    else
1399
53.5k
        rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
1400
53.5k
}
1401
1402
/**
 * libxml2 getEntity callback: resolves the entity called name.
 *
 * Lookup order:
 *  1. a custom replacement registered through setCustomEntityNames();
 *  2. a numeric name of the form "#NNN" (decimal) or "#xHHH" / "#XHHH"
 *     (hexadecimal), turned into the corresponding single code point;
 *  3. libxml2's predefined entities.
 *
 * Entities created here are recorded in m_TemporalEntities.
 *
 * Fixes compared with the previous version: the decimal form is parsed from
 * the first digit (it used to start at dname + 2 and so dropped the leading
 * digit), and values above U+10FFFF are no longer passed to OUString.
 */
xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name )
{
    if( !name )
        return xmlGetPredefinedEntity(name);
    const char* dname = XML_CAST(name);
    const size_t lname = strlen(dname);
    if( lname == 0 )
        return xmlGetPredefinedEntity(name);

    // Builds an internal general entity for `name` with the given replacement
    // text and records it in m_TemporalEntities.
    const auto makeEntity = [this, name](const OUString& rReplacement) {
        xmlEntityPtr entpt = xmlNewEntity(
            nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
            BAD_CAST(OUStringToOString(rReplacement, RTL_TEXTENCODING_UTF8).getStr()));
        m_TemporalEntities.push_back(entpt);
        return entpt;
    };

    if (!m_Replacements.empty())
    {
        // m_Replacements is sorted by setCustomEntityNames()
        auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname);
        if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0)
            return makeEntity(it->replacement);
    }

    if( lname < 2 || dname[0] != '#' )
        return xmlGetPredefinedEntity(name);

    const bool bHex = dname[1] == 'x' || dname[1] == 'X';
    if( bHex && lname < 3 )
        return xmlGetPredefinedEntity(name);

    // digits start after "#x" for the hexadecimal form and after "#" otherwise
    const unsigned long nCodePoint
        = bHex ? strtoul( dname + 2, nullptr, 16 ) : strtoul( dname + 1, nullptr, 10 );
    if( nCodePoint == 0 || nCodePoint > 0x10FFFF )
        return xmlGetPredefinedEntity(name);

    const sal_uInt32 cval = static_cast<sal_uInt32>( nCodePoint );
    return makeEntity( OUString( &cval, 1 ) );
}
1456
1457
322k
/// Creates the parser together with its implementation object.
FastSaxParser::FastSaxParser()
    : mpImpl(new FastSaxParserImpl)
{
}

/// Defined out of line, in the translation unit that also defines the parser's members.
FastSaxParser::~FastSaxParser() = default;
1462
1463
void SAL_CALL
1464
FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
1465
130k
{
1466
130k
    if (!rArguments.hasElements())
1467
0
        return;
1468
1469
130k
    OUString str;
1470
130k
    if ( !(rArguments[0] >>= str) )
1471
0
        throw IllegalArgumentException();
1472
1473
130k
    auto opts = comphelper::string::split(str, ',');
1474
130k
    for (auto &s : opts)
1475
130k
    {
1476
130k
        if ( s == "IgnoreMissingNSDecl" )
1477
101k
            mpImpl->m_bIgnoreMissingNSDecl = true;
1478
28.7k
        else if ( s == "DoSmeplease" )
1479
0
            ; //just ignore as this is already immune to billion laughs
1480
28.7k
        else if ( s == "DisableThreadedParser" )
1481
28.7k
            mpImpl->m_bDisableThreadedParser = true;
1482
0
        else
1483
0
            throw IllegalArgumentException();
1484
130k
    }
1485
1486
130k
    if (rArguments.size() > 1)
1487
101k
        rArguments[1] >>= mpImpl->mxMap;
1488
130k
}
1489
1490
/// Parses aInputSource; forwards to the implementation object.
void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
{
    mpImpl->parseStream(aInputSource);
}
1494
1495
/// Sets the document handler; forwards to the implementation object.
void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
{
    mpImpl->setFastDocumentHandler(Handler);
}
1499
1500
/// Sets the token handler; forwards to the implementation object.
void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
{
    mpImpl->setTokenHandler(Handler);
}
1504
1505
void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
1506
18.6M
{
1507
18.6M
    mpImpl->registerNamespace(NamespaceURL, NamespaceToken);
1508
18.6M
}
1509
1510
/// Returns a copy of the namespace URL the implementation object reports for rPrefix.
OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
{
    return mpImpl->getNamespaceURL(rPrefix);
}
1514
1515
/// Sets the error handler; forwards to the implementation object.
void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler )
{
    mpImpl->setErrorHandler(Handler);
}
1519
1520
/// Accepts an entity resolver but ignores it.
void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& )
{
    // not implemented
}
1524
1525
/// Accepts a locale but ignores it.
void FastSaxParser::setLocale( const lang::Locale& )
{
    // not implemented
}
1529
1530
/// Sets the namespace handler; forwards to the implementation object.
void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
{
    mpImpl->setNamespaceHandler(Handler);
}
1534
1535
/// Returns the implementation name of this component.
OUString FastSaxParser::getImplementationName()
{
    return u"com.sun.star.comp.extensions.xml.sax.FastParser"_ustr;
}
1539
1540
void FastSaxParser::setCustomEntityNames(
1541
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
1542
0
{
1543
0
    mpImpl->setCustomEntityNames(replacements);
1544
0
}
1545
1546
/** Reports whether the given service name is supported.

    The decision is delegated to the cppu::supportsService() helper, which
    is handed this object and the requested name.

    @param ServiceName  the service name to test
    @return the helper's result
 */
sal_Bool FastSaxParser::supportsService( const OUString& ServiceName )
{
    const bool bSupported = cppu::supportsService( this, ServiceName );
    return bSupported;
}
1550
1551
/** Returns the service names this parser supports.

    @return a one-element sequence holding
            "com.sun.star.xml.sax.FastParser"
 */
uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
{
    uno::Sequence<OUString> aServiceNames{ u"com.sun.star.xml.sax.FastParser"_ustr };
    return aServiceNames;
}
1555
1556
} // namespace sax_fastparser
1557
1558
/** Exported C entry point that creates a new FastSaxParser.

    Neither the component context nor the argument sequence is used
    (both parameters are unnamed). A fresh FastSaxParser is allocated and
    passed through cppu::acquire(), whose result is returned.
 */
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
    css::uno::XComponentContext * /*unused*/,
    css::uno::Sequence<css::uno::Any> const & /*unused*/)
{
    return cppu::acquire( new FastSaxParser() );
}
1565
1566
// ----------------------------------------------------------
1567
// copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases
1568
// for various dodgy namespace decls in the wild.
1569
1570
static bool NormalizeW3URI( OUString& rName );
1571
static bool NormalizeOasisURN( OUString& rName );
1572
1573
/** Normalizes a namespace URI in place.

    The OASIS URN rules are tried first. Only when they leave rName
    untouched are the W3 URI rules tried as well; their result is not
    needed here and is ignored.

    @param rName  the namespace name; may be replaced by its normalized form
 */
static void NormalizeURI( OUString& rName )
{
    if( NormalizeOasisURN( rName ) )
        return; // already rewritten by the OASIS rules

    NormalizeW3URI( rName );
}
1580
1581
// --- W3 URI matching (used by NormalizeW3URI) ---
// common prefix and xforms suffix of the URIs that get normalized
constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/");
constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms");
// canonical xforms namespace that matching URIs are replaced with
constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms");

// --- fixed namespace replacements (used by NormalizeOasisURN) ---
// each W3 namespace is paired with the OASIS "compatible" URN it is mapped to
constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg");
constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format");
constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
// both spellings of the SMIL namespace (with and without trailing slash)
// are mapped to the same compatible URN
constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/");
constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20");
constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");

// --- building blocks of the generic OASIS URN pattern ---
constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc");
constexpr OUStringLiteral XML_XMLNS(u"xmlns");
constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument");
constexpr OUStringLiteral XML_1_0(u"1.0");
1595
1596
/** Normalizes W3 xforms namespace URIs in place.

    A name is rewritten when it starts with XML_URI_W3_PREFIX and ends with
    XML_URI_XFORMS_SUFFIX, i.e. it has the shape
        http://www.w3.org/<anything>/xforms
    Such a name is replaced by XML_N_XFORMS_1_0.

    @param rName  the namespace name; replaced on a match, untouched otherwise
    @return true if rName was replaced, false otherwise
 */
static bool NormalizeW3URI( OUString& rName )
{
    const OUString aW3Prefix = XML_URI_W3_PREFIX;
    if( !rName.startsWith( aW3Prefix ) )
        return false;

    // Compare the tail of the name against the xforms suffix. The prefix
    // check above guarantees the name is longer than the suffix, so the
    // start offset cannot become negative.
    const OUString aXFormsSuffix = XML_URI_XFORMS_SUFFIX;
    const sal_Int32 nTailStart = rName.getLength() - aXFormsSuffix.getLength();
    const bool bHasXFormsSuffix = ( rName.subView( nTailStart ) == aXFormsSuffix );
    if( !bHasXFormsSuffix )
        return false;

    // W3 prefix and xforms suffix both present
    rName = XML_N_XFORMS_1_0;
    return true;
}
1619
1620
/** Normalizes OASIS namespace URNs (and a few fixed W3 namespaces) in place.

    Two kinds of rewrite happen here:

    1. Fixed replacements: the SVG, XSL-FO and SMIL (both spellings) W3
       namespaces are replaced by their OASIS "compatible" URNs.

    2. Generic pattern: a name of the form
           urn:oasis:names:tc:<tc-id>:xmlns:<sub-id>:1.<rest>
       where <rest> is at least one character and contains no further ':'
       gets its <tc-id> replaced by XML_OPENDOCUMENT and its version
       replaced by XML_1_0; everything in between is kept.

    @param rName  the namespace name; replaced on a match, untouched otherwise
    @return true if rName was rewritten, false otherwise
 */
static bool NormalizeOasisURN( OUString& rName )
{
    // #i38644#
    // a wrong namespace was exported for smil, so it is corrected here on
    // load for older documents (svg and fo get a fixed replacement too)
    if( rName == XML_N_SVG )
    {
        rName = XML_N_SVG_COMPAT;
        return true;
    }
    if( rName == XML_N_FO )
    {
        rName = XML_N_FO_COMPAT;
        return true;
    }
    if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD )
    {
        rName = XML_N_SMIL_COMPAT;
        return true;
    }

    // From here on the name is matched piece by piece against
    //   urn:oasis:names:tc:[^:]*:xmlns:[^:]*:1.[^:]*
    //                      |---|       |---| |-----|
    //                      TC-Id      Sub-Id Version
    const sal_Int32 nLength = rName.getLength();

    // must start with "urn:oasis:names:tc" ...
    const OUString aUrnPrefix = XML_URN_OASIS_NAMES_TC;
    if( !rName.startsWith( aUrnPrefix ) )
        return false;

    // ... immediately followed by ':'
    sal_Int32 nCursor = aUrnPrefix.getLength();
    if( nCursor >= nLength || rName[nCursor] != ':' )
        return false;

    // the TC-Id runs up to the next ':'
    const sal_Int32 nTCIdStart = nCursor + 1;
    const sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart );
    if( nTCIdEnd == -1 )
        return false;

    // after the TC-Id comes "xmlns" ...
    nCursor = nTCIdEnd + 1;
    const std::u16string_view aAfterTCId( rName.subView( nCursor ) );
    const OUString aXmlnsToken = XML_XMLNS;
    if( !o3tl::starts_with( aAfterTCId, aXmlnsToken ) )
        return false;

    // ... again followed by ':'
    nCursor += aXmlnsToken.getLength();
    if( nCursor >= nLength || rName[nCursor] != ':' )
        return false;

    // skip the Sub-Id: find the ':' that terminates it
    nCursor = rName.indexOf( ':', nCursor + 1 );
    if( nCursor == -1 )
        return false;

    // the version needs at least three characters ...
    const sal_Int32 nVersionStart = nCursor + 1;
    if( nVersionStart + 2 >= nLength )
        return false;
    // ... and must be the last component (no further ':')
    if( rName.indexOf( ':', nVersionStart ) != -1 )
        return false;

    // only 1.x versions are normalized
    if( rName[nVersionStart] != '1' || rName[nVersionStart + 1] != '.' )
        return false;

    // Rebuild the name: keep everything up to the TC-Id, substitute the
    // current TC-Id, keep ":xmlns:<sub-id>:" and append the current version.
    rName = rName.subView( 0, nTCIdStart ) +
            XML_OPENDOCUMENT +
            rName.subView( nTCIdEnd, nVersionStart - nTCIdEnd ) +
            XML_1_0;

    return true;
}
1700
1701
1702
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */