Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/sax/source/fastparser/fastparser.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sax/fastparser.hxx>
21
#include <sax/fastattribs.hxx>
22
#include <utility>
23
#include <xml2utf.hxx>
24
25
#include <com/sun/star/io/XSeekable.hpp>
26
#include <com/sun/star/lang/DisposedException.hpp>
27
#include <com/sun/star/lang/IllegalArgumentException.hpp>
28
#include <com/sun/star/uno/XComponentContext.hpp>
29
#include <com/sun/star/xml/sax/FastToken.hpp>
30
#include <com/sun/star/xml/sax/SAXParseException.hpp>
31
#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
32
#include <cppuhelper/implbase.hxx>
33
#include <cppuhelper/supportsservice.hxx>
34
#include <cppuhelper/exc_hlp.hxx>
35
#include <osl/conditn.hxx>
36
#include <rtl/ref.hxx>
37
#include <sal/log.hxx>
38
#include <salhelper/thread.hxx>
39
#include <comphelper/diagnose_ex.hxx>
40
#include <o3tl/string_view.hxx>
41
42
#include <queue>
43
#include <memory>
44
#include <mutex>
45
#include <optional>
46
#include <stack>
47
#include <string_view>
48
#include <unordered_map>
49
#include <vector>
50
#include <cassert>
51
#include <cstring>
52
#include <libxml/parser.h>
53
54
// Inverse of libxml's BAD_CAST.
55
151M
#define XML_CAST( str ) reinterpret_cast< const char* >( str )
56
57
using namespace ::osl;
58
using namespace ::cppu;
59
using namespace ::com::sun::star::uno;
60
using namespace ::com::sun::star::lang;
61
using namespace ::com::sun::star::xml::sax;
62
using namespace ::com::sun::star::io;
63
using namespace com::sun::star;
64
using namespace sax_fastparser;
65
66
static void NormalizeURI( OUString& rName );
67
68
namespace {
69
70
struct Event;
71
class FastLocatorImpl;
72
struct NamespaceDefine;
73
struct Entity;
74
75
typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap;
76
77
struct EventList
78
{
79
    std::vector<Event> maEvents;
80
    bool mbIsAttributesEmpty;
81
};
82
83
/// Kind of callback an Event stands for.
enum class CallbackType
{
    START_ELEMENT,
    END_ELEMENT,
    CHARACTERS,
    PROCESSING_INSTRUCTION,
    DONE,
    EXCEPTION
};
84
85
struct Event
86
{
87
    CallbackType maType;
88
    sal_Int32 mnElementToken;
89
    OUString msNamespace;
90
    OUString msElementName;
91
    rtl::Reference< FastAttributeList > mxAttributes;
92
    rtl::Reference< FastAttributeList > mxDeclAttributes;
93
    OUString msChars;
94
};
95
96
struct NameWithToken
97
{
98
    OUString msName;
99
    sal_Int32 mnToken;
100
101
    NameWithToken(OUString sName, sal_Int32 nToken) :
102
37.5M
        msName(std::move(sName)), mnToken(nToken) {}
103
};
104
105
struct SaxContext
106
{
107
    Reference< XFastContextHandler > mxContext;
108
    sal_Int32 mnElementToken;
109
    std::optional<OUString>  moNamespace;
110
    std::optional<OUString> moElementName;
111
112
    SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
113
37.5M
            mnElementToken(nElementToken)
114
37.5M
    {
115
37.5M
        if (nElementToken == FastToken::DONTKNOW)
116
19.4M
        {
117
19.4M
            moNamespace = aNamespace;
118
19.4M
            moElementName = aElementName;
119
19.4M
        }
120
37.5M
    }
121
};
122
123
struct ParserData
124
{
125
    css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
126
    rtl::Reference<FastTokenHandlerBase>                       mxTokenHandler;
127
    css::uno::Reference< css::xml::sax::XErrorHandler >        mxErrorHandler;
128
    css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
129
130
    ParserData();
131
};
132
133
struct NamespaceDefine
134
{
135
    OString     maPrefix;
136
    sal_Int32   mnToken;
137
    OUString    maNamespaceURL;
138
139
    NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL )
140
1.59M
        : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {}
141
14.5M
    NamespaceDefine() : mnToken(-1) {}
142
};
143
144
// Entity binds all information needed for a single file | single call of parseStream
145
struct Entity : public ParserData
146
{
147
    // Amount of work producer sends to consumer in one iteration:
148
    static const size_t mnEventListSize = 1000;
149
150
    // unique for each Entity instance:
151
152
    // Number of valid events in mxProducedEvents:
153
    size_t mnProducedEventsSize;
154
    std::optional<EventList> mxProducedEvents;
155
    std::queue<EventList> maPendingEvents;
156
    std::queue<EventList> maUsedEvents;
157
    std::mutex maEventProtector;
158
159
    static const size_t mnEventLowWater = 4;
160
    static const size_t mnEventHighWater = 8;
161
    osl::Condition maConsumeResume;
162
    osl::Condition maProduceResume;
163
    // Event we use to store data if threading is disabled:
164
    Event maSharedEvent;
165
166
    // copied in copy constructor:
167
168
    // Allow to disable threading for small documents:
169
    bool                                    mbEnableThreads;
170
    css::xml::sax::InputSource              maStructSource;
171
    xmlParserCtxtPtr                        mpParser;
172
    ::sax_expatwrap::XMLFile2UTFConverter   maConverter;
173
174
    // Exceptions cannot be thrown through the C-XmlParser (possible
175
    // resource leaks), therefore any exception thrown by a UNO callback
176
    // must be saved somewhere until the C-XmlParser is stopped.
177
    css::uno::Any                           maSavedException;
178
    std::mutex                              maSavedExceptionMutex;
179
    void saveException( const Any & e );
180
    // Thread-safe check if maSavedException has value
181
    bool hasException();
182
    void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
183
                         bool mbDuringParse );
184
185
    std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
186
    /* Context for main thread consuming events.
187
     * startElement() stores the data, which characters() and endElement() uses
188
     */
189
    std::stack< SaxContext, std::vector<SaxContext> >  maContextStack;
190
    // Determines which elements of maNamespaceDefines are valid in current context
191
    std::stack< sal_uInt32, std::vector<sal_uInt32> >  maNamespaceCount;
192
    std::vector< NamespaceDefine >                     maNamespaceDefines;
193
194
    explicit Entity( const ParserData& rData );
195
    Entity( const Entity& rEntity ) = delete;
196
    Entity& operator=( const Entity& rEntity ) = delete;
197
    void startElement( Event const *pEvent );
198
    void characters( const OUString& sChars );
199
    void endElement();
200
    void processingInstruction( const OUString& rTarget, const OUString& rData );
201
    void transferUsedEvents();
202
    EventList& getEventList();
203
    Event& getEvent( CallbackType aType );
204
};
205
206
// Stuff for custom entity names
207
struct ReplacementPair
208
{
209
    OUString name;
210
    OUString replacement;
211
};
212
inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
213
0
{
214
0
    return lhs.name < rhs.name;
215
0
}
216
inline bool operator<(const ReplacementPair& lhs, const char* rhs)
217
0
{
218
0
    return lhs.name.compareToAscii(rhs) < 0;
219
0
}
220
221
} // namespace
222
223
namespace sax_fastparser {
224
225
class FastSaxParserImpl
226
{
227
public:
228
    explicit FastSaxParserImpl();
229
    ~FastSaxParserImpl();
230
231
private:
232
    std::vector<ReplacementPair> m_Replacements;
233
    std::vector<xmlEntityPtr> m_TemporalEntities;
234
235
public:
236
    // XFastParser
237
    /// @throws css::xml::sax::SAXException
238
    /// @throws css::io::IOException
239
    /// @throws css::uno::RuntimeException
240
    void parseStream( const css::xml::sax::InputSource& aInputSource );
241
    /// @throws css::uno::RuntimeException
242
    void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
243
    /// @throws css::uno::RuntimeException
244
    void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
245
    /// @throws css::lang::IllegalArgumentException
246
    /// @throws css::uno::RuntimeException
247
    void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
248
    /// @throws css::lang::IllegalArgumentException
249
    /// @throws css::uno::RuntimeException
250
    OUString const & getNamespaceURL( std::u16string_view rPrefix );
251
    /// @throws css::uno::RuntimeException
252
    void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
253
    /// @throws css::uno::RuntimeException
254
    void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
255
    // Fake DTD file
256
    void setCustomEntityNames(
257
       const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements);
258
259
    // called by the C callbacks of the expat parser
260
    void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
261
        int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
262
    void callbackEndElement();
263
    void callbackCharacters( const xmlChar* s, int nLen );
264
    void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
265
    xmlEntityPtr callbackGetEntity( const xmlChar *name );
266
267
    void pushEntity(const ParserData&, xml::sax::InputSource const&);
268
    void popEntity();
269
149M
    Entity& getEntity()             { return *mpTop; }
270
    void parse();
271
    void produce( bool bForceFlush = false );
272
    bool m_bIgnoreMissingNSDecl;
273
    bool m_bDisableThreadedParser;
274
275
private:
276
    bool consume(EventList&);
277
    void deleteUsedEvents();
278
    void sendPendingCharacters();
279
    void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
280
281
    sal_Int32 GetToken( const xmlChar* pName );
282
    /// @throws css::xml::sax::SAXException
283
    sal_Int32 GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName );
284
    /// @throws css::xml::sax::SAXException
285
    OUString const & GetNamespaceURL( std::string_view rPrefix );
286
    sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
287
    sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName );
288
    void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
289
290
private:
291
    std::mutex maMutex; ///< Protecting whole parseStream() execution
292
    ::rtl::Reference< FastLocatorImpl >     mxDocumentLocator;
293
    NamespaceMap                            maNamespaceMap;
294
295
    ParserData maData;                      /// Cached parser configuration for next call of parseStream().
296
297
    Entity *mpTop;                          /// std::stack::top() is amazingly slow => cache this.
298
    std::stack< Entity > maEntities;        /// Entity stack for each call of parseStream().
299
    std::vector<char> pendingCharacters;    /// Data from characters() callback that needs to be sent.
300
};
301
302
} // namespace sax_fastparser
303
304
namespace {
305
306
class ParserThread: public salhelper::Thread
307
{
308
    FastSaxParserImpl *mpParser;
309
public:
310
0
    explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {}
311
private:
312
    virtual void execute() override
313
0
    {
314
0
        try
315
0
        {
316
0
            mpParser->parse();
317
0
        }
318
0
        catch (...)
319
0
        {
320
0
            Entity &rEntity = mpParser->getEntity();
321
0
            rEntity.getEvent( CallbackType::EXCEPTION );
322
0
            mpParser->produce( true );
323
0
        }
324
0
    }
325
};
326
327
extern "C" {
328
329
static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
330
    int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
331
37.5M
{
332
37.5M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
333
37.5M
    pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
334
37.5M
}
335
336
static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
337
17.4M
{
338
17.4M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
339
17.4M
    pFastParser->callbackEndElement();
340
17.4M
}
341
342
static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
343
14.4M
{
344
14.4M
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
345
14.4M
    pFastParser->callbackCharacters( s, nLen );
346
14.4M
}
347
348
static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
349
71.2k
{
350
71.2k
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
351
71.2k
    pFastParser->callbackProcessingInstruction( target, data );
352
71.2k
}
353
354
static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name)
355
70.1k
{
356
70.1k
    FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
357
70.1k
    return pFastParser->callbackGetEntity( name );
358
70.1k
}
359
360
}
361
362
/// XLocator implementation that reports the position and source ids of the
/// entity the owning parser is currently working on.
class FastLocatorImpl : public WeakImplHelper< XLocator >
{
public:
    explicit FastLocatorImpl(FastSaxParserImpl *p)
        : mpParser(p)
    {
    }

    /// Drops the back-pointer to the parser; later queries throw.
    void dispose()
    {
        mpParser = nullptr;
    }

    /// @throws RuntimeException
    void checkDispose() const
    {
        if( mpParser == nullptr )
            throw DisposedException();
    }

    //XLocator
    virtual sal_Int32 SAL_CALL getColumnNumber() override;
    virtual sal_Int32 SAL_CALL getLineNumber() override;
    virtual OUString SAL_CALL getPublicId() override;
    virtual OUString SAL_CALL getSystemId() override;

private:
    FastSaxParserImpl *mpParser;
};
380
381
/// Column reported by libxml2 for the current entity's parser context.
/// Throws DisposedException once the locator has been disposed.
sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
{
    checkDispose();
    Entity& rCurrent = mpParser->getEntity();
    return xmlSAX2GetColumnNumber( rCurrent.mpParser );
}
386
387
/// Line reported by libxml2 for the current entity's parser context.
/// Throws DisposedException once the locator has been disposed.
sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
{
    checkDispose();
    Entity& rCurrent = mpParser->getEntity();
    return xmlSAX2GetLineNumber( rCurrent.mpParser );
}
392
393
/// Public id of the input source of the current entity.
/// Throws DisposedException once the locator has been disposed.
OUString SAL_CALL FastLocatorImpl::getPublicId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sPublicId;
}
398
399
/// System id of the input source of the current entity.
/// Throws DisposedException once the locator has been disposed.
OUString SAL_CALL FastLocatorImpl::getSystemId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sSystemId;
}
404
405
/// All handler references start out empty.
ParserData::ParserData() = default;
407
408
/// Copies the handler configuration from @p rData; the entity starts with
/// no produced events, threading disabled and no libxml2 parser context.
Entity::Entity(const ParserData& rData)
    : ParserData(rData),
      mnProducedEventsSize(0),
      mbEnableThreads(false),
      mpParser(nullptr)
{
}
415
416
/// Delivers an element-start event: pushes a new context-stack entry, asks the
/// parent context (or, for the root, the document handler) for a child context
/// and notifies that child. Exceptions from handlers are stored, not propagated.
void Entity::startElement( Event const *pEvent )
{
    const sal_Int32 nToken = pEvent->mnElementToken;
    const OUString& rNamespace = pEvent->msNamespace;
    const OUString& rName = pEvent->msElementName;

    // Use un-wrapped pointers to avoid significant acquire/release overhead
    const bool bHasParentEntry = !maContextStack.empty();
    XFastContextHandler* pParent
        = bHasParentEntry ? maContextStack.top().mxContext.get() : nullptr;

    maContextStack.push( SaxContext( nToken, rNamespace, rName ) );

    // The enclosing element has no handler: keep the stack balanced, but
    // there is nobody to ask for a child context.
    if( bHasParentEntry && !pParent )
        return;

    try
    {
        const Reference< XFastAttributeList > xAttribs( pEvent->mxAttributes );
        Reference< XFastContextHandler > xChild;

        if ( mxNamespaceHandler.is() )
        {
            const Sequence< xml::Attribute > aDecls = pEvent->mxDeclAttributes->getUnknownAttributes();
            for (const auto& rDecl : aDecls)
                mxNamespaceHandler->registerNamespace( rDecl.Name, rDecl.Value );
        }

        if( nToken != FastToken::DONTKNOW )
        {
            if( pParent )
                xChild = pParent->createFastChildContext( nToken, xAttribs );
            else if( mxDocumentHandler.is() )
                xChild = mxDocumentHandler->createFastChildContext( nToken, xAttribs );

            if( xChild.is() )
                xChild->startFastElement( nToken, xAttribs );
        }
        else
        {
            if( pParent )
                xChild = pParent->createUnknownChildContext( rNamespace, rName, xAttribs );
            else if( mxDocumentHandler.is() )
                xChild = mxDocumentHandler->createUnknownChildContext( rNamespace, rName, xAttribs );

            if( xChild.is() )
                xChild->startUnknownElement( rNamespace, rName, xAttribs );
        }

        // swap the reference we own in to avoid referencing thrash.
        maContextStack.top().mxContext = std::move( xChild );
    }
    catch (...)
    {
        saveException( ::cppu::getCaughtException() );
    }
}
480
481
void Entity::characters( const OUString& sChars )
482
12.7M
{
483
12.7M
    if (maContextStack.empty())
484
57
    {
485
        // Malformed XML stream !?
486
57
        return;
487
57
    }
488
489
12.7M
    XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
490
12.7M
    if( pContext ) try
491
12.7M
    {
492
12.7M
        pContext->characters( sChars );
493
12.7M
    }
494
12.7M
    catch (...)
495
12.7M
    {
496
0
        saveException( ::cppu::getCaughtException() );
497
0
    }
498
12.7M
}
499
500
void Entity::endElement()
501
17.4M
{
502
17.4M
    if (maContextStack.empty())
503
597
    {
504
        // Malformed XML stream !?
505
597
        return;
506
597
    }
507
508
17.4M
    const SaxContext& aContext = maContextStack.top();
509
17.4M
    XFastContextHandler* pContext( aContext.mxContext.get() );
510
17.4M
    if( pContext )
511
15.9M
        try
512
15.9M
        {
513
15.9M
            sal_Int32 nElementToken = aContext.mnElementToken;
514
15.9M
            if( nElementToken != FastToken::DONTKNOW )
515
14.8M
                pContext->endFastElement( nElementToken );
516
1.15M
            else
517
1.15M
                pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName );
518
15.9M
        }
519
15.9M
        catch (...)
520
15.9M
        {
521
64.3k
            saveException( ::cppu::getCaughtException() );
522
64.3k
        }
523
17.4M
    maContextStack.pop();
524
17.4M
}
525
526
void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
527
71.2k
{
528
71.2k
    if( mxDocumentHandler.is() ) try
529
71.2k
    {
530
71.2k
        mxDocumentHandler->processingInstruction( rTarget, rData );
531
71.2k
    }
532
71.2k
    catch (...)
533
71.2k
    {
534
0
        saveException( ::cppu::getCaughtException() );
535
0
    }
536
71.2k
}
537
538
void Entity::transferUsedEvents()
539
0
{
540
0
    std::unique_lock aGuard(maEventProtector);
541
0
    if (!maUsedEvents.empty())
542
0
    {
543
0
        mxProducedEvents = std::move(maUsedEvents.front());
544
0
        maUsedEvents.pop();
545
0
        aGuard.unlock(); // unlock
546
0
        mnProducedEventsSize = 0;
547
0
    }
548
0
}
549
550
/// Returns the event list currently being filled. If there is none, a used
/// list is recycled when available; otherwise a new list with
/// mnEventListSize slots is created.
EventList& Entity::getEventList()
{
    if (mxProducedEvents)
        return *mxProducedEvents;

    transferUsedEvents();
    if (!mxProducedEvents)
    {
        EventList& rFresh = mxProducedEvents.emplace();
        rFresh.maEvents.resize(mnEventListSize);
        rFresh.mbIsAttributesEmpty = false;
        mnProducedEventsSize = 0;
    }
    return *mxProducedEvents;
}
565
566
/// Returns the event slot the caller should fill for the next callback.
///
/// Without threading this is always maSharedEvent, and its maType is NOT
/// updated here. With threading, the next free slot of the current event list
/// is returned with maType set to @p aType; the list grows by one slot when it
/// is already full.
///
/// @param aType  kind of callback the returned event will describe
/// @return reference to the event to fill in
Event& Entity::getEvent( CallbackType aType )
{
    if (!mbEnableThreads)
        return maSharedEvent;

    EventList& rEventList = getEventList();
    if (mnProducedEventsSize == rEventList.maEvents.size())
    {
        // maSavedException is guarded by maSavedExceptionMutex, so query it
        // through the locking accessor rather than reading the Any directly.
        SAL_WARN_IF(!hasException(), "sax",
            "Event vector should only exceed " << mnEventListSize <<
            " temporarily while an exception is pending");
        rEventList.maEvents.resize(mnProducedEventsSize + 1);
    }
    Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
    rEvent.maType = aType;
    return rEvent;
}
583
584
/// Formats the last libxml2 error of @p ctxt as "[<system id> line <n>]: <message>".
///
/// @param ctxt       parser context to query for its last error
/// @param sSystemId  system id shown in the prefix
/// @param nLine      line number shown in the prefix
/// @return the formatted message; "unknown error" is used as the message text
///         when libxml2 has no error or no message recorded
OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine )
{
    const xmlError* error = xmlCtxtGetLastError( ctxt );
    const char* pMessage = ( error && error->message ) ? error->message : "unknown error";

    // libxml2 strings are UTF-8 and the message may echo non-ASCII content of
    // the document, so decode it as UTF-8. The converting constructor (rather
    // than OUString::fromUtf8) is used on purpose: it tolerates malformed
    // sequences, which a truncated message could contain.
    return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " +
           OUString(pMessage, static_cast<sal_Int32>(strlen(pMessage)), RTL_TEXTENCODING_UTF8);
}
595
596
// throw an exception, but avoid callback if
597
// during a threaded produce
598
/// Builds a SAXParseException describing the current parser error (wrapping a
/// stored handler exception, if any), offers it to the error handler and then
/// always throws it. The error handler is not called during a threaded parse.
void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
                             bool mbDuringParse )
{
    // Error during parsing !
    Any aWrapped;
    {
        std::scoped_lock aLock(maSavedExceptionMutex);
        if (maSavedException.hasValue())
            aWrapped.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
    }

    SAXParseException aParseError(
        lclGetErrorMessage( mpParser,
                            xDocumentLocator->getSystemId(),
                            xDocumentLocator->getLineNumber() ),
        Reference< XInterface >(),
        aWrapped,
        xDocumentLocator->getPublicId(),
        xDocumentLocator->getSystemId(),
        xDocumentLocator->getLineNumber(),
        xDocumentLocator->getColumnNumber()
    );

    // error handler is set, it may throw the exception
    const bool bThreadedParse = mbDuringParse && mbEnableThreads;
    if( !bThreadedParse && mxErrorHandler.is() )
        mxErrorHandler->fatalError( Any( aParseError ) );

    // error handler has not thrown, but parsing must stop => throw ourselves
    throw aParseError;
}
632
633
// In the single threaded case we emit events via our C
634
// callbacks, so any exception caught must be queued up until
635
// we can safely re-throw it from our C++ parent of parse()
636
637
// If multi-threaded, we need to push an EXCEPTION event, at
638
// which point we transfer ownership of maSavedException to
639
// the consuming thread.
640
void Entity::saveException( const Any & e )
641
73.1k
{
642
    // fdo#81214 - allow the parser to run on after an exception,
643
    // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
644
    // for XComponent; and yet expect to continue parsing.
645
73.1k
    SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));
646
73.1k
    std::scoped_lock g(maSavedExceptionMutex);
647
73.1k
    if (maSavedException.hasValue())
648
67.9k
    {
649
67.9k
        SAL_INFO("sax.fastparser", "discarding exception, already have one");
650
67.9k
    }
651
5.20k
    else
652
5.20k
    {
653
5.20k
        maSavedException = e;
654
5.20k
    }
655
73.1k
}
656
657
bool Entity::hasException()
658
408k
{
659
408k
    std::scoped_lock g(maSavedExceptionMutex);
660
408k
    return maSavedException.hasValue();
661
408k
}
662
663
} // namespace
664
665
namespace sax_fastparser {
666
667
/// Starts with both option flags off and no current entity, and creates the
/// document locator that points back at this parser.
FastSaxParserImpl::FastSaxParserImpl()
    : m_bIgnoreMissingNSDecl(false)
    , m_bDisableThreadedParser(false)
    , mpTop(nullptr)
{
    mxDocumentLocator.set( new FastLocatorImpl( this ) );
}
674
675
/// Disposes the locator, so it no longer refers to this object, and releases
/// every non-null libxml2 entity recorded in m_TemporalEntities.
FastSaxParserImpl::~FastSaxParserImpl()
{
    if( mxDocumentLocator.is() )
        mxDocumentLocator->dispose();

    for (auto& rpEntity : m_TemporalEntities)
    {
        if (rpEntity)
        {
            // libxml2's node functions are used to unlink and free the entity.
            xmlNodePtr pNode = reinterpret_cast<xmlNodePtr>(rpEntity);
            xmlUnlinkNode(pNode);
            xmlFreeNode(pNode);
        }
    }
}
688
689
void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
690
1.59M
{
691
1.59M
    Entity& rEntity = getEntity();
692
1.59M
    assert(!rEntity.maNamespaceCount.empty()); // need a context!
693
694
1.59M
    sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
695
1.59M
    if( rEntity.maNamespaceDefines.size() <= nOffset )
696
226k
        rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
697
698
1.59M
    rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
699
1.59M
}
700
701
/// Looks up the token for the NUL-terminated name @p pName using the current
/// entity's token handler.
sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName)
{
    FastTokenHandlerBase* const pTokenHandler = getEntity().mxTokenHandler.get();
    // uses utf-8
    return FastTokenHandlerBase::getTokenFromChars( pTokenHandler, XML_CAST( pName ) );
}
706
707
/// Resolves @p sPrefix against the namespace definitions valid in the current
/// context (most recent first) and combines the namespace token with the token
/// of @p pName.
///
/// Returns FastToken::DONTKNOW when there is no namespace context, or when the
/// prefix is undeclared and m_bIgnoreMissingNSDecl is set; an undeclared prefix
/// otherwise raises a SAXException.
sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( std::string_view sPrefix, const xmlChar* pName )
{
    Entity& rEntity = getEntity();
    if (rEntity.maNamespaceCount.empty())
        return FastToken::DONTKNOW;

    for( sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining )
    {
        const auto & rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
        if( rDefine.maPrefix == sPrefix )
            return GetTokenWithContextNamespace(rDefine.mnToken, pName);
    }

    if (m_bIgnoreMissingNSDecl)
        return FastToken::DONTKNOW;

    throw SAXException("No namespace defined for " + OStringToOUString(sPrefix,
        RTL_TEXTENCODING_UTF8), {}, {});
}
727
728
/// Returns the token stored in maNamespaceMap for @p rNamespaceURL, or
/// FastToken::DONTKNOW when the URL is not in the map.
sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
{
    const auto aFound = maNamespaceMap.find( rNamespaceURL );
    if( aFound == maNamespaceMap.end() )
        return FastToken::DONTKNOW;
    return aFound->second;
}
736
737
/// Returns the namespace URL bound to @p rPrefix in the current context,
/// searching the valid definitions most recent first.
/// Throws SAXException when no definition matches.
OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix )
{
    Entity& rEntity = getEntity();
    if( !rEntity.maNamespaceCount.empty() )
    {
        for( sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining )
        {
            const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
            if( rDefine.maPrefix == rPrefix )
                return rDefine.maNamespaceURL;
        }
    }

    throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
            Reference< XInterface >(), Any());
}
751
752
/// Combines a namespace token with the token of @p pName (bitwise OR).
/// Returns FastToken::DONTKNOW when either of the two tokens is unknown.
sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName )
{
    if( nNamespaceToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    const sal_Int32 nNameToken = GetToken( pName );
    if( nNameToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    return nNamespaceToken | nNameToken;
}
763
764
namespace
765
{
766
    class ParserCleanup
767
    {
768
    private:
769
        FastSaxParserImpl& m_rParser;
770
        Entity& m_rEntity;
771
        rtl::Reference<ParserThread> m_xParser;
772
    public:
773
        ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
774
250k
            : m_rParser(rParser)
775
250k
            , m_rEntity(rEntity)
776
250k
        {
777
250k
        }
778
        ~ParserCleanup()
779
250k
        {
780
250k
            if (m_rEntity.mpParser)
781
244k
            {
782
244k
                if (m_rEntity.mpParser->myDoc)
783
1.91k
                    xmlFreeDoc(m_rEntity.mpParser->myDoc);
784
244k
                xmlFreeParserCtxt(m_rEntity.mpParser);
785
244k
            }
786
250k
            joinThread();
787
250k
            m_rParser.popEntity();
788
250k
        }
789
        void setThread(const rtl::Reference<ParserThread> &xParser)
790
0
        {
791
0
            m_xParser = xParser;
792
0
        }
793
        void joinThread()
794
250k
        {
795
250k
            if (m_xParser.is())
796
0
            {
797
0
                rtl::Reference<ParserThread> xToJoin = m_xParser;
798
0
                m_xParser.clear();
799
0
                xToJoin->join();
800
0
            }
801
250k
        }
802
    };
803
}
804
/***************
805
*
806
* parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
807
* the file-specific initialization work. (During a parser run, external files may be opened)
808
*
809
****************/
810
void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
811
274k
{
812
274k
    xmlInitParser();
813
814
    // Only one text at one time
815
274k
    std::unique_lock guard( maMutex );
816
817
274k
    pushEntity(maData, rStructSource);
818
274k
    Entity& rEntity = getEntity();
819
274k
    ParserCleanup aEnsureFree(*this, rEntity);
820
821
    // start the document
822
274k
    if( rEntity.mxDocumentHandler.is() )
823
250k
    {
824
250k
        rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator );
825
250k
        rEntity.mxDocumentHandler->startDocument();
826
250k
    }
827
828
#ifdef EMSCRIPTEN
829
    rEntity.mbEnableThreads = false;
830
#else
831
274k
    if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
832
0
    {
833
0
        Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
834
        // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
835
0
        rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
836
0
                || (rEntity.maStructSource.aInputStream->available() > 10000);
837
0
    }
838
274k
#endif
839
840
274k
    if (rEntity.mbEnableThreads)
841
0
    {
842
0
        rtl::Reference<ParserThread> xParser = new ParserThread(this);
843
0
        xParser->launch();
844
0
        aEnsureFree.setThread(xParser);
845
0
        bool done = false;
846
0
        do {
847
0
            rEntity.maConsumeResume.wait();
848
0
            rEntity.maConsumeResume.reset();
849
850
0
            std::unique_lock aGuard(rEntity.maEventProtector);
851
0
            while (!rEntity.maPendingEvents.empty())
852
0
            {
853
0
                if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
854
0
                    rEntity.maProduceResume.set(); // start producer again
855
856
0
                EventList aEventList = std::move(rEntity.maPendingEvents.front());
857
0
                rEntity.maPendingEvents.pop();
858
0
                aGuard.unlock(); // unlock
859
860
0
                if (!consume(aEventList))
861
0
                    done = true;
862
863
0
                aGuard.lock(); // lock
864
865
0
                if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
866
0
                {
867
0
                    aGuard.unlock();
868
0
                    for (auto& rEvent : aEventList.maEvents)
869
0
                    {
870
0
                        if (rEvent.mxAttributes.is())
871
0
                        {
872
0
                            rEvent.mxAttributes->clear();
873
0
                            if( rEntity.mxNamespaceHandler.is() )
874
0
                                rEvent.mxDeclAttributes->clear();
875
0
                        }
876
0
                        aEventList.mbIsAttributesEmpty = true;
877
0
                    }
878
0
                    aGuard.lock();
879
0
                }
880
881
0
                rEntity.maUsedEvents.push(std::move(aEventList));
882
0
            }
883
0
        } while (!done);
884
0
        aEnsureFree.joinThread();
885
0
        deleteUsedEvents();
886
887
        // callbacks used inside XML_Parse may have caught an exception No need
888
        // to lock maSavedExceptionMutex here because parser thread is joined.
889
        // coverity[missing_lock : SUPPRESS] 2024.6.1
890
0
        if( rEntity.maSavedException.hasValue() )
891
0
            rEntity.throwException( mxDocumentLocator, true );
892
0
    }
893
274k
    else
894
274k
    {
895
274k
        parse();
896
274k
    }
897
898
    // finish document
899
274k
    if( rEntity.mxDocumentHandler.is() )
900
135k
    {
901
135k
        rEntity.mxDocumentHandler->endDocument();
902
135k
    }
903
274k
}
904
905
void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
906
478k
{
907
478k
    maData.mxDocumentHandler = Handler;
908
478k
}
909
910
void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
911
372k
{
912
372k
    assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" );
913
372k
    maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
914
372k
}
915
916
void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
917
22.0M
{
918
22.0M
    if( NamespaceToken < FastToken::NAMESPACE )
919
0
        throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);
920
921
22.0M
    if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
922
22.0M
    {
923
22.0M
        maNamespaceMap[ NamespaceURL ] = NamespaceToken;
924
22.0M
        return;
925
22.0M
    }
926
0
    throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);
927
22.0M
}
928
929
/// Returns the namespace URL bound to the given prefix.
/// Any Exception raised by the lookup is reported as IllegalArgumentException.
OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
{
    try
    {
        const OString aUtf8Prefix = OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 );
        return GetNamespaceURL( aUtf8Prefix );
    }
    catch (const Exception&)
    {
        throw IllegalArgumentException();
    }
}
940
941
void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
942
0
{
943
0
    maData.mxErrorHandler = Handler;
944
0
}
945
946
void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
947
150k
{
948
150k
    maData.mxNamespaceHandler = Handler;
949
150k
}
950
951
void FastSaxParserImpl::setCustomEntityNames(
952
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
953
0
{
954
0
    m_Replacements.resize(replacements.size());
955
0
    for (size_t i = 0; i < replacements.size(); ++i)
956
0
    {
957
0
        m_Replacements[i].name = replacements[i].First;
958
0
        m_Replacements[i].replacement = replacements[i].Second;
959
0
    }
960
0
    if (m_Replacements.size() > 1)
961
0
        std::sort(m_Replacements.begin(), m_Replacements.end());
962
0
}
963
964
void FastSaxParserImpl::deleteUsedEvents()
965
0
{
966
0
    Entity& rEntity = getEntity();
967
0
    std::unique_lock aGuard(rEntity.maEventProtector);
968
969
0
    while (!rEntity.maUsedEvents.empty())
970
0
    {
971
0
        { // the block makes sure that aEventList is destructed outside the lock
972
0
            EventList aEventList = std::move(rEntity.maUsedEvents.front());
973
0
            rEntity.maUsedEvents.pop();
974
975
0
            aGuard.unlock(); // unlock
976
0
        }
977
978
0
        aGuard.lock(); // lock
979
0
    }
980
0
}
981
982
void FastSaxParserImpl::produce( bool bForceFlush )
983
0
{
984
0
    Entity& rEntity = getEntity();
985
0
    if (!(bForceFlush ||
986
0
        rEntity.mnProducedEventsSize >= Entity::mnEventListSize))
987
0
        return;
988
989
0
    std::unique_lock aGuard(rEntity.maEventProtector);
990
991
0
    while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
992
0
    { // pause parsing for a bit
993
0
        aGuard.unlock(); // unlock
994
0
        rEntity.maProduceResume.wait();
995
0
        rEntity.maProduceResume.reset();
996
0
        aGuard.lock(); // lock
997
0
    }
998
999
0
    rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
1000
1001
0
    aGuard.unlock(); // unlock
1002
1003
0
    rEntity.mxProducedEvents.reset();
1004
0
    assert(!rEntity.mxProducedEvents);
1005
1006
0
    rEntity.maConsumeResume.set();
1007
0
}
1008
1009
/// Replays one batch of queued events on the current entity.
/// Returns false when the batch contains the DONE event (or an unexpected
/// event type), true when every event of the batch was dispatched.
/// An EXCEPTION event calls Entity::throwException.
bool FastSaxParserImpl::consume(EventList& rEventList)
{
    Entity& rEntity = getEntity();
    // The attribute lists of this batch are about to be used by the handlers.
    rEventList.mbIsAttributesEmpty = false;

    for (auto& rQueued : rEventList.maEvents)
    {
        switch (rQueued.maType)
        {
            case CallbackType::DONE:
                return false;
            case CallbackType::START_ELEMENT:
                rEntity.startElement( &rQueued );
                break;
            case CallbackType::END_ELEMENT:
                rEntity.endElement();
                break;
            case CallbackType::CHARACTERS:
                rEntity.characters( rQueued.msChars );
                break;
            case CallbackType::PROCESSING_INSTRUCTION:
                // msNamespace carries the target, msElementName the data
                rEntity.processingInstruction(
                    rQueued.msNamespace, rQueued.msElementName );
                break;
            case CallbackType::EXCEPTION:
                rEntity.throwException( mxDocumentLocator, false );
                [[fallthrough]]; // avoid unreachable code warning with some compilers
            default:
                assert(false);
                return false;
        }
    }
    return true;
}
1042
1043
void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
1044
        xml::sax::InputSource const& rSource)
1045
274k
{
1046
274k
    if (!rSource.aInputStream.is())
1047
24.3k
        throw SAXException(u"No input source"_ustr, Reference<XInterface>(), Any());
1048
1049
250k
    maEntities.emplace(rEntityData);
1050
250k
    mpTop = &maEntities.top();
1051
1052
250k
    mpTop->maStructSource = rSource;
1053
1054
250k
    mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
1055
250k
    if (!mpTop->maStructSource.sEncoding.isEmpty())
1056
0
    {
1057
0
        mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
1058
0
    }
1059
250k
}
1060
1061
void FastSaxParserImpl::popEntity()
1062
250k
{
1063
250k
    maEntities.pop();
1064
250k
    mpTop = !maEntities.empty() ? &maEntities.top() : nullptr;
1065
250k
}
1066
1067
// starts parsing with actual parser !
1068
void FastSaxParserImpl::parse()
1069
250k
{
1070
250k
    const int BUFFER_SIZE = 16 * 1024;
1071
250k
    Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
1072
1073
250k
    Entity& rEntity = getEntity();
1074
1075
    // set all necessary C-Callbacks
1076
250k
    static xmlSAXHandler callbacks;
1077
250k
    callbacks.startElementNs = call_callbackStartElement;
1078
250k
    callbacks.endElementNs = call_callbackEndElement;
1079
250k
    callbacks.characters = call_callbackCharacters;
1080
250k
    callbacks.processingInstruction = call_callbackProcessingInstruction;
1081
250k
    callbacks.getEntity = call_callbackGetEntity;
1082
250k
    callbacks.initialized = XML_SAX2_MAGIC;
1083
250k
    int nRead = 0;
1084
250k
    do
1085
521k
    {
1086
521k
        nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
1087
521k
        if( nRead <= 0 )
1088
230k
        {
1089
230k
            if( rEntity.mpParser != nullptr )
1090
230k
            {
1091
230k
                if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
1092
94.0k
                    rEntity.throwException( mxDocumentLocator, true );
1093
230k
                if (rEntity.hasException())
1094
1.03k
                    rEntity.throwException(mxDocumentLocator, true);
1095
230k
            }
1096
230k
            break;
1097
230k
        }
1098
1099
291k
        bool bContinue = true;
1100
291k
        if( rEntity.mpParser == nullptr )
1101
244k
        {
1102
            // create parser with proper encoding (needs the first chunk of data)
1103
244k
            rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
1104
244k
                reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
1105
244k
            if( !rEntity.mpParser )
1106
0
                throw SAXException(u"Couldn't create parser"_ustr, Reference< XInterface >(), Any() );
1107
1108
            // Tell libxml2 parser to decode entities in attribute values.
1109
            // Also allow XML attribute values which are larger than 10MB, because this used to work
1110
            // with expat.
1111
            // coverity[unsafe_xml_parse_config] - entity support is required
1112
244k
            xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE);
1113
244k
        }
1114
46.9k
        else
1115
46.9k
        {
1116
46.9k
            bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
1117
46.9k
                            == XML_ERR_OK;
1118
46.9k
        }
1119
1120
        // callbacks used inside XML_Parse may have caught an exception
1121
291k
        if (!bContinue)
1122
13.3k
        {
1123
13.3k
            rEntity.throwException( mxDocumentLocator, true );
1124
13.3k
        }
1125
291k
        if (rEntity.hasException())
1126
583
        {
1127
583
            rEntity.throwException( mxDocumentLocator, true );
1128
583
        }
1129
291k
    } while( nRead > 0 );
1130
250k
    rEntity.getEvent( CallbackType::DONE );
1131
250k
    if( rEntity.mbEnableThreads )
1132
0
        produce( true );
1133
250k
}
1134
1135
// The C-Callbacks
1136
void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
1137
    int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
1138
37.5M
{
1139
37.5M
    if (!pendingCharacters.empty())
1140
9.46M
        sendPendingCharacters();
1141
37.5M
    Entity& rEntity = getEntity();
1142
37.5M
    if( rEntity.maNamespaceCount.empty() )
1143
225k
    {
1144
225k
        rEntity.maNamespaceCount.push(0);
1145
225k
        DefineNamespace( "xml"_ostr, u"http://www.w3.org/XML/1998/namespace"_ustr);
1146
225k
    }
1147
37.3M
    else
1148
37.3M
    {
1149
37.3M
        rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
1150
37.3M
    }
1151
1152
    // create attribute map and process namespace instructions
1153
37.5M
    Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
1154
37.5M
    bool bIsAttributesEmpty = false;
1155
37.5M
    if ( rEntity.mbEnableThreads )
1156
0
        bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
1157
1158
37.5M
    if (rEvent.mxAttributes.is())
1159
37.3M
    {
1160
37.3M
        if( !bIsAttributesEmpty )
1161
37.3M
            rEvent.mxAttributes->clear();
1162
37.3M
    }
1163
225k
    else
1164
225k
        rEvent.mxAttributes.set(
1165
225k
                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
1166
1167
37.5M
    if( rEntity.mxNamespaceHandler.is() )
1168
25.3M
    {
1169
25.3M
        if (rEvent.mxDeclAttributes.is())
1170
25.2M
        {
1171
25.2M
            if( !bIsAttributesEmpty )
1172
25.2M
                rEvent.mxDeclAttributes->clear();
1173
25.2M
        }
1174
109k
        else
1175
109k
            rEvent.mxDeclAttributes.set(
1176
109k
                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
1177
25.3M
    }
1178
1179
37.5M
    OUString sNamespace;
1180
37.5M
    sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
1181
37.5M
    if (!rEntity.maNamespaceStack.empty())
1182
37.3M
    {
1183
37.3M
        sNamespace = rEntity.maNamespaceStack.top().msName;
1184
37.3M
        nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
1185
37.3M
    }
1186
1187
37.5M
    try
1188
37.5M
    {
1189
        /*  #158414# Each element may define new namespaces, also for attributes.
1190
            First, process all namespaces, second, process the attributes after namespaces
1191
            have been initialized. */
1192
1193
37.5M
        std::string_view sPrefix; // convert to string_view so we only do strlen() once.
1194
37.5M
        if (prefix != nullptr)
1195
12.5M
            sPrefix = XML_CAST(prefix);
1196
        // #158414# first: get namespaces
1197
39.2M
        for (int i = 0; i < numNamespaces * 2; i += 2)
1198
1.65M
        {
1199
            // namespaces[] is (prefix/URI)
1200
1.65M
            if( namespaces[ i ] != nullptr )
1201
1.37M
            {
1202
1.37M
                OString aPrefix( XML_CAST( namespaces[ i ] ));
1203
1.37M
                OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1204
1.37M
                NormalizeURI( namespaceURL );
1205
1.37M
                DefineNamespace(aPrefix, namespaceURL);
1206
1.37M
                if( rEntity.mxNamespaceHandler.is() )
1207
1.11M
                    rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1208
1.37M
            }
1209
288k
            else
1210
288k
            {
1211
                // default namespace
1212
288k
                sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1213
288k
                NormalizeURI( sNamespace );
1214
288k
                nNamespaceToken = GetNamespaceToken( sNamespace );
1215
288k
                if( rEntity.mxNamespaceHandler.is() )
1216
220k
                    rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1217
288k
            }
1218
1.65M
        }
1219
1220
37.5M
        if ( rEntity.mxTokenHandler.is() )
1221
37.5M
        {
1222
            // #158414# second: fill attribute list with other attributes
1223
37.5M
            rEvent.mxAttributes->reserve( numAttributes );
1224
59.3M
            for (int i = 0; i < numAttributes * 5; i += 5)
1225
21.7M
            {
1226
                // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1227
21.7M
                if( attributes[ i + 1 ] != nullptr )
1228
8.62M
                {
1229
8.62M
                    sal_Int32 nAttributeToken = GetTokenWithPrefix(XML_CAST(attributes[ i + 1 ]), attributes[ i ]);
1230
8.62M
                    if( nAttributeToken != FastToken::DONTKNOW )
1231
7.21M
                        rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1232
1.41M
                    else
1233
1.41M
                        addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1234
8.62M
                }
1235
13.1M
                else
1236
13.1M
                {
1237
13.1M
                    sal_Int32 nAttributeToken = GetToken(attributes[ i ]);
1238
13.1M
                    if( nAttributeToken != FastToken::DONTKNOW )
1239
12.5M
                        rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1240
552k
                    else
1241
552k
                    {
1242
552k
                        SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" <<
1243
552k
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1244
552k
                        rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1245
552k
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1246
552k
                    }
1247
13.1M
                }
1248
21.7M
            }
1249
1250
37.5M
            if( !sPrefix.empty() )
1251
12.5M
                rEvent.mnElementToken = GetTokenWithPrefix(sPrefix, localName);
1252
24.9M
            else if( !sNamespace.isEmpty() )
1253
5.78M
                rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName);
1254
19.2M
            else
1255
19.2M
                rEvent.mnElementToken = GetToken(localName);
1256
37.5M
        }
1257
18.4E
        else
1258
18.4E
        {
1259
18.4E
            for (int i = 0; i < numAttributes * 5; i += 5)
1260
0
            {
1261
0
                if( attributes[ i + 1 ] != nullptr )
1262
0
                    addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1263
0
                else
1264
0
                    rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1265
0
                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1266
0
            }
1267
1268
18.4E
            rEvent.mnElementToken = FastToken::DONTKNOW;
1269
18.4E
        }
1270
1271
37.5M
        if( rEvent.mnElementToken == FastToken::DONTKNOW )
1272
19.4M
        {
1273
19.4M
            OUString aElementPrefix;
1274
19.4M
            if( !sPrefix.empty() )
1275
1.03M
            {
1276
1.03M
                aElementPrefix = OUString( sPrefix.data(), sPrefix.size(), RTL_TEXTENCODING_UTF8 );
1277
1.03M
                if ( URI != nullptr )
1278
369k
                    sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
1279
665k
                else if ( m_bIgnoreMissingNSDecl )
1280
665k
                    sNamespace.clear();
1281
0
                else
1282
0
                    throw SAXException("No namespace defined for " + aElementPrefix, {}, {});
1283
1.03M
                nNamespaceToken = GetNamespaceToken( sNamespace );
1284
1.03M
            }
1285
19.4M
            OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
1286
19.4M
            rEvent.msNamespace = sNamespace;
1287
19.4M
            if( aElementPrefix.isEmpty() )
1288
18.3M
                rEvent.msElementName = std::move(aElementLocalName);
1289
1.03M
            else
1290
1.03M
                rEvent.msElementName = aElementPrefix + ":" + aElementLocalName;
1291
19.4M
        }
1292
18.1M
        else // token is always preferred.
1293
18.1M
            rEvent.msElementName.clear();
1294
1295
37.5M
        rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
1296
37.5M
        if (rEntity.mbEnableThreads)
1297
0
            produce();
1298
37.5M
        else
1299
37.5M
        {
1300
37.5M
            SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
1301
37.5M
            rEntity.startElement( &rEvent );
1302
37.5M
        }
1303
37.5M
    }
1304
37.5M
    catch (...)
1305
37.5M
    {
1306
964
        rEntity.saveException( ::cppu::getCaughtException() );
1307
964
    }
1308
37.5M
}
1309
1310
void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
1311
1.41M
{
1312
1.41M
    OUString aNamespaceURI;
1313
1.41M
    if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr )
1314
761k
        aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 );
1315
1.41M
    const OString aPrefix( XML_CAST( attributes[ i + 1 ] ));
1316
1.41M
    const OString aLocalName( XML_CAST( attributes[ i ] ));
1317
1.41M
    OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName;
1318
1.41M
    xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
1319
1.41M
        OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1320
1.41M
    SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI);
1321
1.41M
}
1322
1323
void FastSaxParserImpl::callbackEndElement()
1324
17.4M
{
1325
17.4M
    if (!pendingCharacters.empty())
1326
3.27M
        sendPendingCharacters();
1327
17.4M
    Entity& rEntity = getEntity();
1328
17.4M
    SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
1329
17.4M
    if( !rEntity.maNamespaceCount.empty() )
1330
17.4M
        rEntity.maNamespaceCount.pop();
1331
1332
17.4M
    SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack");
1333
17.4M
    if( !rEntity.maNamespaceStack.empty() )
1334
17.4M
        rEntity.maNamespaceStack.pop();
1335
1336
17.4M
    rEntity.getEvent( CallbackType::END_ELEMENT );
1337
17.4M
    if (rEntity.mbEnableThreads)
1338
0
        produce();
1339
17.4M
    else
1340
17.4M
        rEntity.endElement();
1341
17.4M
}
1342
1343
void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
1344
14.4M
{
1345
    // SAX interface allows that the characters callback splits content of one XML node
1346
    // (e.g. because there's an entity that needs decoding), however for consumers it's
1347
    // simpler FastSaxParser's character callback provides the whole string at once,
1348
    // so merge data from possible multiple calls and send them at once (before the element
1349
    // ends or another one starts).
1350
    //
1351
    // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1352
    // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1353
    // often in writer documents.
1354
14.4M
    int nOriginalLen = pendingCharacters.size();
1355
14.4M
    pendingCharacters.resize(nOriginalLen + nLen);
1356
14.4M
    memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
1357
14.4M
}
1358
1359
void FastSaxParserImpl::sendPendingCharacters()
1360
12.7M
{
1361
12.7M
    Entity& rEntity = getEntity();
1362
12.7M
    OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
1363
12.7M
    if (rEntity.mbEnableThreads)
1364
0
    {
1365
0
        Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
1366
0
        rEvent.msChars = std::move(sChars);
1367
0
        produce();
1368
0
    }
1369
12.7M
    else
1370
12.7M
        rEntity.characters( sChars );
1371
12.7M
    pendingCharacters.resize(0);
1372
12.7M
}
1373
1374
void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
1375
71.2k
{
1376
71.2k
    if (!pendingCharacters.empty())
1377
8.00k
        sendPendingCharacters();
1378
71.2k
    Entity& rEntity = getEntity();
1379
71.2k
    Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
1380
1381
    // This event is very rare, so no need to waste extra space for this
1382
    // Using namespace and element strings to be target and data in that order.
1383
71.2k
    rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
1384
71.2k
    if ( data != nullptr )
1385
61.4k
        rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
1386
9.77k
    else
1387
9.77k
        rEvent.msElementName.clear();
1388
1389
71.2k
    if (rEntity.mbEnableThreads)
1390
0
        produce();
1391
71.2k
    else
1392
71.2k
        rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
1393
71.2k
}
1394
1395
/// Entity lookup callback.
///
/// Resolution order:
///  1. a custom replacement registered through setCustomEntityNames();
///  2. a numeric name of the form "#x<hex>" / "#X<hex>" or "#<decimal>",
///     resolved to the corresponding single code point;
///  3. whatever xmlGetPredefinedEntity() returns for the name.
///
/// Entities created here are recorded in m_TemporalEntities.
/// A null or empty name, a numeric value of 0 and a value above U+10FFFF
/// all fall through to xmlGetPredefinedEntity().
xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name )
{
    if( !name )
        return xmlGetPredefinedEntity(name);
    const char* dname = XML_CAST(name);
    int lname = strlen(dname);
    if( lname == 0 )
        return xmlGetPredefinedEntity(name);

    // Builds an internal general entity with the given content and remembers it.
    const auto makeTemporaryEntity = [this, name](const OUString& rContent)
    {
        xmlEntityPtr entpt = xmlNewEntity(
            nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
            BAD_CAST(OUStringToOString(rContent, RTL_TEXTENCODING_UTF8).getStr()));
        m_TemporalEntities.push_back(entpt);
        return entpt;
    };

    if (m_Replacements.size() > 0)
    {
        // m_Replacements is kept sorted by setCustomEntityNames()
        auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname);
        if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0)
            return makeTemporaryEntity(it->replacement);
    }

    if( lname < 2 )
        return xmlGetPredefinedEntity(name);

    if ( dname[0] == '#' )
    {
        const bool bHex = ( dname[1] == 'x' || dname[1] == 'X' );
        // "#x" without any digit
        if( bHex && lname < 3 )
            return xmlGetPredefinedEntity(name);

        // Hex digits start after "#x"; decimal digits start directly after
        // '#'. (The decimal case used to start at dname + 2 and so dropped
        // the first digit.)
        const unsigned long nValue = bHex ? strtoul( dname + 2, nullptr, 16 )
                                          : strtoul( dname + 1, nullptr, 10 );

        // 0 means "no usable number"; values above U+10FFFF are not code points.
        if( nValue == 0 || nValue > 0x10FFFF )
            return xmlGetPredefinedEntity(name);

        const sal_uInt32 cval = static_cast<sal_uInt32>( nValue );
        return makeTemporaryEntity( OUString( &cval, 1 ) );
    }

    return xmlGetPredefinedEntity(name);
}
1449
1450
372k
/// Creates the implementation object that all public methods forward to.
FastSaxParser::FastSaxParser()
    : mpImpl(new FastSaxParserImpl)
{
}
1451
1452
/// Nothing to do beyond destroying the members.
FastSaxParser::~FastSaxParser() = default;
1455
1456
void SAL_CALL
1457
FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
1458
159k
{
1459
159k
    if (!rArguments.hasElements())
1460
0
        return;
1461
1462
159k
    OUString str;
1463
159k
    if ( !(rArguments[0] >>= str) )
1464
0
        throw IllegalArgumentException();
1465
1466
159k
    if ( str == "IgnoreMissingNSDecl" )
1467
125k
        mpImpl->m_bIgnoreMissingNSDecl = true;
1468
33.6k
    else if ( str == "DoSmeplease" )
1469
0
        ; //just ignore as this is already immune to billion laughs
1470
33.6k
    else if ( str == "DisableThreadedParser" )
1471
33.6k
        mpImpl->m_bDisableThreadedParser = true;
1472
0
    else
1473
0
        throw IllegalArgumentException();
1474
1475
159k
}
1476
1477
/// Forwards to FastSaxParserImpl::parseStream().
void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
{
    mpImpl->parseStream(aInputSource);
}
1481
1482
/// Forwards to FastSaxParserImpl::setFastDocumentHandler().
void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
{
    mpImpl->setFastDocumentHandler(Handler);
}
1486
1487
/// Forwards to FastSaxParserImpl::setTokenHandler().
void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
{
    mpImpl->setTokenHandler(Handler);
}
1491
1492
void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
1493
22.0M
{
1494
22.0M
    mpImpl->registerNamespace(NamespaceURL, NamespaceToken);
1495
22.0M
}
1496
1497
/// Forwards to FastSaxParserImpl::getNamespaceURL() and returns a copy of its result.
OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
{
    return mpImpl->getNamespaceURL(rPrefix);
}
1501
1502
/// Forwards to FastSaxParserImpl::setErrorHandler().
void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler )
{
    mpImpl->setErrorHandler(Handler);
}
1506
1507
/// Accepted for interface compatibility only; the resolver is ignored.
void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& )
{
    // not implemented
}
1511
1512
/// Accepted for interface compatibility only; the locale is ignored.
void FastSaxParser::setLocale( const lang::Locale& )
{
    // not implemented
}
1516
1517
/// Forwards to FastSaxParserImpl::setNamespaceHandler().
void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
{
    mpImpl->setNamespaceHandler(Handler);
}
1521
1522
/// Returns the implementation name of this component.
OUString FastSaxParser::getImplementationName()
{
    return u"com.sun.star.comp.extensions.xml.sax.FastParser"_ustr;
}
1526
1527
void FastSaxParser::setCustomEntityNames(
1528
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
1529
0
{
1530
0
    mpImpl->setCustomEntityNames(replacements);
1531
0
}
1532
1533
// Answers the service query by delegating to the shared cppu helper, passing
// this object and the requested service name.
sal_Bool FastSaxParser::supportsService(const OUString& ServiceName)
{
    return cppu::supportsService(this, ServiceName);
}
1537
1538
// Returns a one-element sequence holding the single service name this
// component advertises.
uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
{
    return { u"com.sun.star.xml.sax.FastParser"_ustr };
}
1542
1543
} // namespace sax_fastparser
1544
1545
// Exported C-linkage entry point: allocates a fresh FastSaxParser and returns
// the result of cppu::acquire() on it. Neither the component context nor the
// argument sequence is used.
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
    css::uno::XComponentContext * /*unused*/,
    css::uno::Sequence<css::uno::Any> const & /*unused*/)
{
    return cppu::acquire(new FastSaxParser);
}
1552
1553
// ----------------------------------------------------------
1554
// copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases
1555
// for various dodgy namespace decls in the wild.
1556
1557
static bool NormalizeW3URI( OUString& rName );
1558
static bool NormalizeOasisURN( OUString& rName );
1559
1560
// Rewrites rName in place if it is one of the namespace variants handled by
// the helpers below. The OASIS URN rules are tried first; the W3 URI rules
// are consulted only when the OASIS rules did not match. When neither
// matches, rName is left untouched.
static void NormalizeURI( OUString& rName )
{
    if( NormalizeOasisURN( rName ) )
        return;

    NormalizeW3URI( rName );
}
1567
1568
// Pieces used by NormalizeW3URI: a URI that starts with the W3 prefix and
// ends with the XForms suffix is replaced by the XForms 1.0 namespace.
constexpr OUStringLiteral XML_URI_W3_PREFIX = u"http://www.w3.org/";
constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX = u"/xforms";
constexpr OUStringLiteral XML_N_XFORMS_1_0 = u"http://www.w3.org/2002/xforms";

// W3 namespaces that NormalizeOasisURN replaces one-to-one, each followed by
// the "-compatible" OASIS URN it is replaced with.
constexpr OUStringLiteral XML_N_SVG = u"http://www.w3.org/2000/svg";
constexpr OUStringLiteral XML_N_SVG_COMPAT = u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
constexpr OUStringLiteral XML_N_FO = u"http://www.w3.org/1999/XSL/Format";
constexpr OUStringLiteral XML_N_FO_COMPAT = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
constexpr OUStringLiteral XML_N_SMIL = u"http://www.w3.org/2001/SMIL20/";
constexpr OUStringLiteral XML_N_SMIL_OLD = u"http://www.w3.org/2001/SMIL20";
constexpr OUStringLiteral XML_N_SMIL_COMPAT = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0";

// Building blocks of the generic OASIS URN pattern checked and rebuilt by
// NormalizeOasisURN.
constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC = u"urn:oasis:names:tc";
constexpr OUStringLiteral XML_XMLNS = u"xmlns";
constexpr OUStringLiteral XML_OPENDOCUMENT = u"opendocument";
constexpr OUStringLiteral XML_1_0 = u"1.0";
1582
1583
// Normalizes W3 XForms namespace URIs.
//
// A name matches when it
//   - starts with "http://www.w3.org/" and
//   - ends with "/xforms"
// i.e. roughly http://www.w3.org/(year)/xforms. Whatever lies between the
// prefix and the suffix is not inspected.
//
// On a match rName is overwritten with the XForms 1.0 namespace and true is
// returned; otherwise rName is left untouched and false is returned.
static bool NormalizeW3URI( OUString& rName )
{
    const OUString aW3Prefix = XML_URI_W3_PREFIX;
    if( !rName.startsWith( aW3Prefix ) )
        return false;

    // The prefix is longer than the suffix, so any name that reaches this
    // point is long enough for a plain tail comparison.
    const OUString aXFormsSuffix = XML_URI_XFORMS_SUFFIX;
    if( !rName.endsWith( aXFormsSuffix ) )
        return false;

    // found W3 prefix, and xforms suffix
    rName = XML_N_XFORMS_1_0;
    return true;
}
1606
1607
// Normalizes OASIS-style namespace URNs (plus a few fixed W3 namespaces).
//
// Two kinds of input are rewritten in place:
//
//  1. The exact SVG, XSL-FO and SMIL W3 namespaces (SMIL with or without the
//     trailing slash) are replaced by their "-compatible" OASIS URNs.
//
//  2. Any URN of the shape
//         urn:oasis:names:tc:<TC-Id>:xmlns:<Sub-Id>:1.<rest>
//     where the version has at least three characters and contains no ':',
//     is rebuilt as
//         urn:oasis:names:tc:opendocument:xmlns:<Sub-Id>:1.0
//
// Returns true if rName matched one of the above (and was assigned the
// normalized value); returns false and leaves rName untouched otherwise.
static bool NormalizeOasisURN( OUString& rName )
{
    // #i38644#
    // we exported the wrong namespace for smil, so we correct this here on load
    // for older documents
    if( rName == XML_N_SVG )
    {
        rName = XML_N_SVG_COMPAT;
        return true;
    }
    if( rName == XML_N_FO )
    {
        rName = XML_N_FO_COMPAT;
        return true;
    }
    if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD )
    {
        rName = XML_N_SMIL_COMPAT;
        return true;
    }

    const sal_Int32 nLen = rName.getLength();

    // true iff nIdx is a valid index into rName and the character there is ':'
    const auto isColonAt = [&rName, nLen]( sal_Int32 nIdx )
    {
        return nIdx < nLen && rName[nIdx] == ':';
    };

    // urn:oasis:names:tc ...
    const OUString aUrnPrefix = XML_URN_OASIS_NAMES_TC;
    if( !rName.startsWith( aUrnPrefix ) )
        return false;

    // ... followed by ':'
    const sal_Int32 nPrefixEnd = aUrnPrefix.getLength();
    if( !isColonAt( nPrefixEnd ) )
        return false;

    // <TC-Id> runs from just after that colon up to the next one
    const sal_Int32 nTCIdStart = nPrefixEnd + 1;
    const sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart );
    if( nTCIdEnd == -1 )
        return false;

    // "xmlns" must follow the <TC-Id> colon directly ...
    const OUString aXmlns = XML_XMLNS;
    const sal_Int32 nXmlnsStart = nTCIdEnd + 1;
    if( !o3tl::starts_with( rName.subView( nXmlnsStart ), aXmlns ) )
        return false;

    // ... and be terminated by ':'
    const sal_Int32 nXmlnsEnd = nXmlnsStart + aXmlns.getLength();
    if( !isColonAt( nXmlnsEnd ) )
        return false;

    // <Sub-Id> ends at the next colon
    const sal_Int32 nSubIdEnd = rName.indexOf( ':', nXmlnsEnd + 1 );
    if( nSubIdEnd == -1 )
        return false;

    // the version needs at least three characters and must be the last
    // component (no further ':')
    const sal_Int32 nVersionStart = nSubIdEnd + 1;
    if( nVersionStart + 2 >= nLen )
        return false;
    if( rName.indexOf( ':', nVersionStart ) != -1 )
        return false;

    // the version has to start with "1."
    if( rName[nVersionStart] != '1' || rName[nVersionStart + 1] != '.' )
        return false;

    // replace [tcid] with current TCID and version with current version:
    // keep "urn:oasis:names:tc:" and ":xmlns:<Sub-Id>:" from the input.
    rName = rName.subView( 0, nTCIdStart )
            + XML_OPENDOCUMENT
            + rName.subView( nTCIdEnd, nVersionStart - nTCIdEnd )
            + XML_1_0;

    return true;
}
1687
1688
1689
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */