Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/unoxml/source/dom/documentbuilder.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include "documentbuilder.hxx"
21
22
#include <string.h>
23
24
#include <libxml/xmlerror.h>
25
#include <libxml/parser.h>
26
27
#include <memory>
28
29
#include <sal/log.hxx>
30
#include <comphelper/diagnose_ex.hxx>
31
32
#include <comphelper/processfactory.hxx>
33
#include <cppuhelper/implbase.hxx>
34
#include <cppuhelper/supportsservice.hxx>
35
36
#include <com/sun/star/xml/sax/SAXParseException.hpp>
37
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
38
#include <com/sun/star/task/XInteractionHandler.hpp>
39
#include <com/sun/star/ucb/SimpleFileAccess.hpp>
40
41
#include <ucbhelper/content.hxx>
42
#include <ucbhelper/commandenvironment.hxx>
43
44
#include "document.hxx"
45
46
using namespace css::io;
47
using namespace css::lang;
48
using namespace css::ucb;
49
using namespace css::uno;
50
using namespace css::xml::dom;
51
using namespace css::xml::sax;
52
using namespace ucbhelper;
53
using css::task::XInteractionHandler;
54
using css::xml::sax::InputSource;
55
56
57
namespace DOM
58
{
59
    namespace {
60
61
    class CDefaultEntityResolver : public cppu::WeakImplHelper< XEntityResolver >
62
    {
63
    public:
64
        virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId ) override
65
0
        {
66
0
            InputSource is;
67
0
            is.sPublicId = sPublicId;
68
0
            is.sSystemId = sSystemId;
69
0
            is.sEncoding.clear();
70
71
0
            try {
72
0
                Reference< XCommandEnvironment > aEnvironment(
73
0
                    new CommandEnvironment(Reference< XInteractionHandler >(),
74
0
                                           Reference< XProgressHandler >() ));
75
0
                Content aContent(sSystemId, aEnvironment, comphelper::getProcessComponentContext());
76
77
0
                is.aInputStream = aContent.openStream();
78
0
            } catch (const css::uno::Exception&) {
79
0
                TOOLS_WARN_EXCEPTION( "unoxml", "exception in default entity resolver");
80
0
                is.aInputStream.clear();
81
0
            }
82
0
            return is;
83
0
        }
84
85
    };
86
87
    }
88
89
    CDocumentBuilder::CDocumentBuilder()
90
257k
        : m_xEntityResolver(new CDefaultEntityResolver)
91
257k
    {
92
        // init libxml. libxml will protect itself against multiple
93
        // initializations so there is no problem here if this gets
94
        // called multiple times.
95
257k
        xmlInitParser();
96
257k
    }
97
98
    Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames()
99
0
    {
100
0
        return { u"com.sun.star.xml.dom.DocumentBuilder"_ustr };
101
0
    }
102
103
    OUString SAL_CALL CDocumentBuilder::getImplementationName()
104
0
    {
105
0
        return u"com.sun.star.comp.xml.dom.DocumentBuilder"_ustr;
106
0
    }
107
108
    sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName)
109
0
    {
110
0
        return cppu::supportsService(this, aServiceName);
111
0
    }
112
113
    Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation()
114
0
    {
115
116
0
        return Reference< XDOMImplementation >();
117
0
    }
118
119
    sal_Bool SAL_CALL CDocumentBuilder::isNamespaceAware()
120
0
    {
121
0
        return true;
122
0
    }
123
124
    sal_Bool SAL_CALL CDocumentBuilder::isValidating()
125
0
    {
126
0
        return false;
127
0
    }
128
129
    Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument()
130
251k
    {
131
251k
        std::scoped_lock const g(m_Mutex);
132
133
        // create a new document
134
251k
        xmlDocPtr pDocument = xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0"));
135
251k
        return CDocument::CreateCDocument(pDocument);
136
251k
    }
137
138
    static OUString make_error_message(xmlParserCtxtPtr ctxt)
139
322
    {
140
322
        const xmlError* lastError = xmlCtxtGetLastError(ctxt);
141
322
        return OUString(lastError->message, strlen(lastError->message), RTL_TEXTENCODING_ASCII_US) +
142
322
               "Line: " +
143
322
               OUString::number(static_cast<sal_Int32>(lastError->line)) +
144
322
               "\nColumn: " +
145
322
               OUString::number(static_cast<sal_Int32>(lastError->int2));
146
322
    }
147
148
    // -- callbacks and context struct for parsing from stream
149
    // -- c-linkage, so the callbacks can be used by libxml
150
    extern "C" {
151
152
    namespace {
153
154
    // context struct passed to IO functions
155
    typedef struct context {
156
        Reference< XInputStream > rInputStream;
157
        bool close;
158
        bool freeOnClose;
159
    } context_t;
160
161
    }
162
163
    static int xmlIO_read_func( void *context, char *buffer, int len)
164
18.7k
    {
165
        // get the context...
166
18.7k
        context_t *pctx = static_cast<context_t*>(context);
167
18.7k
        if (!pctx->rInputStream.is())
168
0
            return -1;
169
18.7k
        try {
170
            // try to read the requested number of bytes
171
18.7k
            Sequence< sal_Int8 > chunk(len);
172
18.7k
            int nread = pctx->rInputStream->readBytes(chunk, len);
173
174
            // copy bytes to the provided buffer
175
18.7k
            memcpy(buffer, chunk.getConstArray(), nread);
176
18.7k
            return nread;
177
18.7k
        } catch (const css::uno::Exception&) {
178
215
            TOOLS_WARN_EXCEPTION( "unoxml", "");
179
215
            return -1;
180
215
        }
181
18.7k
    }
182
183
    static int xmlIO_close_func(void* context)
184
6.01k
    {
185
        // get the context...
186
6.01k
        context_t *pctx = static_cast<context_t*>(context);
187
6.01k
        if (!pctx->rInputStream.is())
188
0
            return 0;
189
6.01k
        try
190
6.01k
        {
191
6.01k
            if (pctx->close)
192
0
                pctx->rInputStream->closeInput();
193
6.01k
            if (pctx->freeOnClose)
194
0
                delete pctx;
195
6.01k
            return 0;
196
6.01k
        } catch (const css::uno::Exception&) {
197
0
            TOOLS_WARN_EXCEPTION( "unoxml", "");
198
0
            return -1;
199
0
        }
200
6.01k
    }
201
202
    static xmlParserInputPtr resolve_func(void *ctx,
203
                                const xmlChar *publicId,
204
                                const xmlChar *systemId)
205
0
    {
206
        // get the CDocumentBuilder object
207
0
        xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(ctx);
208
0
        CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private);
209
0
        Reference< XEntityResolver > resolver = builder->getEntityResolver();
210
0
        OUString sysid;
211
0
        if (systemId != nullptr)
212
0
            sysid = OUString(reinterpret_cast<char const *>(systemId), strlen(reinterpret_cast<char const *>(systemId)), RTL_TEXTENCODING_UTF8);
213
0
        OUString pubid;
214
0
        if (publicId != nullptr)
215
0
            pubid = OUString(reinterpret_cast<char const *>(publicId), strlen(reinterpret_cast<char const *>(publicId)), RTL_TEXTENCODING_UTF8);
216
217
        // resolve the entity
218
0
        InputSource src = resolver->resolveEntity(pubid, sysid);
219
220
        // create IO context on heap because this call will no longer be on the stack
221
        // when IO is actually performed through the callbacks. The close function must
222
        // free the memory which is indicated by the freeOnClose field in the context struct
223
0
        context_t *c = new context_t;
224
0
        c->rInputStream = src.aInputStream;
225
0
        c->close = true;
226
0
        c->freeOnClose = true;
227
228
        // set up the inputBuffer and inputPtr for libxml
229
0
        xmlParserInputBufferPtr pBuffer =
230
0
            xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE);
231
0
        xmlParserInputPtr pInput =
232
0
                    xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE);
233
0
        return pInput;
234
0
    }
235
236
#if 0
237
    static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
238
    {
239
        // just call our resolver function using the URL as systemId
240
        return resolve_func(ctxt, 0, (const xmlChar*)URL);
241
    }
242
#endif
243
244
    // default warning handler does not trigger assertion
245
    static void warning_func(void * ctx, const char * /*msg*/, ...)
246
221
    {
247
221
        try
248
221
        {
249
221
            xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
250
251
221
            SAL_INFO(
252
221
                "unoxml",
253
221
                "libxml2 warning: "
254
221
                << make_error_message(pctx));
255
256
221
            CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
257
258
221
            Reference<XErrorHandler> xErrorHandler = pDocBuilder->getErrorHandler();
259
221
            if (xErrorHandler.is())   // if custom error handler is set (using setErrorHandler ())
260
0
            {
261
                // Prepare SAXParseException to be passed to custom XErrorHandler::warning function
262
0
                const xmlError* lastError = xmlCtxtGetLastError(pctx);
263
0
                css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {},
264
0
                                                       lastError->line, lastError->int2);
265
266
                // Call custom warning function
267
0
                xErrorHandler->warning(::css::uno::Any(saxex));
268
0
            }
269
221
        }
270
221
        catch (const css::uno::Exception &)
271
221
        {
272
            // Protect lib2xml from UNO Exception
273
0
            TOOLS_WARN_EXCEPTION("unoxml", "DOM::warning_func");
274
0
        }
275
221
    }
276
277
    // default error handler triggers assertion
278
    static void error_func(void * ctx, const char * /*msg*/, ...)
279
20.5k
    {
280
20.5k
        try
281
20.5k
        {
282
20.5k
            xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
283
20.5k
            SAL_WARN(
284
20.5k
                "unoxml",
285
20.5k
                "libxml2 error: "
286
20.5k
                << make_error_message(pctx));
287
288
20.5k
            CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
289
290
20.5k
            Reference<XErrorHandler> xErrorHandler = pDocBuilder->getErrorHandler();
291
20.5k
            if (xErrorHandler.is())   // if custom error handler is set (using setErrorHandler ())
292
0
            {
293
                // Prepare SAXParseException to be passed to custom XErrorHandler::error function
294
0
                const xmlError* lastError = xmlCtxtGetLastError(pctx);
295
0
                css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {},
296
0
                                                       lastError->line, lastError->int2);
297
298
                // Call custom warning function
299
0
                xErrorHandler->error(::css::uno::Any(saxex));
300
0
            }
301
20.5k
        }
302
20.5k
        catch (const css::uno::Exception &)
303
20.5k
        {
304
            // Protect lib2xml from UNO Exception
305
0
            TOOLS_WARN_EXCEPTION("unoxml", "DOM::error_func");
306
0
        }
307
20.5k
    }
308
    } // extern "C"
309
310
    static void throwEx(xmlParserCtxtPtr ctxt)
311
322
    {
312
322
        const xmlError* lastError = xmlCtxtGetLastError(ctxt);
313
322
        css::xml::sax::SAXParseException saxex(make_error_message(ctxt), {}, {}, {}, {},
314
322
                                               lastError->line, lastError->int2);
315
322
        throw saxex;
316
322
    }
317
318
    namespace {
319
320
    struct XmlFreeParserCtxt {
321
6.01k
        void operator ()(xmlParserCtxt * p) const { xmlFreeParserCtxt(p); }
322
    };
323
324
    }
325
326
    Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is)
327
6.01k
    {
328
6.01k
        if (!is.is()) {
329
0
            throw RuntimeException();
330
0
        }
331
332
6.01k
        std::scoped_lock const g(m_Mutex);
333
334
        // IO context struct.  Must outlive pContext, as destroying that via
335
        // xmlFreeParserCtxt may still access this context_t
336
6.01k
        context_t c;
337
6.01k
        c.rInputStream = is;
338
        // we did not open the stream, thus we do not close it.
339
6.01k
        c.close = false;
340
6.01k
        c.freeOnClose = false;
341
342
6.01k
        std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
343
6.01k
                xmlNewParserCtxt());
344
345
        // register error functions to prevent errors being printed
346
        // on the console
347
6.01k
        pContext->_private = this;
348
6.01k
        pContext->sax->error = error_func;
349
6.01k
        pContext->sax->warning = warning_func;
350
6.01k
        pContext->sax->resolveEntity = resolve_func;
351
352
6.01k
        xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(),
353
6.01k
                xmlIO_read_func, xmlIO_close_func, &c, nullptr, nullptr, 0);
354
355
6.01k
        if (pDoc == nullptr) {
356
322
            throwEx(pContext.get());
357
322
        }
358
6.01k
        return CDocument::CreateCDocument(pDoc);
359
6.01k
    }
360
361
    Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri)
362
0
    {
363
0
        std::scoped_lock const g(m_Mutex);
364
365
0
        std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
366
0
                xmlNewParserCtxt());
367
0
        pContext->_private = this;
368
0
        pContext->sax->error = error_func;
369
0
        pContext->sax->warning = warning_func;
370
0
        pContext->sax->resolveEntity = resolve_func;
371
        // xmlSetExternalEntityLoader(external_entity_loader);
372
0
        OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8);
373
0
        char *uri = const_cast<char*>(oUri.getStr());
374
0
        xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, nullptr, 0);
375
376
0
        Reference< XDocument > xRet;
377
378
        // if we failed to parse the URI as a simple file, let's try via a ucb stream.
379
        // For Android file:///assets/ URLs which must go via the osl/ file API.
380
0
        if (pDoc == nullptr) {
381
0
            Reference < XSimpleFileAccess3 > xStreamAccess(
382
0
                SimpleFileAccess::create( comphelper::getProcessComponentContext() ) );
383
0
            Reference< XInputStream > xInStream = xStreamAccess->openFileRead( sUri );
384
0
            if (!xInStream.is())
385
0
                throwEx(pContext.get());
386
387
            // loop over every layout entry in current file
388
0
            xRet = parse( xInStream );
389
390
0
            xInStream->closeInput();
391
0
            xInStream.clear();
392
393
0
        } else
394
0
            xRet = CDocument::CreateCDocument(pDoc).get();
395
396
0
        return xRet;
397
0
    }
398
399
    void SAL_CALL
400
    CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER)
401
0
    {
402
0
        std::scoped_lock const g(m_Mutex);
403
404
0
        m_xEntityResolver = xER;
405
0
    }
406
407
    Reference< XEntityResolver > CDocumentBuilder::getEntityResolver()
408
0
    {
409
0
        std::scoped_lock const g(m_Mutex);
410
411
0
        return m_xEntityResolver;
412
0
    }
413
414
    void SAL_CALL
415
    CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH)
416
0
    {
417
0
        std::scoped_lock const g(m_Mutex);
418
419
0
        m_xErrorHandler = xEH;
420
0
    }
421
422
    Reference< XErrorHandler > CDocumentBuilder::getErrorHandler()
423
20.7k
    {
424
20.7k
        std::scoped_lock const g(m_Mutex);
425
426
20.7k
        return m_xErrorHandler;
427
20.7k
    }
428
}
429
430
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
431
unoxml_CDocumentBuilder_get_implementation(
432
    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
433
257k
{
434
257k
    return cppu::acquire(new DOM::CDocumentBuilder());
435
257k
}
436
437
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */