/src/libreoffice/unoxml/source/dom/documentbuilder.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include "documentbuilder.hxx" |
21 | | |
22 | | #include <string.h> |
23 | | |
24 | | #include <libxml/xmlerror.h> |
25 | | #include <libxml/parser.h> |
26 | | |
27 | | #include <memory> |
28 | | |
29 | | #include <sal/log.hxx> |
30 | | #include <comphelper/diagnose_ex.hxx> |
31 | | |
32 | | #include <comphelper/processfactory.hxx> |
33 | | #include <cppuhelper/implbase.hxx> |
34 | | #include <cppuhelper/supportsservice.hxx> |
35 | | |
36 | | #include <com/sun/star/xml/sax/SAXParseException.hpp> |
37 | | #include <com/sun/star/ucb/XCommandEnvironment.hpp> |
38 | | #include <com/sun/star/task/XInteractionHandler.hpp> |
39 | | #include <com/sun/star/ucb/SimpleFileAccess.hpp> |
40 | | |
41 | | #include <ucbhelper/content.hxx> |
42 | | #include <ucbhelper/commandenvironment.hxx> |
43 | | |
44 | | #include "document.hxx" |
45 | | |
46 | | using namespace css::io; |
47 | | using namespace css::lang; |
48 | | using namespace css::ucb; |
49 | | using namespace css::uno; |
50 | | using namespace css::xml::dom; |
51 | | using namespace css::xml::sax; |
52 | | using namespace ucbhelper; |
53 | | using css::task::XInteractionHandler; |
54 | | using css::xml::sax::InputSource; |
55 | | |
56 | | |
57 | | namespace DOM |
58 | | { |
59 | | namespace { |
60 | | |
61 | | class CDefaultEntityResolver : public cppu::WeakImplHelper< XEntityResolver > |
62 | | { |
63 | | public: |
64 | | virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId ) override |
65 | 0 | { |
66 | 0 | InputSource is; |
67 | 0 | is.sPublicId = sPublicId; |
68 | 0 | is.sSystemId = sSystemId; |
69 | 0 | is.sEncoding.clear(); |
70 | |
|
71 | 0 | try { |
72 | 0 | Reference< XCommandEnvironment > aEnvironment( |
73 | 0 | new CommandEnvironment(Reference< XInteractionHandler >(), |
74 | 0 | Reference< XProgressHandler >() )); |
75 | 0 | Content aContent(sSystemId, aEnvironment, comphelper::getProcessComponentContext()); |
76 | |
|
77 | 0 | is.aInputStream = aContent.openStream(); |
78 | 0 | } catch (const css::uno::Exception&) { |
79 | 0 | TOOLS_WARN_EXCEPTION( "unoxml", "exception in default entity resolver"); |
80 | 0 | is.aInputStream.clear(); |
81 | 0 | } |
82 | 0 | return is; |
83 | 0 | } |
84 | | |
85 | | }; |
86 | | |
87 | | } |
88 | | |
89 | | CDocumentBuilder::CDocumentBuilder() |
90 | 257k | : m_xEntityResolver(new CDefaultEntityResolver) |
91 | 257k | { |
92 | | // init libxml. libxml will protect itself against multiple |
93 | | // initializations so there is no problem here if this gets |
94 | | // called multiple times. |
95 | 257k | xmlInitParser(); |
96 | 257k | } |
97 | | |
98 | | Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames() |
99 | 0 | { |
100 | 0 | return { u"com.sun.star.xml.dom.DocumentBuilder"_ustr }; |
101 | 0 | } |
102 | | |
103 | | OUString SAL_CALL CDocumentBuilder::getImplementationName() |
104 | 0 | { |
105 | 0 | return u"com.sun.star.comp.xml.dom.DocumentBuilder"_ustr; |
106 | 0 | } |
107 | | |
108 | | sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName) |
109 | 0 | { |
110 | 0 | return cppu::supportsService(this, aServiceName); |
111 | 0 | } |
112 | | |
113 | | Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation() |
114 | 0 | { |
115 | |
|
116 | 0 | return Reference< XDOMImplementation >(); |
117 | 0 | } |
118 | | |
119 | | sal_Bool SAL_CALL CDocumentBuilder::isNamespaceAware() |
120 | 0 | { |
121 | 0 | return true; |
122 | 0 | } |
123 | | |
124 | | sal_Bool SAL_CALL CDocumentBuilder::isValidating() |
125 | 0 | { |
126 | 0 | return false; |
127 | 0 | } |
128 | | |
129 | | Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument() |
130 | 251k | { |
131 | 251k | std::scoped_lock const g(m_Mutex); |
132 | | |
133 | | // create a new document |
134 | 251k | xmlDocPtr pDocument = xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0")); |
135 | 251k | return CDocument::CreateCDocument(pDocument); |
136 | 251k | } |
137 | | |
138 | | static OUString make_error_message(xmlParserCtxtPtr ctxt) |
139 | 322 | { |
140 | 322 | const xmlError* lastError = xmlCtxtGetLastError(ctxt); |
141 | 322 | return OUString(lastError->message, strlen(lastError->message), RTL_TEXTENCODING_ASCII_US) + |
142 | 322 | "Line: " + |
143 | 322 | OUString::number(static_cast<sal_Int32>(lastError->line)) + |
144 | 322 | "\nColumn: " + |
145 | 322 | OUString::number(static_cast<sal_Int32>(lastError->int2)); |
146 | 322 | } |
147 | | |
148 | | // -- callbacks and context struct for parsing from stream |
149 | | // -- c-linkage, so the callbacks can be used by libxml |
150 | | extern "C" { |
151 | | |
152 | | namespace { |
153 | | |
154 | | // context struct passed to IO functions |
155 | | typedef struct context { |
156 | | Reference< XInputStream > rInputStream; |
157 | | bool close; |
158 | | bool freeOnClose; |
159 | | } context_t; |
160 | | |
161 | | } |
162 | | |
163 | | static int xmlIO_read_func( void *context, char *buffer, int len) |
164 | 18.7k | { |
165 | | // get the context... |
166 | 18.7k | context_t *pctx = static_cast<context_t*>(context); |
167 | 18.7k | if (!pctx->rInputStream.is()) |
168 | 0 | return -1; |
169 | 18.7k | try { |
170 | | // try to read the requested number of bytes |
171 | 18.7k | Sequence< sal_Int8 > chunk(len); |
172 | 18.7k | int nread = pctx->rInputStream->readBytes(chunk, len); |
173 | | |
174 | | // copy bytes to the provided buffer |
175 | 18.7k | memcpy(buffer, chunk.getConstArray(), nread); |
176 | 18.7k | return nread; |
177 | 18.7k | } catch (const css::uno::Exception&) { |
178 | 215 | TOOLS_WARN_EXCEPTION( "unoxml", ""); |
179 | 215 | return -1; |
180 | 215 | } |
181 | 18.7k | } |
182 | | |
183 | | static int xmlIO_close_func(void* context) |
184 | 6.01k | { |
185 | | // get the context... |
186 | 6.01k | context_t *pctx = static_cast<context_t*>(context); |
187 | 6.01k | if (!pctx->rInputStream.is()) |
188 | 0 | return 0; |
189 | 6.01k | try |
190 | 6.01k | { |
191 | 6.01k | if (pctx->close) |
192 | 0 | pctx->rInputStream->closeInput(); |
193 | 6.01k | if (pctx->freeOnClose) |
194 | 0 | delete pctx; |
195 | 6.01k | return 0; |
196 | 6.01k | } catch (const css::uno::Exception&) { |
197 | 0 | TOOLS_WARN_EXCEPTION( "unoxml", ""); |
198 | 0 | return -1; |
199 | 0 | } |
200 | 6.01k | } |
201 | | |
202 | | static xmlParserInputPtr resolve_func(void *ctx, |
203 | | const xmlChar *publicId, |
204 | | const xmlChar *systemId) |
205 | 0 | { |
206 | | // get the CDocumentBuilder object |
207 | 0 | xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(ctx); |
208 | 0 | CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private); |
209 | 0 | Reference< XEntityResolver > resolver = builder->getEntityResolver(); |
210 | 0 | OUString sysid; |
211 | 0 | if (systemId != nullptr) |
212 | 0 | sysid = OUString(reinterpret_cast<char const *>(systemId), strlen(reinterpret_cast<char const *>(systemId)), RTL_TEXTENCODING_UTF8); |
213 | 0 | OUString pubid; |
214 | 0 | if (publicId != nullptr) |
215 | 0 | pubid = OUString(reinterpret_cast<char const *>(publicId), strlen(reinterpret_cast<char const *>(publicId)), RTL_TEXTENCODING_UTF8); |
216 | | |
217 | | // resolve the entity |
218 | 0 | InputSource src = resolver->resolveEntity(pubid, sysid); |
219 | | |
220 | | // create IO context on heap because this call will no longer be on the stack |
221 | | // when IO is actually performed through the callbacks. The close function must |
222 | | // free the memory which is indicated by the freeOnClose field in the context struct |
223 | 0 | context_t *c = new context_t; |
224 | 0 | c->rInputStream = src.aInputStream; |
225 | 0 | c->close = true; |
226 | 0 | c->freeOnClose = true; |
227 | | |
228 | | // set up the inputBuffer and inputPtr for libxml |
229 | 0 | xmlParserInputBufferPtr pBuffer = |
230 | 0 | xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE); |
231 | 0 | xmlParserInputPtr pInput = |
232 | 0 | xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE); |
233 | 0 | return pInput; |
234 | 0 | } |
235 | | |
#if 0
// Compiled out. Would adapt resolve_func to libxml2's external entity loader
// signature by passing the URL as system id and no public id.
static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
{
    return resolve_func(ctxt, nullptr, reinterpret_cast<const xmlChar*>(URL));
}
#endif
243 | | |
244 | | // default warning handler does not trigger assertion |
245 | | static void warning_func(void * ctx, const char * /*msg*/, ...) |
246 | 221 | { |
247 | 221 | try |
248 | 221 | { |
249 | 221 | xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx); |
250 | | |
251 | 221 | SAL_INFO( |
252 | 221 | "unoxml", |
253 | 221 | "libxml2 warning: " |
254 | 221 | << make_error_message(pctx)); |
255 | | |
256 | 221 | CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private); |
257 | | |
258 | 221 | Reference<XErrorHandler> xErrorHandler = pDocBuilder->getErrorHandler(); |
259 | 221 | if (xErrorHandler.is()) // if custom error handler is set (using setErrorHandler ()) |
260 | 0 | { |
261 | | // Prepare SAXParseException to be passed to custom XErrorHandler::warning function |
262 | 0 | const xmlError* lastError = xmlCtxtGetLastError(pctx); |
263 | 0 | css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {}, |
264 | 0 | lastError->line, lastError->int2); |
265 | | |
266 | | // Call custom warning function |
267 | 0 | xErrorHandler->warning(::css::uno::Any(saxex)); |
268 | 0 | } |
269 | 221 | } |
270 | 221 | catch (const css::uno::Exception &) |
271 | 221 | { |
272 | | // Protect lib2xml from UNO Exception |
273 | 0 | TOOLS_WARN_EXCEPTION("unoxml", "DOM::warning_func"); |
274 | 0 | } |
275 | 221 | } |
276 | | |
277 | | // default error handler triggers assertion |
278 | | static void error_func(void * ctx, const char * /*msg*/, ...) |
279 | 20.5k | { |
280 | 20.5k | try |
281 | 20.5k | { |
282 | 20.5k | xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx); |
283 | 20.5k | SAL_WARN( |
284 | 20.5k | "unoxml", |
285 | 20.5k | "libxml2 error: " |
286 | 20.5k | << make_error_message(pctx)); |
287 | | |
288 | 20.5k | CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private); |
289 | | |
290 | 20.5k | Reference<XErrorHandler> xErrorHandler = pDocBuilder->getErrorHandler(); |
291 | 20.5k | if (xErrorHandler.is()) // if custom error handler is set (using setErrorHandler ()) |
292 | 0 | { |
293 | | // Prepare SAXParseException to be passed to custom XErrorHandler::error function |
294 | 0 | const xmlError* lastError = xmlCtxtGetLastError(pctx); |
295 | 0 | css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {}, |
296 | 0 | lastError->line, lastError->int2); |
297 | | |
298 | | // Call custom warning function |
299 | 0 | xErrorHandler->error(::css::uno::Any(saxex)); |
300 | 0 | } |
301 | 20.5k | } |
302 | 20.5k | catch (const css::uno::Exception &) |
303 | 20.5k | { |
304 | | // Protect lib2xml from UNO Exception |
305 | 0 | TOOLS_WARN_EXCEPTION("unoxml", "DOM::error_func"); |
306 | 0 | } |
307 | 20.5k | } |
308 | | } // extern "C" |
309 | | |
310 | | static void throwEx(xmlParserCtxtPtr ctxt) |
311 | 322 | { |
312 | 322 | const xmlError* lastError = xmlCtxtGetLastError(ctxt); |
313 | 322 | css::xml::sax::SAXParseException saxex(make_error_message(ctxt), {}, {}, {}, {}, |
314 | 322 | lastError->line, lastError->int2); |
315 | 322 | throw saxex; |
316 | 322 | } |
317 | | |
318 | | namespace { |
319 | | |
320 | | struct XmlFreeParserCtxt { |
321 | 6.01k | void operator ()(xmlParserCtxt * p) const { xmlFreeParserCtxt(p); } |
322 | | }; |
323 | | |
324 | | } |
325 | | |
326 | | Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is) |
327 | 6.01k | { |
328 | 6.01k | if (!is.is()) { |
329 | 0 | throw RuntimeException(); |
330 | 0 | } |
331 | | |
332 | 6.01k | std::scoped_lock const g(m_Mutex); |
333 | | |
334 | | // IO context struct. Must outlive pContext, as destroying that via |
335 | | // xmlFreeParserCtxt may still access this context_t |
336 | 6.01k | context_t c; |
337 | 6.01k | c.rInputStream = is; |
338 | | // we did not open the stream, thus we do not close it. |
339 | 6.01k | c.close = false; |
340 | 6.01k | c.freeOnClose = false; |
341 | | |
342 | 6.01k | std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext( |
343 | 6.01k | xmlNewParserCtxt()); |
344 | | |
345 | | // register error functions to prevent errors being printed |
346 | | // on the console |
347 | 6.01k | pContext->_private = this; |
348 | 6.01k | pContext->sax->error = error_func; |
349 | 6.01k | pContext->sax->warning = warning_func; |
350 | 6.01k | pContext->sax->resolveEntity = resolve_func; |
351 | | |
352 | 6.01k | xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(), |
353 | 6.01k | xmlIO_read_func, xmlIO_close_func, &c, nullptr, nullptr, 0); |
354 | | |
355 | 6.01k | if (pDoc == nullptr) { |
356 | 322 | throwEx(pContext.get()); |
357 | 322 | } |
358 | 6.01k | return CDocument::CreateCDocument(pDoc); |
359 | 6.01k | } |
360 | | |
361 | | Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri) |
362 | 0 | { |
363 | 0 | std::scoped_lock const g(m_Mutex); |
364 | |
|
365 | 0 | std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext( |
366 | 0 | xmlNewParserCtxt()); |
367 | 0 | pContext->_private = this; |
368 | 0 | pContext->sax->error = error_func; |
369 | 0 | pContext->sax->warning = warning_func; |
370 | 0 | pContext->sax->resolveEntity = resolve_func; |
371 | | // xmlSetExternalEntityLoader(external_entity_loader); |
372 | 0 | OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8); |
373 | 0 | char *uri = const_cast<char*>(oUri.getStr()); |
374 | 0 | xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, nullptr, 0); |
375 | |
|
376 | 0 | Reference< XDocument > xRet; |
377 | | |
378 | | // if we failed to parse the URI as a simple file, let's try via a ucb stream. |
379 | | // For Android file:///assets/ URLs which must go via the osl/ file API. |
380 | 0 | if (pDoc == nullptr) { |
381 | 0 | Reference < XSimpleFileAccess3 > xStreamAccess( |
382 | 0 | SimpleFileAccess::create( comphelper::getProcessComponentContext() ) ); |
383 | 0 | Reference< XInputStream > xInStream = xStreamAccess->openFileRead( sUri ); |
384 | 0 | if (!xInStream.is()) |
385 | 0 | throwEx(pContext.get()); |
386 | | |
387 | | // loop over every layout entry in current file |
388 | 0 | xRet = parse( xInStream ); |
389 | |
|
390 | 0 | xInStream->closeInput(); |
391 | 0 | xInStream.clear(); |
392 | |
|
393 | 0 | } else |
394 | 0 | xRet = CDocument::CreateCDocument(pDoc).get(); |
395 | |
|
396 | 0 | return xRet; |
397 | 0 | } |
398 | | |
399 | | void SAL_CALL |
400 | | CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER) |
401 | 0 | { |
402 | 0 | std::scoped_lock const g(m_Mutex); |
403 | |
|
404 | 0 | m_xEntityResolver = xER; |
405 | 0 | } |
406 | | |
407 | | Reference< XEntityResolver > CDocumentBuilder::getEntityResolver() |
408 | 0 | { |
409 | 0 | std::scoped_lock const g(m_Mutex); |
410 | |
|
411 | 0 | return m_xEntityResolver; |
412 | 0 | } |
413 | | |
414 | | void SAL_CALL |
415 | | CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH) |
416 | 0 | { |
417 | 0 | std::scoped_lock const g(m_Mutex); |
418 | |
|
419 | 0 | m_xErrorHandler = xEH; |
420 | 0 | } |
421 | | |
422 | | Reference< XErrorHandler > CDocumentBuilder::getErrorHandler() |
423 | 20.7k | { |
424 | 20.7k | std::scoped_lock const g(m_Mutex); |
425 | | |
426 | 20.7k | return m_xErrorHandler; |
427 | 20.7k | } |
428 | | } |
429 | | |
430 | | extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* |
431 | | unoxml_CDocumentBuilder_get_implementation( |
432 | | css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) |
433 | 257k | { |
434 | 257k | return cppu::acquire(new DOM::CDocumentBuilder()); |
435 | 257k | } |
436 | | |
437 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |