/src/libreoffice/sdext/source/pdfimport/wrapper/wrapper.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <config_folders.h> |
21 | | |
22 | | #include <contentsink.hxx> |
23 | | #include <wrapper.hxx> |
24 | | |
25 | | #include <o3tl/string_view.hxx> |
26 | | #include <osl/file.h> |
27 | | #include <osl/file.hxx> |
28 | | #include <osl/thread.h> |
29 | | #include <osl/process.h> |
30 | | #include <osl/diagnose.h> |
31 | | #include <rtl/bootstrap.hxx> |
32 | | #include <rtl/ustring.hxx> |
33 | | #include <rtl/strbuf.hxx> |
34 | | #include <sal/log.hxx> |
35 | | |
36 | | #include <comphelper/propertysequence.hxx> |
37 | | #include <comphelper/string.hxx> |
38 | | #include <com/sun/star/io/XInputStream.hpp> |
39 | | #include <com/sun/star/uno/XComponentContext.hpp> |
40 | | #include <com/sun/star/rendering/PathCapType.hpp> |
41 | | #include <com/sun/star/rendering/XPolyPolygon2D.hpp> |
42 | | #include <com/sun/star/geometry/Matrix2D.hpp> |
43 | | #include <com/sun/star/geometry/AffineMatrix2D.hpp> |
44 | | #include <com/sun/star/geometry/RealRectangle2D.hpp> |
45 | | #include <com/sun/star/geometry/RealSize2D.hpp> |
46 | | #include <com/sun/star/task/XInteractionHandler.hpp> |
47 | | |
48 | | #include <basegfx/point/b2dpoint.hxx> |
49 | | #include <basegfx/polygon/b2dpolypolygon.hxx> |
50 | | #include <basegfx/polygon/b2dpolygon.hxx> |
51 | | #include <basegfx/utils/unopolypolygon.hxx> |
52 | | #include <basegfx/vector/b2enums.hxx> |
53 | | |
54 | | #include <vcl/metric.hxx> |
55 | | #include <vcl/font.hxx> |
56 | | #include <vcl/pdf/pwdinteract.hxx> |
57 | | #include <vcl/virdev.hxx> |
58 | | |
59 | | #include <cstddef> |
60 | | #include <memory> |
61 | | #include <string_view> |
62 | | #include <unordered_map> |
63 | | #include <vector> |
64 | | #include <string.h> |
65 | | |
66 | | using namespace com::sun::star; |
67 | | |
68 | | namespace pdfi |
69 | | { |
70 | | |
71 | | namespace |
72 | | { |
73 | | |
/// Keywords that start a line of output from the out-of-process xpdf
/// converter. Enumerators are listed alphabetically; NONE comes last.
enum parseKey {
    BEGINTRANSPARENCYGROUP = 0,

    CLIPPATH, CLIPTOSTROKEPATH,

    DRAWCHAR, DRAWIMAGE, DRAWLINK, DRAWMASK, DRAWMASKEDIMAGE, DRAWSOFTMASKEDIMAGE,

    ENDPAGE, ENDTEXTOBJECT, ENDTRANSPARENCYGROUP, EOCLIPPATH, EOFILLPATH,

    FILLPATH,

    HYPERLINK,

    INTERSECTCLIP, INTERSECTEOCLIP,

    POPSTATE, PUSHSTATE,

    RESTORESTATE,

    SAVESTATE,
    SETBLENDMODE, SETFILLCOLOR, SETFONT, SETLINECAP, SETLINEDASH, SETLINEJOIN,
    SETLINEWIDTH, SETMITERLIMIT, SETPAGENUM, SETSTROKECOLOR, SETTEXTRENDERMODE,
    SETTRANSFORMATION,
    STARTPAGE, STROKEPATH,

    TILINGPATTERNFILL,

    UPDATEBLENDMODE, UPDATECTM, UPDATEFILLCOLOR, UPDATEFILLOPACITY, UPDATEFLATNESS,
    UPDATEFONT, UPDATELINECAP, UPDATELINEDASH, UPDATELINEJOIN, UPDATELINEWIDTH,
    UPDATEMITERLIMIT, UPDATESTROKECOLOR, UPDATESTROKEOPACITY,

    NONE
};
129 | | |
130 | | #if defined _MSC_VER && defined __clang__ |
131 | | #pragma clang diagnostic push |
132 | | #pragma clang diagnostic ignored "-Wdeprecated-register" |
133 | | #pragma clang diagnostic ignored "-Wextra-tokens" |
134 | | #endif |
135 | | #include <hash.cxx> |
136 | | #if defined _MSC_VER && defined __clang__ |
137 | | #pragma clang diagnostic pop |
138 | | #endif |
139 | | |
140 | | class Parser |
141 | | { |
142 | | friend class LineParser; |
143 | | |
144 | | typedef std::unordered_map< sal_Int64, |
145 | | FontAttributes > FontMapType; |
146 | | |
147 | | ScopedVclPtr<VirtualDevice> m_xDev; |
148 | | const uno::Reference<uno::XComponentContext> m_xContext; |
149 | | const ContentSinkSharedPtr m_pSink; |
150 | | const oslFileHandle m_pErr; |
151 | | FontMapType m_aFontMap; |
152 | | |
153 | | public: |
154 | | Parser( const ContentSinkSharedPtr& rSink, |
155 | | oslFileHandle pErr, |
156 | | const uno::Reference<uno::XComponentContext>& xContext ) : |
157 | 0 | m_xContext(xContext), |
158 | 0 | m_pSink(rSink), |
159 | 0 | m_pErr(pErr), |
160 | 0 | m_aFontMap(101) |
161 | 0 | {} |
162 | | |
163 | | void parseLine( std::string_view aLine ); |
164 | | }; |
165 | | |
166 | | class LineParser { |
167 | | Parser & m_parser; |
168 | | std::string_view m_aLine; |
169 | | |
170 | | static void parseFontFamilyName( FontAttributes& aResult ); |
171 | | void readInt32( sal_Int32& o_Value ); |
172 | | void readInt64( sal_Int64& o_Value ); |
173 | | void readDouble( double& o_Value ); |
174 | | void readBinaryData( uno::Sequence<sal_Int8>& rBuf ); |
175 | | |
176 | | uno::Sequence<beans::PropertyValue> readImageImpl(); |
177 | | |
178 | | public: |
179 | | std::size_t m_nCharIndex = 0; |
180 | | |
181 | 0 | LineParser(Parser & parser, std::string_view line): m_parser(parser), m_aLine(line) {} |
182 | | |
183 | | std::string_view readNextToken(); |
184 | | sal_Int32 readInt32(); |
185 | | double readDouble(); |
186 | | |
187 | | uno::Reference<rendering::XPolyPolygon2D> readPath(); |
188 | | |
189 | | void readChar(); |
190 | | void readLineCap(); |
191 | | void readLineDash(); |
192 | | void readLineJoin(); |
193 | | void readTransformation(); |
194 | | rendering::ARGBColor readColor(); |
195 | | void readFont(); |
196 | | |
197 | | void readImage(); |
198 | | void readMask(); |
199 | | void readLink(); |
200 | | void readMaskedImage(); |
201 | | void readSoftMaskedImage(); |
202 | | void readTilingPatternFill(); |
203 | | void beginTransparencyGroup(); |
204 | | void endTransparencyGroup(); |
205 | | }; |
206 | | |
207 | | /** Unescapes line-ending characters in input string. These |
208 | | characters are encoded as pairs of characters: '\\' 'n', resp. |
209 | | '\\' 'r'. This function converts them back to '\n', resp. '\r'. |
210 | | */ |
211 | | OString lcl_unescapeLineFeeds(std::string_view i_rStr) |
212 | 0 | { |
213 | 0 | const size_t nOrigLen(i_rStr.size()); |
214 | 0 | const char* const pOrig(i_rStr.data()); |
215 | 0 | std::unique_ptr<char[]> pBuffer(new char[nOrigLen + 1]); |
216 | |
|
217 | 0 | const char* pRead(pOrig); |
218 | 0 | char* pWrite(pBuffer.get()); |
219 | 0 | const char* pCur(pOrig); |
220 | 0 | while ((pCur = strchr(pCur, '\\')) != nullptr) |
221 | 0 | { |
222 | 0 | const char cNext(pCur[1]); |
223 | 0 | if (cNext == 'n' || cNext == 'r' || cNext == '\\') |
224 | 0 | { |
225 | 0 | const size_t nLen(pCur - pRead); |
226 | 0 | strncpy(pWrite, pRead, nLen); |
227 | 0 | pWrite += nLen; |
228 | 0 | *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\'); |
229 | 0 | ++pWrite; |
230 | 0 | pCur = pRead = pCur + 2; |
231 | 0 | } |
232 | 0 | else |
233 | 0 | { |
234 | | // Just continue on the next character. The current |
235 | | // block will be copied the next time it goes through the |
236 | | // 'if' branch. |
237 | 0 | ++pCur; |
238 | 0 | } |
239 | 0 | } |
240 | | // maybe there are some data to copy yet |
241 | 0 | if (sal::static_int_cast<size_t>(pRead - pOrig) < nOrigLen) |
242 | 0 | { |
243 | 0 | const size_t nLen(nOrigLen - (pRead - pOrig)); |
244 | 0 | strncpy(pWrite, pRead, nLen); |
245 | 0 | pWrite += nLen; |
246 | 0 | } |
247 | 0 | *pWrite = '\0'; |
248 | |
|
249 | 0 | OString aResult(pBuffer.get()); |
250 | 0 | return aResult; |
251 | 0 | } |
252 | | |
253 | | std::string_view LineParser::readNextToken() |
254 | 0 | { |
255 | 0 | if (m_nCharIndex == std::string_view::npos) { |
256 | 0 | SAL_WARN("sdext.pdfimport", "insufficient input"); |
257 | 0 | return {}; |
258 | 0 | } |
259 | 0 | return o3tl::getToken(m_aLine,' ',m_nCharIndex); |
260 | 0 | } |
261 | | |
262 | | void LineParser::readInt32( sal_Int32& o_Value ) |
263 | 0 | { |
264 | 0 | std::string_view tok = readNextToken(); |
265 | 0 | o_Value = o3tl::toInt32(tok); |
266 | 0 | } |
267 | | |
268 | | sal_Int32 LineParser::readInt32() |
269 | 0 | { |
270 | 0 | std::string_view tok = readNextToken(); |
271 | 0 | return o3tl::toInt32(tok); |
272 | 0 | } |
273 | | |
274 | | void LineParser::readInt64( sal_Int64& o_Value ) |
275 | 0 | { |
276 | 0 | std::string_view tok = readNextToken(); |
277 | 0 | o_Value = o3tl::toInt64(tok); |
278 | 0 | } |
279 | | |
280 | | void LineParser::readDouble( double& o_Value ) |
281 | 0 | { |
282 | 0 | std::string_view tok = readNextToken(); |
283 | 0 | o_Value = rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0, |
284 | 0 | nullptr, nullptr); |
285 | 0 | } |
286 | | |
287 | | double LineParser::readDouble() |
288 | 0 | { |
289 | 0 | std::string_view tok = readNextToken(); |
290 | 0 | return rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0, |
291 | 0 | nullptr, nullptr); |
292 | 0 | } |
293 | | |
294 | | void LineParser::readBinaryData( uno::Sequence<sal_Int8>& rBuf ) |
295 | 0 | { |
296 | 0 | sal_Int32 nFileLen( rBuf.getLength() ); |
297 | 0 | sal_Int8* pBuf( rBuf.getArray() ); |
298 | 0 | sal_uInt64 nBytesRead(0); |
299 | 0 | oslFileError nRes=osl_File_E_None; |
300 | 0 | while( nFileLen ) |
301 | 0 | { |
302 | 0 | nRes = osl_readFile( m_parser.m_pErr, pBuf, nFileLen, &nBytesRead ); |
303 | 0 | if (osl_File_E_None != nRes ) |
304 | 0 | break; |
305 | 0 | pBuf += nBytesRead; |
306 | 0 | nFileLen -= sal::static_int_cast<sal_Int32>(nBytesRead); |
307 | 0 | } |
308 | |
|
309 | 0 | OSL_PRECOND(nRes==osl_File_E_None, "inconsistent data"); |
310 | 0 | } |
311 | | |
312 | | uno::Reference<rendering::XPolyPolygon2D> LineParser::readPath() |
313 | 0 | { |
314 | 0 | static const std::string_view aSubPathMarker( "subpath" ); |
315 | |
|
316 | 0 | if( readNextToken() != aSubPathMarker ) |
317 | 0 | OSL_PRECOND(false, "broken path"); |
318 | |
|
319 | 0 | basegfx::B2DPolyPolygon aResult; |
320 | 0 | while( m_nCharIndex != std::string_view::npos ) |
321 | 0 | { |
322 | 0 | basegfx::B2DPolygon aSubPath; |
323 | |
|
324 | 0 | sal_Int32 nClosedFlag; |
325 | 0 | readInt32( nClosedFlag ); |
326 | 0 | aSubPath.setClosed( nClosedFlag != 0 ); |
327 | |
|
328 | 0 | sal_Int32 nContiguousControlPoints(0); |
329 | |
|
330 | 0 | while( m_nCharIndex != std::string_view::npos ) |
331 | 0 | { |
332 | 0 | std::size_t nDummy=m_nCharIndex; |
333 | 0 | if (o3tl::getToken(m_aLine,' ',nDummy) == aSubPathMarker) { |
334 | 0 | break; |
335 | 0 | } |
336 | | |
337 | 0 | sal_Int32 nCurveFlag; |
338 | 0 | double nX, nY; |
339 | 0 | readDouble( nX ); |
340 | 0 | readDouble( nY ); |
341 | 0 | readInt32( nCurveFlag ); |
342 | |
|
343 | 0 | aSubPath.append(basegfx::B2DPoint(nX,nY)); |
344 | 0 | if( nCurveFlag ) |
345 | 0 | { |
346 | 0 | ++nContiguousControlPoints; |
347 | 0 | } |
348 | 0 | else if( nContiguousControlPoints ) |
349 | 0 | { |
350 | 0 | OSL_PRECOND(nContiguousControlPoints==2,"broken bezier path"); |
351 | | |
352 | | // have two control points before us. the current one |
353 | | // is a normal point - thus, convert previous points |
354 | | // into bezier segment |
355 | 0 | const sal_uInt32 nPoints( aSubPath.count() ); |
356 | 0 | const basegfx::B2DPoint aCtrlA( aSubPath.getB2DPoint(nPoints-3) ); |
357 | 0 | const basegfx::B2DPoint aCtrlB( aSubPath.getB2DPoint(nPoints-2) ); |
358 | 0 | const basegfx::B2DPoint aEnd( aSubPath.getB2DPoint(nPoints-1) ); |
359 | 0 | aSubPath.remove(nPoints-3, 3); |
360 | 0 | aSubPath.appendBezierSegment(aCtrlA, aCtrlB, aEnd); |
361 | |
|
362 | 0 | nContiguousControlPoints=0; |
363 | 0 | } |
364 | 0 | } |
365 | |
|
366 | 0 | aResult.append( aSubPath ); |
367 | 0 | if( m_nCharIndex != std::string_view::npos ) |
368 | 0 | readNextToken(); |
369 | 0 | } |
370 | |
|
371 | 0 | return static_cast<rendering::XLinePolyPolygon2D*>( |
372 | 0 | new basegfx::unotools::UnoPolyPolygon(std::move(aResult))); |
373 | 0 | } |
374 | | |
375 | | void LineParser::readChar() |
376 | 0 | { |
377 | 0 | double fontSize; |
378 | 0 | geometry::Matrix2D aUnoMatrix; |
379 | 0 | geometry::RealRectangle2D aRect; |
380 | |
|
381 | 0 | readDouble(aRect.X1); |
382 | 0 | readDouble(aRect.Y1); |
383 | 0 | readDouble(aRect.X2); |
384 | 0 | readDouble(aRect.Y2); |
385 | 0 | readDouble(aUnoMatrix.m00); |
386 | 0 | readDouble(aUnoMatrix.m01); |
387 | 0 | readDouble(aUnoMatrix.m10); |
388 | 0 | readDouble(aUnoMatrix.m11); |
389 | 0 | readDouble(fontSize); |
390 | |
|
391 | 0 | OString aChars; |
392 | |
|
393 | 0 | if (m_nCharIndex != std::string_view::npos) |
394 | 0 | aChars = lcl_unescapeLineFeeds( m_aLine.substr( m_nCharIndex ) ); |
395 | | |
396 | | // chars gobble up rest of line |
397 | 0 | m_nCharIndex = std::string_view::npos; |
398 | |
|
399 | 0 | m_parser.m_pSink->drawGlyphs(OStringToOUString(aChars, RTL_TEXTENCODING_UTF8), |
400 | 0 | aRect, aUnoMatrix, fontSize); |
401 | 0 | } |
402 | | |
403 | | void LineParser::readLineCap() |
404 | 0 | { |
405 | 0 | sal_Int8 nCap(rendering::PathCapType::BUTT); |
406 | 0 | switch( readInt32() ) |
407 | 0 | { |
408 | 0 | default: |
409 | 0 | case 0: nCap = rendering::PathCapType::BUTT; break; |
410 | 0 | case 1: nCap = rendering::PathCapType::ROUND; break; |
411 | 0 | case 2: nCap = rendering::PathCapType::SQUARE; break; |
412 | 0 | } |
413 | 0 | m_parser.m_pSink->setLineCap(nCap); |
414 | 0 | } |
415 | | |
416 | | void LineParser::readLineDash() |
417 | 0 | { |
418 | 0 | if( m_nCharIndex == std::string_view::npos ) |
419 | 0 | { |
420 | 0 | m_parser.m_pSink->setLineDash( uno::Sequence<double>(), 0.0 ); |
421 | 0 | return; |
422 | 0 | } |
423 | | |
424 | 0 | const double nOffset(readDouble()); |
425 | 0 | const sal_Int32 nLen(readInt32()); |
426 | |
|
427 | 0 | uno::Sequence<double> aDashArray(nLen); |
428 | 0 | double* pArray=aDashArray.getArray(); |
429 | 0 | for( sal_Int32 i=0; i<nLen; ++i ) |
430 | 0 | *pArray++ = readDouble(); |
431 | |
|
432 | 0 | m_parser.m_pSink->setLineDash( aDashArray, nOffset ); |
433 | 0 | } |
434 | | |
435 | | void LineParser::readLineJoin() |
436 | 0 | { |
437 | 0 | basegfx::B2DLineJoin nJoin(basegfx::B2DLineJoin::Miter); |
438 | 0 | switch( readInt32() ) |
439 | 0 | { |
440 | 0 | default: |
441 | 0 | case 0: nJoin = basegfx::B2DLineJoin::Miter; break; |
442 | 0 | case 1: nJoin = basegfx::B2DLineJoin::Round; break; |
443 | 0 | case 2: nJoin = basegfx::B2DLineJoin::Bevel; break; |
444 | 0 | } |
445 | 0 | m_parser.m_pSink->setLineJoin(nJoin); |
446 | 0 | } |
447 | | |
448 | | void LineParser::readTransformation() |
449 | 0 | { |
450 | 0 | geometry::AffineMatrix2D aMat; |
451 | 0 | readDouble(aMat.m00); |
452 | 0 | readDouble(aMat.m10); |
453 | 0 | readDouble(aMat.m01); |
454 | 0 | readDouble(aMat.m11); |
455 | 0 | readDouble(aMat.m02); |
456 | 0 | readDouble(aMat.m12); |
457 | 0 | m_parser.m_pSink->setTransformation( aMat ); |
458 | 0 | } |
459 | | |
460 | | rendering::ARGBColor LineParser::readColor() |
461 | 0 | { |
462 | 0 | rendering::ARGBColor aRes; |
463 | 0 | readDouble(aRes.Red); |
464 | 0 | readDouble(aRes.Green); |
465 | 0 | readDouble(aRes.Blue); |
466 | 0 | readDouble(aRes.Alpha); |
467 | 0 | return aRes; |
468 | 0 | } |
469 | | |
470 | | /* Parse and convert the font family name (passed from xpdfimport) to correct font names |
471 | | e.g. TimesNewRomanPSMT -> TimesNewRoman |
472 | | TimesNewRomanPS-BoldMT -> TimesNewRoman |
473 | | TimesNewRomanPS-BoldItalicMT -> TimesNewRoman |
474 | | During the conversion, also apply the font features (bold italic etc) to the result. |
475 | | |
476 | | TODO: Further convert the font names to real font names in the system rather than the PS names. |
477 | | e.g., TimesNewRoman -> Times New Roman |
478 | | */ |
479 | | void LineParser::parseFontFamilyName( FontAttributes& rResult ) |
480 | 0 | { |
481 | 0 | SAL_INFO("sdext.pdfimport", "Processing " << rResult.familyName << " ---"); |
482 | 0 | rResult.familyName = rResult.familyName.trim(); |
483 | 0 | for (const OUString& fontAttributesSuffix: fontAttributesSuffixes) |
484 | 0 | { |
485 | 0 | if ( rResult.familyName.endsWith(fontAttributesSuffix) ) |
486 | 0 | { |
487 | 0 | rResult.familyName = rResult.familyName.replaceAll(fontAttributesSuffix, ""); |
488 | 0 | SAL_INFO("sdext.pdfimport", rResult.familyName); |
489 | 0 | if (fontAttributesSuffix == u"Heavy" || fontAttributesSuffix == u"Black") |
490 | 0 | { |
491 | 0 | rResult.fontWeight = u"900"_ustr; |
492 | 0 | } |
493 | 0 | else if (fontAttributesSuffix == u"ExtraBold" || fontAttributesSuffix == u"UltraBold") |
494 | 0 | { |
495 | 0 | rResult.fontWeight = u"800"_ustr; |
496 | 0 | } |
497 | 0 | else if (fontAttributesSuffix == u"Bold") |
498 | 0 | { |
499 | 0 | rResult.fontWeight = u"bold"_ustr; |
500 | 0 | } |
501 | 0 | else if (fontAttributesSuffix == u"Semibold") |
502 | 0 | { |
503 | 0 | rResult.fontWeight = u"600"_ustr; |
504 | 0 | } |
505 | 0 | else if (fontAttributesSuffix == u"Medium") |
506 | 0 | { |
507 | 0 | rResult.fontWeight = u"500"_ustr; |
508 | 0 | } |
509 | 0 | else if (fontAttributesSuffix == u"Normal" || fontAttributesSuffix == u"Regular" || fontAttributesSuffix == u"Book") |
510 | 0 | { |
511 | 0 | rResult.fontWeight = u"400"_ustr; |
512 | 0 | } |
513 | 0 | else if (fontAttributesSuffix == u"Light") |
514 | 0 | { |
515 | 0 | rResult.fontWeight = u"300"_ustr; |
516 | 0 | } |
517 | 0 | else if (fontAttributesSuffix == u"ExtraLight" || fontAttributesSuffix == u"UltraLight") |
518 | 0 | { |
519 | 0 | rResult.fontWeight = u"200"_ustr; |
520 | 0 | } |
521 | 0 | else if (fontAttributesSuffix == u"Thin") |
522 | 0 | { |
523 | 0 | rResult.fontWeight = u"100"_ustr; |
524 | 0 | } |
525 | |
|
526 | 0 | if ( (fontAttributesSuffix == "Italic") or (fontAttributesSuffix == "Oblique") ) |
527 | 0 | { |
528 | 0 | rResult.isItalic = true; |
529 | 0 | } |
530 | 0 | } |
531 | 0 | } |
532 | 0 | } |
533 | | |
534 | | void LineParser::readFont() |
535 | 0 | { |
536 | | /* |
537 | | xpdf line is like (separated by space): |
538 | | updateFont <FontID> <isEmbedded> <maFontWeight> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName> |
539 | | updateFont 14 1 4 0 0 1200.000000 23068 TimesNewRomanPSMT |
540 | | |
541 | | If nEmbedSize > 0, then a fontFile is followed as a stream. |
542 | | */ |
543 | 0 | sal_Int64 nFontID; |
544 | 0 | sal_Int32 nIsEmbedded; |
545 | 0 | sal_Int32 nFontWeight; |
546 | 0 | sal_Int32 nIsItalic; |
547 | 0 | sal_Int32 nIsUnderline; |
548 | 0 | double nSize; |
549 | 0 | sal_Int32 nFileLen; |
550 | 0 | OString aFontName; |
551 | |
|
552 | 0 | readInt64(nFontID); // read FontID |
553 | 0 | readInt32(nIsEmbedded); // read isEmbedded |
554 | 0 | readInt32(nFontWeight); // read maFontWeight, see GfxFont enum Weight |
555 | 0 | readInt32(nIsItalic); // read isItalic |
556 | 0 | readInt32(nIsUnderline);// read isUnderline |
557 | 0 | readDouble(nSize); // read TransformedFontSize |
558 | 0 | readInt32(nFileLen); // read nEmbedSize |
559 | |
|
560 | 0 | nSize = nSize < 0.0 ? -nSize : nSize; |
561 | | // Read FontName. From the current position to the end (any white spaces will be included). |
562 | 0 | aFontName = lcl_unescapeLineFeeds(m_aLine.substr(m_nCharIndex)); |
563 | | |
564 | | // name gobbles up rest of line |
565 | 0 | m_nCharIndex = std::string_view::npos; |
566 | | |
567 | | // Check if this font is already in our font map list. |
568 | | // If yes, update the font size and skip. |
569 | 0 | Parser::FontMapType::const_iterator pFont( m_parser.m_aFontMap.find(nFontID) ); |
570 | 0 | if( pFont != m_parser.m_aFontMap.end() ) |
571 | 0 | { |
572 | 0 | OSL_PRECOND(nFileLen==0,"font data for known font"); |
573 | 0 | FontAttributes aRes(pFont->second); |
574 | 0 | aRes.size = nSize; |
575 | 0 | m_parser.m_pSink->setFont( aRes ); |
576 | |
|
577 | 0 | return; |
578 | 0 | } |
579 | | |
580 | | // The font is not yet in the map list - get info and add to map |
581 | 0 | OUString sFontWeight; // font weight name per ODF specifications |
582 | 0 | if (nFontWeight == 0 or nFontWeight == 4) // WeightNotDefined or W400, map to normal font |
583 | 0 | sFontWeight = u"normal"_ustr; |
584 | 0 | else if (nFontWeight == 1) // W100, Thin |
585 | 0 | sFontWeight = u"100"_ustr; |
586 | 0 | else if (nFontWeight == 2) // W200, Extra-Light |
587 | 0 | sFontWeight = u"200"_ustr; |
588 | 0 | else if (nFontWeight == 3) // W300, Light |
589 | 0 | sFontWeight = u"300"_ustr; |
590 | 0 | else if (nFontWeight == 5) // W500, Medium. Is this supported by ODF? |
591 | 0 | sFontWeight = u"500"_ustr; |
592 | 0 | else if (nFontWeight == 6) // W600, Semi-Bold |
593 | 0 | sFontWeight = u"600"_ustr; |
594 | 0 | else if (nFontWeight == 7) // W700, Bold |
595 | 0 | sFontWeight = u"bold"_ustr; |
596 | 0 | else if (nFontWeight == 8) // W800, Extra-Bold |
597 | 0 | sFontWeight = u"800"_ustr; |
598 | 0 | else if (nFontWeight == 9) // W900, Black |
599 | 0 | sFontWeight = u"900"_ustr; |
600 | 0 | SAL_INFO("sdext.pdfimport", "Font weight passed from xpdfimport is: " << sFontWeight); |
601 | | |
602 | 0 | FontAttributes aResult( OStringToOUString( aFontName, RTL_TEXTENCODING_UTF8 ), |
603 | 0 | sFontWeight, |
604 | 0 | nIsItalic != 0, |
605 | 0 | nIsUnderline != 0, |
606 | 0 | nSize, |
607 | 0 | 1.0); |
608 | | |
609 | | /* The above font attributes (fontName, fontWeight, italic) are based on |
610 | | xpdf line output and may not be reliable. To get correct attributes, |
611 | | we do the following: |
612 | | 1. Read the embedded font file and determine the attributes based on the |
613 | | font file. |
614 | | 2. If we failed to read the font file, or empty result is returned, then |
615 | | determine the font attributes from the font name. |
616 | | 3. If all these attempts have failed, then use a fallback font. |
617 | | */ |
618 | 0 | if (nFileLen > 0) |
619 | 0 | { |
620 | 0 | uno::Sequence<sal_Int8> aFontFile(nFileLen); |
621 | 0 | readBinaryData(aFontFile); // Read fontFile. |
622 | |
|
623 | 0 | vcl::Font aFontReadResult = vcl::Font::identifyFont(aFontFile.getArray(), nFileLen); |
624 | 0 | SAL_INFO("sdext.pdfimport", "familyName: " << aFontReadResult.GetFamilyName()); |
625 | | |
626 | 0 | if (!aFontReadResult.GetFamilyName().isEmpty()) // font detection successful |
627 | 0 | { |
628 | | // Family name |
629 | 0 | aResult.familyName = aFontReadResult.GetFamilyName(); |
630 | 0 | SAL_INFO("sdext.pdfimport", aResult.familyName); |
631 | | // tdf#143959: there are cases when the family name returned by font descriptor |
632 | | // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name |
633 | | // determined by parseFontFamilyName instead, but still determine the font |
634 | | // attributes (bold italic etc) from the font descriptor. |
635 | 0 | if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6) |
636 | 0 | { |
637 | 0 | aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7); |
638 | 0 | parseFontFamilyName(aResult); |
639 | 0 | } |
640 | 0 | if (aResult.familyName.endsWithIgnoreAsciiCase("-VKana")) |
641 | 0 | { |
642 | 0 | parseFontFamilyName(aResult); |
643 | 0 | } |
644 | | |
645 | | // Font weight |
646 | 0 | if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_THIN) |
647 | 0 | aResult.fontWeight = u"100"_ustr; |
648 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_ULTRALIGHT) |
649 | 0 | aResult.fontWeight = u"200"_ustr; |
650 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_LIGHT) |
651 | 0 | aResult.fontWeight = u"300"_ustr; |
652 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_SEMILIGHT) |
653 | 0 | aResult.fontWeight = u"350"_ustr; |
654 | | // no need to check "normal" here as this is default in nFontWeight above |
655 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_SEMIBOLD) |
656 | 0 | aResult.fontWeight = u"600"_ustr; |
657 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_BOLD) |
658 | 0 | aResult.fontWeight = u"bold"_ustr; |
659 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_ULTRABOLD) |
660 | 0 | aResult.fontWeight = u"800"_ustr; |
661 | 0 | else if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_BLACK) |
662 | 0 | aResult.fontWeight = u"900"_ustr; |
663 | 0 | SAL_INFO("sdext.pdfimport", aResult.fontWeight); |
664 | | |
665 | | // Italic |
666 | 0 | aResult.isItalic = (aFontReadResult.GetItalicMaybeAskConfig() == ITALIC_OBLIQUE || |
667 | 0 | aFontReadResult.GetItalicMaybeAskConfig() == ITALIC_NORMAL); |
668 | 0 | } else // font detection failed |
669 | 0 | { |
670 | 0 | SAL_WARN("sdext.pdfimport", |
671 | 0 | "Font detection from fontFile returned empty result. Guessing font info from font name."); |
672 | 0 | parseFontFamilyName(aResult); |
673 | 0 | } |
674 | |
|
675 | 0 | } else // no embedded font file - guess font attributes from font name |
676 | 0 | { |
677 | 0 | parseFontFamilyName(aResult); |
678 | 0 | } |
679 | | |
680 | | // last fallback |
681 | 0 | if (aResult.familyName.isEmpty()) |
682 | 0 | { |
683 | 0 | SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial."); |
684 | 0 | aResult.familyName = "Arial"; |
685 | 0 | } |
686 | | |
687 | 0 | if (!m_parser.m_xDev) |
688 | 0 | m_parser.m_xDev.disposeAndReset(VclPtr<VirtualDevice>::Create()); |
689 | |
|
690 | 0 | vcl::Font font(aResult.familyName, Size(0, 1000)); |
691 | 0 | m_parser.m_xDev->SetFont(font); |
692 | 0 | FontMetric metric(m_parser.m_xDev->GetFontMetric()); |
693 | 0 | aResult.ascent = metric.GetAscent() / 1000.0; |
694 | |
|
695 | 0 | m_parser.m_aFontMap[nFontID] = aResult; |
696 | |
|
697 | 0 | aResult.size = nSize; |
698 | 0 | m_parser.m_pSink->setFont(aResult); |
699 | 0 | } |
700 | | |
701 | | uno::Sequence<beans::PropertyValue> LineParser::readImageImpl() |
702 | 0 | { |
703 | 0 | std::string_view aToken = readNextToken(); |
704 | 0 | const sal_Int32 nImageSize( readInt32() ); |
705 | |
|
706 | 0 | OUString aFileName; |
707 | 0 | if( aToken == "PNG" ) |
708 | 0 | aFileName = "DUMMY.PNG"; |
709 | 0 | else if( aToken == "JPEG" ) |
710 | 0 | aFileName = "DUMMY.JPEG"; |
711 | 0 | else if( aToken == "PBM" ) |
712 | 0 | aFileName = "DUMMY.PBM"; |
713 | 0 | else |
714 | 0 | { |
715 | 0 | SAL_WARN_IF(aToken != "PPM","sdext.pdfimport","Invalid bitmap format"); |
716 | 0 | aFileName = "DUMMY.PPM"; |
717 | 0 | } |
718 | | |
719 | 0 | uno::Sequence<sal_Int8> aDataSequence(nImageSize); |
720 | 0 | readBinaryData( aDataSequence ); |
721 | |
|
722 | 0 | uno::Sequence< uno::Any > aStreamCreationArgs{ uno::Any(aDataSequence) }; |
723 | |
|
724 | 0 | uno::Reference< uno::XComponentContext > xContext( m_parser.m_xContext, uno::UNO_SET_THROW ); |
725 | 0 | uno::Reference< lang::XMultiComponentFactory > xFactory( xContext->getServiceManager(), uno::UNO_SET_THROW ); |
726 | 0 | uno::Reference< io::XInputStream > xDataStream( |
727 | 0 | xFactory->createInstanceWithArgumentsAndContext( u"com.sun.star.io.SequenceInputStream"_ustr, aStreamCreationArgs, m_parser.m_xContext ), |
728 | 0 | uno::UNO_QUERY_THROW ); |
729 | |
|
730 | 0 | uno::Sequence<beans::PropertyValue> aSequence( comphelper::InitPropertySequence({ |
731 | 0 | { "URL", uno::Any(aFileName) }, |
732 | 0 | { "InputStream", uno::Any( xDataStream ) }, |
733 | 0 | { "InputSequence", uno::Any(aDataSequence) } |
734 | 0 | })); |
735 | |
|
736 | 0 | return aSequence; |
737 | 0 | } |
738 | | |
739 | | void LineParser::readImage() |
740 | 0 | { |
741 | 0 | sal_Int32 nWidth, nHeight,nMaskColors; |
742 | 0 | readInt32(nWidth); |
743 | 0 | readInt32(nHeight); |
744 | 0 | readInt32(nMaskColors); |
745 | |
|
746 | 0 | uno::Sequence<beans::PropertyValue> aImg( readImageImpl() ); |
747 | |
|
748 | 0 | if( nMaskColors ) |
749 | 0 | { |
750 | 0 | uno::Sequence<sal_Int8> aDataSequence(nMaskColors); |
751 | 0 | readBinaryData( aDataSequence ); |
752 | |
|
753 | 0 | uno::Sequence<double> aMinRange(nMaskColors/2); |
754 | 0 | auto pMinRange = aMinRange.getArray(); |
755 | 0 | uno::Sequence<double> aMaxRange(nMaskColors/2); |
756 | 0 | auto pMaxRange = aMaxRange.getArray(); |
757 | 0 | for( sal_Int32 i=0; i<nMaskColors/2; ++i ) |
758 | 0 | { |
759 | 0 | pMinRange[i] = aDataSequence[i] / 255.0; |
760 | 0 | pMaxRange[i] = aDataSequence[i+nMaskColors/2] / 255.0; |
761 | 0 | } |
762 | |
|
763 | 0 | uno::Sequence<uno::Any> aMaskRanges{ uno::Any(aMinRange), uno::Any(aMaxRange) }; |
764 | 0 | m_parser.m_pSink->drawColorMaskedImage( aImg, aMaskRanges ); |
765 | 0 | } |
766 | 0 | else |
767 | 0 | m_parser.m_pSink->drawImage( aImg ); |
768 | 0 | } |
769 | | |
770 | | void LineParser::readMask() |
771 | 0 | { |
772 | 0 | sal_Int32 nWidth, nHeight, nInvert; |
773 | 0 | readInt32(nWidth); |
774 | 0 | readInt32(nHeight); |
775 | 0 | readInt32(nInvert); |
776 | |
|
777 | 0 | m_parser.m_pSink->drawMask( readImageImpl(), nInvert != 0); |
778 | 0 | } |
779 | | |
780 | | void LineParser::readLink() |
781 | 0 | { |
782 | 0 | geometry::RealRectangle2D aBounds; |
783 | 0 | readDouble(aBounds.X1); |
784 | 0 | readDouble(aBounds.Y1); |
785 | 0 | readDouble(aBounds.X2); |
786 | 0 | readDouble(aBounds.Y2); |
787 | |
|
788 | 0 | m_parser.m_pSink->hyperLink( aBounds, |
789 | 0 | OStringToOUString( lcl_unescapeLineFeeds( |
790 | 0 | m_aLine.substr(m_nCharIndex) ), |
791 | 0 | RTL_TEXTENCODING_UTF8 ) ); |
792 | | // name gobbles up rest of line |
793 | 0 | m_nCharIndex = std::string_view::npos; |
794 | 0 | } |
795 | | |
796 | | void LineParser::readMaskedImage() |
797 | 0 | { |
798 | 0 | sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight, nMaskInvert; |
799 | 0 | readInt32(nWidth); |
800 | 0 | readInt32(nHeight); |
801 | 0 | readInt32(nMaskWidth); |
802 | 0 | readInt32(nMaskHeight); |
803 | 0 | readInt32(nMaskInvert); |
804 | |
|
805 | 0 | const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() ); |
806 | 0 | const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() ); |
807 | 0 | m_parser.m_pSink->drawMaskedImage( aImage, aMask, nMaskInvert != 0 ); |
808 | 0 | } |
809 | | |
810 | | void LineParser::readSoftMaskedImage() |
811 | 0 | { |
812 | 0 | sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight; |
813 | 0 | readInt32(nWidth); |
814 | 0 | readInt32(nHeight); |
815 | 0 | readInt32(nMaskWidth); |
816 | 0 | readInt32(nMaskHeight); |
817 | |
|
818 | 0 | const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() ); |
819 | 0 | const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() ); |
820 | 0 | m_parser.m_pSink->drawAlphaMaskedImage( aImage, aMask ); |
821 | 0 | } |
822 | | |
823 | | void LineParser::readTilingPatternFill() |
824 | 0 | { |
825 | 0 | sal_Int32 nX0, nY0, nX1, nY1, nPaintType; |
826 | 0 | double nXStep, nYStep; |
827 | 0 | geometry::AffineMatrix2D aMat; |
828 | 0 | readInt32(nX0); |
829 | 0 | readInt32(nY0); |
830 | 0 | readInt32(nX1); |
831 | 0 | readInt32(nY1); |
832 | |
|
833 | 0 | readDouble(nXStep); |
834 | 0 | readDouble(nYStep); |
835 | |
|
836 | 0 | readInt32(nPaintType); |
837 | |
|
838 | 0 | readDouble(aMat.m00); |
839 | 0 | readDouble(aMat.m10); |
840 | 0 | readDouble(aMat.m01); |
841 | 0 | readDouble(aMat.m11); |
842 | 0 | readDouble(aMat.m02); |
843 | 0 | readDouble(aMat.m12); |
844 | | |
845 | | // The tile is an image with alpha |
846 | 0 | const uno::Sequence<beans::PropertyValue> aTile ( readImageImpl() ); |
847 | |
|
848 | 0 | m_parser.m_pSink->tilingPatternFill( nX0, nY0, nX1, nY1, |
849 | 0 | nXStep, nYStep, |
850 | 0 | nPaintType, |
851 | 0 | aMat, |
852 | 0 | aTile ); |
853 | 0 | } |
854 | | |
855 | | void LineParser::beginTransparencyGroup() |
856 | 0 | { |
857 | 0 | sal_Int32 nForSoftMask; |
858 | 0 | readInt32( nForSoftMask ); |
859 | 0 | m_parser.m_pSink->beginTransparencyGroup(!!nForSoftMask); |
860 | 0 | } |
861 | | |
862 | | void LineParser::endTransparencyGroup() |
863 | 0 | { |
864 | 0 | m_parser.m_pSink->endTransparencyGroup(); |
865 | 0 | } |
866 | | |
867 | | void Parser::parseLine( std::string_view aLine ) |
868 | 0 | { |
869 | 0 | OSL_PRECOND( m_pSink, "Invalid sink" ); |
870 | 0 | OSL_PRECOND( m_pErr, "Invalid filehandle" ); |
871 | 0 | OSL_PRECOND( m_xContext.is(), "Invalid service factory" ); |
872 | |
|
873 | 0 | LineParser lp(*this, aLine); |
874 | 0 | const std::string_view rCmd = lp.readNextToken(); |
875 | 0 | const hash_entry* pEntry = PdfKeywordHash::in_word_set( rCmd.data(), |
876 | 0 | rCmd.size() ); |
877 | 0 | assert(pEntry); |
878 | 0 | switch( pEntry->eKey ) |
879 | 0 | { |
880 | 0 | case BEGINTRANSPARENCYGROUP: |
881 | 0 | lp.beginTransparencyGroup(); break; |
882 | 0 | case CLIPPATH: |
883 | 0 | m_pSink->intersectClip(lp.readPath()); break; |
884 | 0 | case CLIPTOSTROKEPATH: |
885 | 0 | m_pSink->intersectClipToStroke(lp.readPath()); break; |
886 | 0 | case DRAWCHAR: |
887 | 0 | lp.readChar(); break; |
888 | 0 | case DRAWIMAGE: |
889 | 0 | lp.readImage(); break; |
890 | 0 | case DRAWLINK: |
891 | 0 | lp.readLink(); break; |
892 | 0 | case DRAWMASK: |
893 | 0 | lp.readMask(); break; |
894 | 0 | case DRAWMASKEDIMAGE: |
895 | 0 | lp.readMaskedImage(); break; |
896 | 0 | case DRAWSOFTMASKEDIMAGE: |
897 | 0 | lp.readSoftMaskedImage(); break; |
898 | 0 | case ENDPAGE: |
899 | 0 | m_pSink->endPage(); break; |
900 | 0 | case ENDTEXTOBJECT: |
901 | 0 | m_pSink->endText(); break; |
902 | 0 | case ENDTRANSPARENCYGROUP: |
903 | 0 | lp.endTransparencyGroup(); break; |
904 | 0 | case EOCLIPPATH: |
905 | 0 | m_pSink->intersectEoClip(lp.readPath()); break; |
906 | 0 | case EOFILLPATH: |
907 | 0 | m_pSink->eoFillPath(lp.readPath()); break; |
908 | 0 | case FILLPATH: |
909 | 0 | m_pSink->fillPath(lp.readPath()); break; |
910 | 0 | case RESTORESTATE: |
911 | 0 | m_pSink->popState(); break; |
912 | 0 | case SAVESTATE: |
913 | 0 | m_pSink->pushState(); break; |
914 | 0 | case SETPAGENUM: |
915 | 0 | m_pSink->setPageNum( lp.readInt32() ); break; |
916 | 0 | case STARTPAGE: |
917 | 0 | { |
918 | 0 | const double nWidth ( lp.readDouble() ); |
919 | 0 | const double nHeight( lp.readDouble() ); |
920 | 0 | m_pSink->startPage( geometry::RealSize2D( nWidth, nHeight ) ); |
921 | 0 | break; |
922 | 0 | } |
923 | 0 | case STROKEPATH: |
924 | 0 | m_pSink->strokePath(lp.readPath()); break; |
925 | 0 | case TILINGPATTERNFILL: |
926 | 0 | lp.readTilingPatternFill(); break; |
927 | 0 | case UPDATECTM: |
928 | 0 | lp.readTransformation(); break; |
929 | 0 | case UPDATEFILLCOLOR: |
930 | 0 | m_pSink->setFillColor( lp.readColor() ); break; |
931 | 0 | case UPDATEFLATNESS: |
932 | 0 | m_pSink->setFlatness( lp.readDouble( ) ); break; |
933 | 0 | case UPDATEFONT: |
934 | 0 | lp.readFont(); break; |
935 | 0 | case UPDATELINECAP: |
936 | 0 | lp.readLineCap(); break; |
937 | 0 | case UPDATELINEDASH: |
938 | 0 | lp.readLineDash(); break; |
939 | 0 | case UPDATELINEJOIN: |
940 | 0 | lp.readLineJoin(); break; |
941 | 0 | case UPDATELINEWIDTH: |
942 | 0 | m_pSink->setLineWidth( lp.readDouble() );break; |
943 | 0 | case UPDATEMITERLIMIT: |
944 | 0 | m_pSink->setMiterLimit( lp.readDouble() ); break; |
945 | 0 | case UPDATESTROKECOLOR: |
946 | 0 | m_pSink->setStrokeColor( lp.readColor() ); break; |
947 | 0 | case UPDATESTROKEOPACITY: |
948 | 0 | break; |
949 | 0 | case SETTEXTRENDERMODE: |
950 | 0 | m_pSink->setTextRenderMode( lp.readInt32() ); break; |
951 | | |
952 | 0 | case NONE: |
953 | 0 | default: |
954 | 0 | OSL_PRECOND(false,"Unknown input"); |
955 | 0 | break; |
956 | 0 | } |
957 | | |
958 | | // all consumed? |
959 | 0 | SAL_WARN_IF( |
960 | 0 | lp.m_nCharIndex!=std::string_view::npos, "sdext.pdfimport", "leftover scanner input"); |
961 | 0 | } |
962 | | |
963 | | } // namespace |
964 | | |
965 | | namespace { |
966 | | |
967 | | class Buffering |
968 | | { |
969 | | static const int SIZE = 64*1024; |
970 | | std::unique_ptr<char[]> aBuffer; |
971 | | oslFileHandle& pOut; |
972 | | size_t pos; |
973 | | sal_uInt64 left; |
974 | | |
975 | | public: |
976 | 0 | explicit Buffering(oslFileHandle& out) : aBuffer(new char[SIZE]), pOut(out), pos(0), left(0) {} |
977 | | |
978 | | oslFileError read(char *pChar, short count, sal_uInt64* pBytesRead) |
979 | 0 | { |
980 | 0 | oslFileError nRes = osl_File_E_None; |
981 | 0 | sal_uInt64 nBytesRead = 0; |
982 | 0 | while (count > 0) |
983 | 0 | { |
984 | 0 | if (left == 0) |
985 | 0 | { |
986 | 0 | nRes = osl_readFile(pOut, aBuffer.get(), SIZE, &left); |
987 | 0 | if (nRes != osl_File_E_None || left == 0) |
988 | 0 | { |
989 | 0 | *pBytesRead = nBytesRead; |
990 | 0 | return nRes; |
991 | 0 | } |
992 | 0 | pos = 0; |
993 | 0 | } |
994 | 0 | *pChar = aBuffer.get()[pos]; |
995 | 0 | --count; |
996 | 0 | ++pos; |
997 | 0 | --left; |
998 | 0 | ++pChar; |
999 | 0 | ++nBytesRead; |
1000 | 0 | } |
1001 | 0 | *pBytesRead = nBytesRead; |
1002 | 0 | return osl_File_E_None; |
1003 | 0 | } |
1004 | | |
1005 | | // Read a line and return any error |
1006 | | // Note: It skips leading \n and \r |
1007 | | // It clears the line buffer at the start |
1008 | | oslFileError readLine(OStringBuffer& line) |
1009 | 0 | { |
1010 | 0 | char aChar('\n'); |
1011 | 0 | sal_uInt64 nBytesRead; |
1012 | 0 | oslFileError nRes; |
1013 | |
|
1014 | 0 | line.setLength(0); |
1015 | | |
1016 | | // skip garbage \r \n at start of line |
1017 | 0 | for (;;) |
1018 | 0 | { |
1019 | 0 | nRes = read(&aChar, 1, &nBytesRead); |
1020 | 0 | if (osl_File_E_None != nRes || nBytesRead != 1 || (aChar != '\n' && aChar != '\r')) |
1021 | 0 | break; |
1022 | 0 | } |
1023 | 0 | if (osl_File_E_None != nRes) |
1024 | 0 | return nRes; |
1025 | | |
1026 | 0 | if (aChar != '\n' && aChar != '\r') |
1027 | 0 | line.append(aChar); |
1028 | |
|
1029 | 0 | for (;;) |
1030 | 0 | { |
1031 | 0 | nRes = read(&aChar, 1, &nBytesRead); |
1032 | 0 | if (osl_File_E_None != nRes || nBytesRead != 1 || aChar == '\n' || aChar == '\r') |
1033 | 0 | break; |
1034 | 0 | line.append(aChar); |
1035 | 0 | } |
1036 | |
|
1037 | 0 | return nRes; |
1038 | 0 | } |
1039 | | }; |
1040 | | |
1041 | | } |
1042 | | |
1043 | | bool xpdf_ImportFromFile(const OUString& rURL, |
1044 | | const ContentSinkSharedPtr& rSink, |
1045 | | const uno::Reference<task::XInteractionHandler>& xIHdl, |
1046 | | const OUString& rPwd, |
1047 | | const uno::Reference<uno::XComponentContext>& xContext, |
1048 | | const OUString& rFilterOptions) |
1049 | 0 | { |
1050 | 0 | bool bPasswordOnEntry = !rPwd.isEmpty(); |
1051 | 0 | OSL_ASSERT(rSink); |
1052 | |
|
1053 | 0 | OUString aSysUPath; |
1054 | 0 | if( osl_getSystemPathFromFileURL( rURL.pData, &aSysUPath.pData ) != osl_File_E_None ) |
1055 | 0 | { |
1056 | 0 | SAL_WARN( |
1057 | 0 | "sdext.pdfimport", |
1058 | 0 | "getSystemPathFromFileURL(" << rURL << ") failed"); |
1059 | 0 | return false; |
1060 | 0 | } |
1061 | 0 | OUString aDocName( rURL.copy( rURL.lastIndexOf( '/' )+1 ) ); |
1062 | | |
1063 | | // check for encryption, if necessary get password |
1064 | 0 | OUString aPwd( rPwd ); |
1065 | | |
1066 | | // Determine xpdfimport executable URL: |
1067 | 0 | OUString converterURL(u"$BRAND_BASE_DIR/" LIBO_BIN_FOLDER "/xpdfimport"_ustr); |
1068 | 0 | rtl::Bootstrap::expandMacros(converterURL); //TODO: detect failure |
1069 | | |
1070 | | // spawn separate process to keep LGPL/GPL code apart. |
1071 | |
|
1072 | 0 | static constexpr OUString aOptFlag(u"-o"_ustr); |
1073 | 0 | std::vector<rtl_uString*> args({ aSysUPath.pData }); |
1074 | 0 | if (!rFilterOptions.isEmpty()) |
1075 | 0 | { |
1076 | 0 | args.push_back(aOptFlag.pData); |
1077 | 0 | args.push_back(rFilterOptions.pData); |
1078 | 0 | } |
1079 | |
|
1080 | 0 | oslProcess aProcess; |
1081 | 0 | oslFileHandle pIn = nullptr; |
1082 | 0 | oslFileHandle pOut = nullptr; |
1083 | 0 | oslFileHandle pErr = nullptr; |
1084 | 0 | oslSecurity pSecurity = osl_getCurrentSecurity (); |
1085 | 0 | oslProcessError eErr = |
1086 | 0 | osl_executeProcess_WithRedirectedIO(converterURL.pData, |
1087 | 0 | args.data(), |
1088 | 0 | args.size(), |
1089 | 0 | osl_Process_SEARCHPATH|osl_Process_HIDDEN, |
1090 | 0 | pSecurity, |
1091 | 0 | nullptr, nullptr, 0, |
1092 | 0 | &aProcess, &pIn, &pOut, &pErr); |
1093 | 0 | osl_freeSecurityHandle(pSecurity); |
1094 | |
|
1095 | 0 | bool bRet=true; |
1096 | 0 | try |
1097 | 0 | { |
1098 | 0 | std::unique_ptr<Buffering> pBuffering; |
1099 | 0 | sal_uInt64 nWritten = 0; |
1100 | |
|
1101 | 0 | if( eErr!=osl_Process_E_None ) |
1102 | 0 | { |
1103 | 0 | SAL_WARN( |
1104 | 0 | "sdext.pdfimport", |
1105 | 0 | "executeProcess of " << converterURL << " failed with " |
1106 | 0 | << +eErr); |
1107 | 0 | return false; |
1108 | 0 | } |
1109 | | |
1110 | 0 | if (!pIn || !pOut || !pErr) |
1111 | 0 | { |
1112 | 0 | SAL_WARN("sdext.pdfimport", "Failure opening pipes"); |
1113 | 0 | bRet = false; |
1114 | 0 | } |
1115 | | |
1116 | | // Loop possibly asking for a password if needed |
1117 | 0 | bool bEntered = false; |
1118 | 0 | do |
1119 | 0 | { |
1120 | | // Password lines are Pmypassword\n followed by "O\n" to try to open |
1121 | 0 | OString aBuf = "P" + OUStringToOString(aPwd, RTL_TEXTENCODING_ISO_8859_1) + "\nO\n"; |
1122 | |
|
1123 | 0 | osl_writeFile(pIn, aBuf.getStr(), sal_uInt64(aBuf.getLength()), &nWritten); |
1124 | | |
1125 | | // Check for a header saying if the child managed to open the document |
1126 | 0 | OStringBuffer aHeaderLine; |
1127 | 0 | pBuffering = std::unique_ptr<Buffering>(new Buffering(pOut)); |
1128 | 0 | oslFileError eFileErr = pBuffering->readLine(aHeaderLine); |
1129 | 0 | if (osl_File_E_None == eFileErr) |
1130 | 0 | { |
1131 | 0 | auto aHeaderString = aHeaderLine.toString(); |
1132 | 0 | SAL_INFO("sdext.pdfimport", "Header line:" << aHeaderString); |
1133 | 0 | if (aHeaderString.startsWith("#OPEN")) |
1134 | 0 | { |
1135 | | // Great - it opened! |
1136 | 0 | break; |
1137 | 0 | } |
1138 | | |
1139 | | // The only other thing we expect here is a line starting with |
1140 | | // #ERROR: |
1141 | 0 | if (!aHeaderString.startsWith("#ERROR:")) |
1142 | 0 | { |
1143 | 0 | SAL_WARN("sdext.pdfimport", "Bad parser answer:: " << aHeaderString); |
1144 | 0 | bRet = false; |
1145 | 0 | break; |
1146 | 0 | } |
1147 | | |
1148 | 0 | if (!aHeaderString.endsWith(":ENCRYPTED")) |
1149 | 0 | { |
1150 | | // Some other type of parser error |
1151 | 0 | SAL_WARN("sdext.pdfimport", "Error from parser: " << aHeaderString); |
1152 | 0 | bRet = false; |
1153 | 0 | break; |
1154 | 0 | } |
1155 | | |
1156 | | // Must be a failure to decrypt, prompt for a password unless we've |
1157 | | // already got one (e.g. if the hybrid detect prompted for one) |
1158 | 0 | if (!bPasswordOnEntry) |
1159 | 0 | { |
1160 | 0 | bEntered = vcl::pdf::getPassword(xIHdl, aPwd, !bEntered, aDocName); |
1161 | 0 | if (!bEntered) |
1162 | 0 | { |
1163 | | // User cancelled password input |
1164 | 0 | SAL_INFO("sdext.pdfimport", "User cancelled password input"); |
1165 | 0 | bRet = false; |
1166 | 0 | break; |
1167 | 0 | } |
1168 | 0 | } |
1169 | | |
1170 | | // user entered a password, just loop around again |
1171 | 0 | } |
1172 | 0 | else |
1173 | 0 | { |
1174 | 0 | SAL_WARN("sdext.pdfimport", "Unable to read header line; " << eFileErr); |
1175 | 0 | bRet = false; |
1176 | 0 | } |
1177 | 0 | } while (bRet); |
1178 | | |
1179 | 0 | if (bRet && pOut && pErr) |
1180 | 0 | { |
1181 | | // Start the rendering by sending G command |
1182 | 0 | osl_writeFile(pIn, "G\n", 2, &nWritten); |
1183 | 0 | SAL_INFO("sdext.pdfimport", "Sent Go command: " << nWritten); |
1184 | | |
1185 | | // read results of PDF parser. One line - one call to |
1186 | | // OutputDev. stderr is used for alternate streams, like |
1187 | | // embedded fonts and bitmaps |
1188 | 0 | Parser aParser(rSink,pErr,xContext); |
1189 | 0 | OStringBuffer line; |
1190 | 0 | for( ;; ) |
1191 | 0 | { |
1192 | 0 | oslFileError nRes = pBuffering->readLine(line); |
1193 | |
|
1194 | 0 | if ( osl_File_E_None != nRes ) |
1195 | 0 | break; |
1196 | 0 | if ( line.isEmpty() ) |
1197 | 0 | break; |
1198 | | |
1199 | 0 | aParser.parseLine(line); |
1200 | 0 | } |
1201 | 0 | } |
1202 | 0 | } |
1203 | 0 | catch( uno::Exception& ) |
1204 | 0 | { |
1205 | | // crappy C file interface. need manual resource dealloc |
1206 | 0 | bRet = false; |
1207 | 0 | } |
1208 | | |
1209 | 0 | if( pIn ) |
1210 | 0 | osl_closeFile(pIn); |
1211 | 0 | if( pOut ) |
1212 | 0 | osl_closeFile(pOut); |
1213 | 0 | if( pErr ) |
1214 | 0 | osl_closeFile(pErr); |
1215 | 0 | eErr = osl_joinProcess(aProcess); |
1216 | 0 | if (eErr == osl_Process_E_None) |
1217 | 0 | { |
1218 | 0 | oslProcessInfo info; |
1219 | 0 | info.Size = sizeof info; |
1220 | 0 | eErr = osl_getProcessInfo(aProcess, osl_Process_EXITCODE, &info); |
1221 | 0 | if (eErr == osl_Process_E_None) |
1222 | 0 | { |
1223 | 0 | if (info.Code != 0) |
1224 | 0 | { |
1225 | 0 | SAL_WARN( |
1226 | 0 | "sdext.pdfimport", |
1227 | 0 | "getProcessInfo of " << converterURL |
1228 | 0 | << " failed with exit code " << info.Code); |
1229 | | // TODO: use xIHdl and/or exceptions to inform the user; see poppler/ErrorCodes.h |
1230 | 0 | bRet = false; |
1231 | 0 | } |
1232 | 0 | } |
1233 | 0 | else |
1234 | 0 | { |
1235 | 0 | SAL_WARN( |
1236 | 0 | "sdext.pdfimport", |
1237 | 0 | "getProcessInfo of " << converterURL << " failed with " |
1238 | 0 | << +eErr); |
1239 | 0 | bRet = false; |
1240 | 0 | } |
1241 | 0 | } |
1242 | 0 | else |
1243 | 0 | { |
1244 | 0 | SAL_WARN( |
1245 | 0 | "sdext.pdfimport", |
1246 | 0 | "joinProcess of " << converterURL << " failed with " << +eErr); |
1247 | 0 | bRet = false; |
1248 | 0 | } |
1249 | 0 | osl_freeProcessHandle(aProcess); |
1250 | 0 | return bRet; |
1251 | 0 | } |
1252 | | |
1253 | | |
1254 | | bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xInput, |
1255 | | const ContentSinkSharedPtr& rSink, |
1256 | | const uno::Reference<task::XInteractionHandler >& xIHdl, |
1257 | | const OUString& rPwd, |
1258 | | const uno::Reference< uno::XComponentContext >& xContext, |
1259 | | const OUString& rFilterOptions ) |
1260 | 0 | { |
1261 | 0 | OSL_ASSERT(xInput.is()); |
1262 | 0 | OSL_ASSERT(rSink); |
1263 | | |
1264 | | // convert XInputStream to local temp file |
1265 | 0 | oslFileHandle aFile = nullptr; |
1266 | 0 | OUString aURL; |
1267 | 0 | if( osl_createTempFile( nullptr, &aFile, &aURL.pData ) != osl_File_E_None ) |
1268 | 0 | return false; |
1269 | | |
1270 | | // copy content, buffered... |
1271 | 0 | const sal_uInt32 nBufSize = 4096; |
1272 | 0 | uno::Sequence<sal_Int8> aBuf( nBufSize ); |
1273 | 0 | sal_uInt64 nBytes = 0; |
1274 | 0 | sal_uInt64 nWritten = 0; |
1275 | 0 | bool bSuccess = true; |
1276 | 0 | do |
1277 | 0 | { |
1278 | 0 | try |
1279 | 0 | { |
1280 | 0 | nBytes = xInput->readBytes( aBuf, nBufSize ); |
1281 | 0 | } |
1282 | 0 | catch( css::uno::Exception& ) |
1283 | 0 | { |
1284 | 0 | osl_closeFile( aFile ); |
1285 | 0 | throw; |
1286 | 0 | } |
1287 | 0 | if( nBytes > 0 ) |
1288 | 0 | { |
1289 | 0 | osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten ); |
1290 | 0 | if( nWritten != nBytes ) |
1291 | 0 | { |
1292 | 0 | bSuccess = false; |
1293 | 0 | break; |
1294 | 0 | } |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 | while( nBytes == nBufSize ); |
1298 | | |
1299 | 0 | osl_closeFile( aFile ); |
1300 | |
|
1301 | 0 | if ( bSuccess ) |
1302 | 0 | bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext, rFilterOptions ); |
1303 | 0 | osl_removeFile( aURL.pData ); |
1304 | |
|
1305 | 0 | return bSuccess; |
1306 | 0 | } |
1307 | | |
1308 | | } |
1309 | | |
1310 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |