/src/libreoffice/vcl/source/gdi/pdfobjectcopier.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #include <sal/log.hxx> |
11 | | #include <sal/types.h> |
12 | | #include <rtl/strbuf.hxx> |
13 | | #include <tools/stream.hxx> |
14 | | #include <tools/zcodec.hxx> |
15 | | |
16 | | #include <vcl/filter/pdfdocument.hxx> |
17 | | #include <vcl/filter/pdfobjectcontainer.hxx> |
18 | | |
19 | | #include <pdf/objectcopier.hxx> |
20 | | #include <pdf/pdfwriter_impl.hxx> |
21 | | |
22 | | #include <o3tl/string_view.hxx> |
23 | | |
24 | | namespace vcl |
25 | | { |
26 | | PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer) |
27 | 0 | : m_rContainer(rContainer) |
28 | 0 | { |
29 | 0 | } |
30 | | |
31 | | void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement, |
32 | | SvMemoryStream& rDocBuffer, |
33 | | std::map<sal_Int32, sal_Int32>& rCopiedResources) |
34 | 0 | { |
35 | 0 | if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement)) |
36 | 0 | { |
37 | 0 | filter::PDFObjectElement* pReferenced = pReference->LookupObject(); |
38 | 0 | if (pReferenced) |
39 | 0 | { |
40 | | // Copy the referenced object. |
41 | 0 | sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources); |
42 | | |
43 | | // Write the updated reference. |
44 | 0 | rLine.append(nRef); |
45 | 0 | rLine.append(" 0 R"); |
46 | 0 | } |
47 | 0 | } |
48 | 0 | else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement)) |
49 | 0 | { |
50 | 0 | rLine.append("[ "); |
51 | 0 | for (auto const& pElement : pInputArray->GetElements()) |
52 | 0 | { |
53 | 0 | copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources); |
54 | 0 | rLine.append(" "); |
55 | 0 | } |
56 | 0 | rLine.append("] "); |
57 | 0 | } |
58 | 0 | else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement)) |
59 | 0 | { |
60 | 0 | rLine.append("<< "); |
61 | 0 | for (auto const& pPair : pInputDictionary->GetItems()) |
62 | 0 | { |
63 | 0 | rLine.append("/"); |
64 | 0 | rLine.append(pPair.first); |
65 | 0 | rLine.append(" "); |
66 | 0 | copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources); |
67 | 0 | rLine.append(" "); |
68 | 0 | } |
69 | 0 | rLine.append(">> "); |
70 | 0 | } |
71 | 0 | else |
72 | 0 | { |
73 | 0 | rInputElement.writeString(rLine); |
74 | 0 | } |
75 | 0 | } |
76 | | |
77 | | sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer, |
78 | | filter::PDFObjectElement& rObject, |
79 | | std::map<sal_Int32, sal_Int32>& rCopiedResources) |
80 | 0 | { |
81 | 0 | auto it = rCopiedResources.find(rObject.GetObjectValue()); |
82 | 0 | if (it != rCopiedResources.end()) |
83 | 0 | { |
84 | | // This resource was already copied once, nothing to do. |
85 | 0 | return it->second; |
86 | 0 | } |
87 | | |
88 | 0 | sal_Int32 nObject = m_rContainer.createObject(); |
89 | | // Remember what is the ID of this object in our output. |
90 | 0 | rCopiedResources[rObject.GetObjectValue()] = nObject; |
91 | 0 | SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue() |
92 | 0 | << " -> " << nObject); |
93 | | |
94 | 0 | OStringBuffer aLine = OString::number(nObject) + " 0 obj\n"; |
95 | |
|
96 | 0 | if (rObject.GetDictionary()) |
97 | 0 | { |
98 | 0 | aLine.append("<< "); |
99 | 0 | bool bFirst = true; |
100 | 0 | for (auto const& rPair : rObject.GetDictionaryItems()) |
101 | 0 | { |
102 | 0 | if (bFirst) |
103 | 0 | bFirst = false; |
104 | 0 | else |
105 | 0 | aLine.append(" "); |
106 | |
|
107 | 0 | aLine.append("/" + rPair.first + " "); |
108 | 0 | copyRecursively(aLine, *rPair.second, rDocBuffer, rCopiedResources); |
109 | 0 | } |
110 | |
|
111 | 0 | aLine.append(" >>\n"); |
112 | 0 | } |
113 | |
|
114 | 0 | filter::PDFStreamElement* pStream = rObject.GetStream(); |
115 | 0 | if (pStream) |
116 | 0 | { |
117 | 0 | aLine.append("stream\n"); |
118 | 0 | } |
119 | |
|
120 | 0 | if (filter::PDFArrayElement* pArray = rObject.GetArray()) |
121 | 0 | { |
122 | 0 | aLine.append("[ "); |
123 | |
|
124 | 0 | const std::vector<filter::PDFElement*>& rElements = pArray->GetElements(); |
125 | |
|
126 | 0 | bool bFirst = true; |
127 | 0 | for (auto const& pElement : rElements) |
128 | 0 | { |
129 | 0 | if (bFirst) |
130 | 0 | bFirst = false; |
131 | 0 | else |
132 | 0 | aLine.append(" "); |
133 | 0 | copyRecursively(aLine, *pElement, rDocBuffer, rCopiedResources); |
134 | 0 | } |
135 | 0 | aLine.append("]\n"); |
136 | 0 | } |
137 | | |
138 | | // If the object has a number element outside a dictionary or array, copy that. |
139 | 0 | if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement()) |
140 | 0 | { |
141 | 0 | pNumber->writeString(aLine); |
142 | 0 | aLine.append("\n"); |
143 | 0 | } |
144 | | // If the object has a name element outside a dictionary or array, copy that. |
145 | 0 | else if (filter::PDFNameElement* pName = rObject.GetNameElement()) |
146 | 0 | { |
147 | | // currently just handle the exact case seen in the real world |
148 | 0 | if (pName->GetValue() == "DeviceRGB") |
149 | 0 | { |
150 | 0 | pName->writeString(aLine); |
151 | 0 | aLine.append("\n"); |
152 | 0 | } |
153 | 0 | else |
154 | 0 | { |
155 | 0 | SAL_INFO("vcl.pdfwriter", |
156 | 0 | "PDFObjectCopier::copyExternalResource: skipping: " << pName->GetValue()); |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | // We have the whole object, now write it to the output. |
161 | 0 | if (!m_rContainer.updateObject(nObject)) |
162 | 0 | return -1; |
163 | 0 | if (!m_rContainer.writeBuffer(aLine)) |
164 | 0 | return -1; |
165 | 0 | aLine.setLength(0); |
166 | |
|
167 | 0 | if (pStream) |
168 | 0 | { |
169 | 0 | SvMemoryStream& rStream = pStream->GetMemory(); |
170 | 0 | m_rContainer.checkAndEnableStreamEncryption(nObject); |
171 | 0 | aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize()); |
172 | 0 | if (!m_rContainer.writeBuffer(aLine)) |
173 | 0 | return -1; |
174 | 0 | aLine.setLength(0); |
175 | 0 | m_rContainer.disableStreamEncryption(); |
176 | |
|
177 | 0 | aLine.append("\nendstream\n"); |
178 | 0 | if (!m_rContainer.writeBuffer(aLine)) |
179 | 0 | return -1; |
180 | 0 | aLine.setLength(0); |
181 | 0 | } |
182 | | |
183 | 0 | aLine.append("endobj\n\n"); |
184 | 0 | if (!m_rContainer.writeBuffer(aLine)) |
185 | 0 | return -1; |
186 | | |
187 | 0 | return nObject; |
188 | 0 | } |
189 | | |
190 | | OString PDFObjectCopier::copyExternalResources(filter::PDFObjectElement& rPage, |
191 | | const OString& rKind, |
192 | | std::map<sal_Int32, sal_Int32>& rCopiedResources) |
193 | 0 | { |
194 | | // A name - object ID map, IDs as they appear in our output, not the |
195 | | // original ones. |
196 | 0 | std::map<OString, sal_Int32> aRet; |
197 | | |
198 | | // Get the rKind subset of the resource dictionary. |
199 | 0 | std::map<OString, filter::PDFElement*> aItems; |
200 | 0 | filter::PDFObjectElement* pKindObject = nullptr; |
201 | 0 | if (auto pResources |
202 | 0 | = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"_ostr))) |
203 | 0 | { |
204 | | // Resources is a direct dictionary. |
205 | 0 | filter::PDFElement* pLookup = pResources->LookupElement(rKind); |
206 | 0 | if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup)) |
207 | 0 | { |
208 | | // rKind is an inline dictionary. |
209 | 0 | aItems = pDictionary->GetItems(); |
210 | 0 | } |
211 | 0 | else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup)) |
212 | 0 | { |
213 | | // rKind refers to a dictionary. |
214 | 0 | filter::PDFObjectElement* pReferenced = pReference->LookupObject(); |
215 | 0 | if (!pReferenced) |
216 | 0 | { |
217 | 0 | return {}; |
218 | 0 | } |
219 | | |
220 | 0 | pKindObject = pReferenced; |
221 | 0 | aItems = pReferenced->GetDictionaryItems(); |
222 | 0 | } |
223 | 0 | } |
224 | 0 | else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"_ostr)) |
225 | 0 | { |
226 | | // Resources is an indirect object. |
227 | 0 | filter::PDFElement* pValue = pPageResources->Lookup(rKind); |
228 | 0 | if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue)) |
229 | 0 | { |
230 | | // Kind is a direct dictionary. |
231 | 0 | aItems = pDictionary->GetItems(); |
232 | 0 | } |
233 | 0 | else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind)) |
234 | 0 | { |
235 | | // Kind is an indirect object. |
236 | 0 | aItems = pObject->GetDictionaryItems(); |
237 | 0 | pKindObject = pObject; |
238 | 0 | } |
239 | 0 | } |
240 | 0 | if (aItems.empty()) |
241 | 0 | return {}; |
242 | | |
243 | 0 | SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer(); |
244 | 0 | bool bHasDictValue = false; |
245 | |
|
246 | 0 | for (const auto& rItem : aItems) |
247 | 0 | { |
248 | | // For each item copy it over to our output then insert it into aRet. |
249 | 0 | auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second); |
250 | 0 | if (!pReference) |
251 | 0 | { |
252 | 0 | if (pKindObject && dynamic_cast<filter::PDFDictionaryElement*>(rItem.second)) |
253 | 0 | { |
254 | 0 | bHasDictValue = true; |
255 | 0 | break; |
256 | 0 | } |
257 | | |
258 | 0 | continue; |
259 | 0 | } |
260 | | |
261 | 0 | filter::PDFObjectElement* pValue = pReference->LookupObject(); |
262 | 0 | if (!pValue) |
263 | 0 | continue; |
264 | | |
265 | | // Then copying over an object copy its dictionary and its stream. |
266 | 0 | sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources); |
267 | 0 | aRet[rItem.first] = nObject; |
268 | 0 | } |
269 | |
|
270 | 0 | if (bHasDictValue && pKindObject) |
271 | 0 | { |
272 | 0 | sal_Int32 nObject = copyExternalResource(rDocBuffer, *pKindObject, rCopiedResources); |
273 | 0 | return "/" + rKind + " " + OString::number(nObject) + " 0 R"; |
274 | 0 | } |
275 | | |
276 | | // Build the dictionary entry string. |
277 | 0 | OStringBuffer sRet("/" + rKind + "<<"); |
278 | 0 | for (const auto& rPair : aRet) |
279 | 0 | { |
280 | 0 | sRet.append("/" + rPair.first + " " + OString::number(rPair.second) + " 0 R"); |
281 | 0 | } |
282 | 0 | sRet.append(">>"); |
283 | |
|
284 | 0 | return sRet.makeStringAndClear(); |
285 | 0 | } |
286 | | |
287 | | void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine) |
288 | 0 | { |
289 | | // Maps from source object id (PDF image) to target object id (export result). |
290 | 0 | std::map<sal_Int32, sal_Int32> aCopiedResources; |
291 | 0 | copyPageResources(pPage, rLine, aCopiedResources); |
292 | 0 | } |
293 | | |
294 | | void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine, |
295 | | std::map<sal_Int32, sal_Int32>& rCopiedResources) |
296 | 0 | { |
297 | 0 | rLine.append(" /Resources <<"); |
298 | 0 | static const std::initializer_list<OString> aKeys |
299 | 0 | = { "ColorSpace"_ostr, "ExtGState"_ostr, "Font"_ostr, |
300 | 0 | "XObject"_ostr, "Shading"_ostr, "Pattern"_ostr }; |
301 | 0 | for (const auto& rKey : aKeys) |
302 | 0 | { |
303 | 0 | rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources)); |
304 | 0 | } |
305 | 0 | rLine.append(">>"); |
306 | 0 | } |
307 | | |
308 | | sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams, |
309 | | SvMemoryStream& rStream, bool& rCompressed, |
310 | | bool bIsTaggedNonReferenceXObject) |
311 | 0 | { |
312 | 0 | for (auto pContent : rContentStreams) |
313 | 0 | { |
314 | 0 | filter::PDFStreamElement* pPageStream = pContent->GetStream(); |
315 | 0 | if (!pPageStream) |
316 | 0 | { |
317 | 0 | SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream"); |
318 | 0 | continue; |
319 | 0 | } |
320 | | |
321 | 0 | SvMemoryStream& rPageStream = pPageStream->GetMemory(); |
322 | |
|
323 | 0 | auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"_ostr)); |
324 | 0 | auto pFilterArray = dynamic_cast<filter::PDFArrayElement*>(pContent->Lookup("Filter"_ostr)); |
325 | 0 | if (!pFilter && pFilterArray) |
326 | 0 | { |
327 | 0 | auto& aElements = pFilterArray->GetElements(); |
328 | 0 | if (!aElements.empty()) |
329 | 0 | pFilter = dynamic_cast<filter::PDFNameElement*>(aElements[0]); |
330 | 0 | } |
331 | |
|
332 | 0 | if (pFilter) |
333 | 0 | { |
334 | 0 | if (pFilter->GetValue() != "FlateDecode") |
335 | 0 | { |
336 | 0 | continue; |
337 | 0 | } |
338 | | |
339 | 0 | SvMemoryStream aMemoryStream; |
340 | 0 | ZCodec aZCodec; |
341 | 0 | rPageStream.Seek(0); |
342 | 0 | aZCodec.BeginCompression(); |
343 | 0 | aZCodec.Decompress(rPageStream, aMemoryStream); |
344 | 0 | if (!aZCodec.EndCompression()) |
345 | 0 | { |
346 | 0 | SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed"); |
347 | 0 | continue; |
348 | 0 | } |
349 | | |
350 | 0 | bool bHasArtifact = false; |
351 | 0 | if (bIsTaggedNonReferenceXObject) |
352 | 0 | { |
353 | 0 | auto pStart = static_cast<const char*>(aMemoryStream.GetData()); |
354 | 0 | const char* const pEnd = pStart + aMemoryStream.GetSize(); |
355 | 0 | std::string_view aStreamView(pStart, pEnd - pStart); |
356 | |
|
357 | 0 | std::string_view sArtifact = "/Artifact"; |
358 | 0 | std::size_t nPosArtifact = aStreamView.find(sArtifact); |
359 | 0 | if (nPosArtifact != std::string_view::npos) |
360 | 0 | { |
361 | 0 | bHasArtifact = true; |
362 | 0 | SvMemoryStream aTmpStream; |
363 | 0 | std::string_view sBMC = "BMC"; |
364 | 0 | std::string_view sBDC = "BDC"; |
365 | 0 | std::string_view sEMC = "EMC"; |
366 | |
|
367 | 0 | while (!aStreamView.empty()) |
368 | 0 | { |
369 | 0 | aTmpStream.WriteOString(aStreamView.substr(0, nPosArtifact)); |
370 | 0 | aStreamView.remove_prefix(nPosArtifact + sArtifact.size()); |
371 | |
|
372 | 0 | std::size_t nPosBMC = aStreamView.find(sBMC); |
373 | 0 | std::size_t nPosBDC = aStreamView.find(sBDC); |
374 | 0 | std::size_t nPos = std::min(nPosBMC, nPosBDC); |
375 | |
|
376 | 0 | if (nPos != std::string_view::npos) |
377 | 0 | { |
378 | 0 | if (nPos == nPosBMC) |
379 | 0 | aStreamView.remove_prefix(nPos + sBMC.size() + 1); |
380 | 0 | else |
381 | 0 | aStreamView.remove_prefix(nPos + sBDC.size() + 1); |
382 | |
|
383 | 0 | std::size_t nPosEMC = aStreamView.find(sEMC); |
384 | 0 | if (nPosEMC != std::string_view::npos) |
385 | 0 | { |
386 | 0 | aTmpStream.WriteOString(aStreamView.substr(0, nPosEMC)); |
387 | 0 | aStreamView.remove_prefix(nPosEMC + sEMC.size() + 1); |
388 | 0 | } |
389 | 0 | } |
390 | |
|
391 | 0 | nPosArtifact = aStreamView.find(sArtifact); |
392 | 0 | if (nPosArtifact == std::string_view::npos) |
393 | 0 | { |
394 | 0 | aTmpStream.WriteOString(aStreamView); |
395 | 0 | break; |
396 | 0 | } |
397 | 0 | } |
398 | 0 | rStream.WriteBytes(aTmpStream.GetData(), aTmpStream.GetSize()); |
399 | 0 | } |
400 | 0 | } |
401 | |
|
402 | 0 | if (!bHasArtifact) |
403 | 0 | rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize()); |
404 | 0 | } |
405 | 0 | else |
406 | 0 | { |
407 | 0 | rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize()); |
408 | 0 | } |
409 | 0 | } |
410 | | |
411 | 0 | rCompressed = PDFWriterImpl::compressStream(&rStream); |
412 | |
|
413 | 0 | return rStream.Tell(); |
414 | 0 | } |
415 | | } |
416 | | |
417 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |