Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/vcl/source/gdi/pdfobjectcopier.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#include <sal/log.hxx>
11
#include <sal/types.h>
12
#include <rtl/strbuf.hxx>
13
#include <tools/stream.hxx>
14
#include <tools/zcodec.hxx>
15
16
#include <vcl/filter/pdfdocument.hxx>
17
#include <vcl/filter/pdfobjectcontainer.hxx>
18
19
#include <pdf/objectcopier.hxx>
20
#include <pdf/pdfwriter_impl.hxx>
21
22
#include <o3tl/string_view.hxx>
23
24
namespace vcl
25
{
26
/// Binds the copier to rContainer, which the other member functions use to
/// create object ids and to write the copied objects.
PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer)
    : m_rContainer(rContainer)
{
}
30
31
void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement,
32
                                      SvMemoryStream& rDocBuffer,
33
                                      std::map<sal_Int32, sal_Int32>& rCopiedResources)
34
0
{
35
0
    if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement))
36
0
    {
37
0
        filter::PDFObjectElement* pReferenced = pReference->LookupObject();
38
0
        if (pReferenced)
39
0
        {
40
            // Copy the referenced object.
41
0
            sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
42
43
            // Write the updated reference.
44
0
            rLine.append(nRef);
45
0
            rLine.append(" 0 R");
46
0
        }
47
0
    }
48
0
    else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement))
49
0
    {
50
0
        rLine.append("[ ");
51
0
        for (auto const& pElement : pInputArray->GetElements())
52
0
        {
53
0
            copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources);
54
0
            rLine.append(" ");
55
0
        }
56
0
        rLine.append("] ");
57
0
    }
58
0
    else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement))
59
0
    {
60
0
        rLine.append("<< ");
61
0
        for (auto const& pPair : pInputDictionary->GetItems())
62
0
        {
63
0
            rLine.append("/");
64
0
            rLine.append(pPair.first);
65
0
            rLine.append(" ");
66
0
            copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources);
67
0
            rLine.append(" ");
68
0
        }
69
0
        rLine.append(">> ");
70
0
    }
71
0
    else
72
0
    {
73
0
        rInputElement.writeString(rLine);
74
0
    }
75
0
}
76
77
/**
 * Copies rObject (its dictionary, array, bare number/name and stream, as far
 * as present) into the output container as a new object.
 *
 * @param rDocBuffer        forwarded to copyRecursively().
 * @param rObject           the source object to copy.
 * @param rCopiedResources  source object id -> output object id; consulted so
 *                          that an object is copied only once, and extended
 *                          with the newly created id.
 * @return the id of the object in the output, or -1 if updating the object or
 *         writing to the container failed.
 */
sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
                                                filter::PDFObjectElement& rObject,
                                                std::map<sal_Int32, sal_Int32>& rCopiedResources)
{
    // Copied before? Then just hand out the id it already has in our output.
    const auto aKnown = rCopiedResources.find(rObject.GetObjectValue());
    if (aKnown != rCopiedResources.end())
        return aKnown->second;

    const sal_Int32 nObject = m_rContainer.createObject();
    // The mapping is recorded before any child is visited; presumably so that
    // references leading back to this object reuse the new id.
    rCopiedResources[rObject.GetObjectValue()] = nObject;
    SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
                                                                        << " -> " << nObject);

    OStringBuffer aLine(OString::number(nObject) + " 0 obj\n");

    // Hands the pending text to the container and empties the buffer again.
    const auto flushLine = [this, &aLine]() -> bool {
        if (!m_rContainer.writeBuffer(aLine))
            return false;
        aLine.setLength(0);
        return true;
    };

    if (rObject.GetDictionary())
    {
        aLine.append("<< ");
        bool bNeedSeparator = false;
        for (const auto & [ rKey, pValue ] : rObject.GetDictionaryItems())
        {
            if (bNeedSeparator)
                aLine.append(" ");
            bNeedSeparator = true;

            aLine.append("/" + rKey + " ");
            copyRecursively(aLine, *pValue, rDocBuffer, rCopiedResources);
        }
        aLine.append(" >>\n");
    }

    filter::PDFStreamElement* const pStream = rObject.GetStream();
    if (pStream)
        aLine.append("stream\n");

    if (filter::PDFArrayElement* pArray = rObject.GetArray())
    {
        aLine.append("[ ");
        bool bNeedSeparator = false;
        for (filter::PDFElement* pItem : pArray->GetElements())
        {
            if (bNeedSeparator)
                aLine.append(" ");
            bNeedSeparator = true;

            copyRecursively(aLine, *pItem, rDocBuffer, rCopiedResources);
        }
        aLine.append("]\n");
    }

    if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
    {
        // A number that sits directly in the object, outside any dictionary or array.
        pNumber->writeString(aLine);
        aLine.append("\n");
    }
    else if (filter::PDFNameElement* pName = rObject.GetNameElement())
    {
        // A name that sits directly in the object. Only the one value met in
        // practice so far is copied; every other name is merely logged.
        if (pName->GetValue() == "DeviceRGB")
        {
            pName->writeString(aLine);
            aLine.append("\n");
        }
        else
        {
            SAL_INFO("vcl.pdfwriter",
                     "PDFObjectCopier::copyExternalResource: skipping: " << pName->GetValue());
        }
    }

    // The object header and body are complete: emit them.
    if (!m_rContainer.updateObject(nObject))
        return -1;
    if (!flushLine())
        return -1;

    if (pStream)
    {
        // Only the raw stream bytes are written between the enable/disable calls.
        SvMemoryStream& rStreamData = pStream->GetMemory();
        m_rContainer.checkAndEnableStreamEncryption(nObject);
        aLine.append(static_cast<const char*>(rStreamData.GetData()), rStreamData.GetSize());
        if (!flushLine())
            return -1;
        m_rContainer.disableStreamEncryption();

        aLine.append("\nendstream\n");
        if (!flushLine())
            return -1;
    }

    aLine.append("endobj\n\n");
    if (!flushLine())
        return -1;

    return nObject;
}
189
190
/**
 * Copies the resources of one kind (e.g. "Font", "XObject") that rPage refers
 * to and returns the matching entry for a /Resources dictionary.
 *
 * Both /Resources and its rKind entry may be either direct dictionaries or
 * indirect objects; all four combinations are handled.
 *
 * @param rPage             the page whose resources are copied.
 * @param rKind             key inside /Resources to process.
 * @param rCopiedResources  source object id -> output object id, shared with
 *                          copyExternalResource().
 * @return an empty string if there is nothing of that kind; otherwise either
 *         "/<kind> <id> 0 R" (the kind object was copied as a whole) or
 *         "/<kind><</Name <id> 0 R...>>".
 */
OString PDFObjectCopier::copyExternalResources(filter::PDFObjectElement& rPage,
                                               const OString& rKind,
                                               std::map<sal_Int32, sal_Int32>& rCopiedResources)
{
    // The entries of the rKind sub-dictionary.
    std::map<OString, filter::PDFElement*> aKindItems;
    // Set only when rKind itself is stored as an indirect object.
    filter::PDFObjectElement* pKindObject = nullptr;

    if (auto pResourceDict
        = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"_ostr)))
    {
        // /Resources is a direct dictionary.
        filter::PDFElement* pKind = pResourceDict->LookupElement(rKind);
        if (auto pInlineKind = dynamic_cast<filter::PDFDictionaryElement*>(pKind))
        {
            // ... and so is rKind.
            aKindItems = pInlineKind->GetItems();
        }
        else if (auto pKindRef = dynamic_cast<filter::PDFReferenceElement*>(pKind))
        {
            // ... but rKind is a reference to another object.
            pKindObject = pKindRef->LookupObject();
            if (!pKindObject)
                return {};

            aKindItems = pKindObject->GetDictionaryItems();
        }
    }
    else if (filter::PDFObjectElement* pResourceObject = rPage.LookupObject("Resources"_ostr))
    {
        // /Resources is an indirect object.
        filter::PDFElement* pKind = pResourceObject->Lookup(rKind);
        if (auto pInlineKind = dynamic_cast<filter::PDFDictionaryElement*>(pKind))
        {
            // rKind is a direct dictionary inside it.
            aKindItems = pInlineKind->GetItems();
        }
        else if (filter::PDFObjectElement* pIndirectKind = pResourceObject->LookupObject(rKind))
        {
            // rKind is an indirect object as well.
            pKindObject = pIndirectKind;
            aKindItems = pIndirectKind->GetDictionaryItems();
        }
    }

    if (aKindItems.empty())
        return {};

    SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();

    // Resource name -> object id in our output (not the id in the source).
    std::map<OString, sal_Int32> aCopiedByName;
    bool bHasDictValue = false;

    for (const auto & [ rName, pElement ] : aKindItems)
    {
        if (auto pItemRef = dynamic_cast<filter::PDFReferenceElement*>(pElement))
        {
            // Referenced value: copy the target object and remember its new id.
            if (filter::PDFObjectElement* pTarget = pItemRef->LookupObject())
            {
                const sal_Int32 nCopiedId
                    = copyExternalResource(rDocBuffer, *pTarget, rCopiedResources);
                aCopiedByName[rName] = nCopiedId;
            }
            continue;
        }

        // A direct dictionary value inside an indirect kind object: stop here,
        // the kind object is copied as a whole below.
        if (pKindObject && dynamic_cast<filter::PDFDictionaryElement*>(pElement))
        {
            bHasDictValue = true;
            break;
        }
    }

    if (bHasDictValue && pKindObject)
    {
        const sal_Int32 nKindId = copyExternalResource(rDocBuffer, *pKindObject, rCopiedResources);
        return "/" + rKind + " " + OString::number(nKindId) + " 0 R";
    }

    // Assemble "/<kind><</Name id 0 R...>>" from the copied entries.
    OStringBuffer aEntry("/" + rKind + "<<");
    for (const auto & [ rName, nId ] : aCopiedByName)
        aEntry.append("/" + rName + " " + OString::number(nId) + " 0 R");
    aEntry.append(">>");

    return aEntry.makeStringAndClear();
}
286
287
/// Convenience overload: copies the resources of pPage into rLine using a
/// source-id -> output-id map that lives only for the duration of this call.
void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine)
{
    // Source object id (in the PDF image) -> target object id (in the export result).
    std::map<sal_Int32, sal_Int32> aFreshMapping;
    copyPageResources(pPage, rLine, aFreshMapping);
}
293
294
void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
295
                                        std::map<sal_Int32, sal_Int32>& rCopiedResources)
296
0
{
297
0
    rLine.append(" /Resources <<");
298
0
    static const std::initializer_list<OString> aKeys
299
0
        = { "ColorSpace"_ostr, "ExtGState"_ostr, "Font"_ostr,
300
0
            "XObject"_ostr,    "Shading"_ostr,   "Pattern"_ostr };
301
0
    for (const auto& rKey : aKeys)
302
0
    {
303
0
        rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
304
0
    }
305
0
    rLine.append(">>");
306
0
}
307
308
/**
 * Concatenates the page content streams into rStream and compresses the result.
 *
 * Streams without a /Filter are copied verbatim. Streams whose (first) filter
 * is FlateDecode are inflated first; streams with any other filter, without a
 * stream body, or that fail to inflate are skipped.
 *
 * @param rContentStreams               content stream objects of the page.
 * @param rStream                       receives the data, then is passed to
 *                                      PDFWriterImpl::compressStream().
 * @param rCompressed                   set to the result of compressStream().
 * @param bIsTaggedNonReferenceXObject  if true, "/Artifact ... BMC|BDC" and the
 *                                      following "EMC" marker are cut out of
 *                                      inflated streams, keeping the content
 *                                      in between.
 * @return rStream.Tell() after compression.
 */
sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
                                           SvMemoryStream& rStream, bool& rCompressed,
                                           bool bIsTaggedNonReferenceXObject)
{
    // Drops nCount leading bytes, but never more than the view holds. The
    // marker handling below skips "operator + 1 following byte"; when the
    // operator is the very last token of the stream that byte does not exist,
    // and std::string_view::remove_prefix() with a count larger than size() is
    // undefined behaviour.
    const auto dropPrefix = [](std::string_view& rView, std::size_t nCount) {
        rView.remove_prefix(std::min(nCount, rView.size()));
    };

    for (auto pContent : rContentStreams)
    {
        filter::PDFStreamElement* pPageStream = pContent->GetStream();
        if (!pPageStream)
        {
            SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
            continue;
        }

        SvMemoryStream& rPageStream = pPageStream->GetMemory();

        // /Filter is either a single name or an array of names; for an array
        // only the first entry is looked at.
        filter::PDFElement* pFilterElement = pContent->Lookup("Filter"_ostr);
        auto pFilter = dynamic_cast<filter::PDFNameElement*>(pFilterElement);
        if (!pFilter)
        {
            if (auto pFilterArray = dynamic_cast<filter::PDFArrayElement*>(pFilterElement))
            {
                auto& rFilters = pFilterArray->GetElements();
                if (!rFilters.empty())
                    pFilter = dynamic_cast<filter::PDFNameElement*>(rFilters[0]);
            }
        }

        if (!pFilter)
        {
            // Unfiltered stream: take the bytes as they are.
            rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
            continue;
        }

        if (pFilter->GetValue() != "FlateDecode")
        {
            // Any other filter is not handled; the stream is left out.
            continue;
        }

        SvMemoryStream aMemoryStream;
        ZCodec aZCodec;
        rPageStream.Seek(0);
        aZCodec.BeginCompression();
        aZCodec.Decompress(rPageStream, aMemoryStream);
        if (!aZCodec.EndCompression())
        {
            SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
            continue;
        }

        bool bHasArtifact = false;
        if (bIsTaggedNonReferenceXObject)
        {
            auto pStart = static_cast<const char*>(aMemoryStream.GetData());
            const char* const pEnd = pStart + aMemoryStream.GetSize();
            std::string_view aStreamView(pStart, pEnd - pStart);

            const std::string_view sArtifact = "/Artifact";
            std::size_t nPosArtifact = aStreamView.find(sArtifact);
            if (nPosArtifact != std::string_view::npos)
            {
                bHasArtifact = true;
                SvMemoryStream aTmpStream;
                const std::string_view sBMC = "BMC";
                const std::string_view sBDC = "BDC";
                const std::string_view sEMC = "EMC";

                while (!aStreamView.empty())
                {
                    // Keep everything in front of the tag, then drop the tag itself.
                    aTmpStream.WriteOString(aStreamView.substr(0, nPosArtifact));
                    aStreamView.remove_prefix(nPosArtifact + sArtifact.size());

                    // The marked-content section is opened by whichever of
                    // BMC / BDC comes first after the tag.
                    const std::size_t nPosBMC = aStreamView.find(sBMC);
                    const std::size_t nPosBDC = aStreamView.find(sBDC);
                    const std::size_t nPos = std::min(nPosBMC, nPosBDC);

                    if (nPos != std::string_view::npos)
                    {
                        // Drop up to and including the operator plus the byte
                        // that follows it (if there is one).
                        const std::size_t nOperatorLen
                            = (nPos == nPosBMC) ? sBMC.size() : sBDC.size();
                        dropPrefix(aStreamView, nPos + nOperatorLen + 1);

                        const std::size_t nPosEMC = aStreamView.find(sEMC);
                        if (nPosEMC != std::string_view::npos)
                        {
                            // The content of the section stays, its end marker goes.
                            aTmpStream.WriteOString(aStreamView.substr(0, nPosEMC));
                            dropPrefix(aStreamView, nPosEMC + sEMC.size() + 1);
                        }
                    }

                    nPosArtifact = aStreamView.find(sArtifact);
                    if (nPosArtifact == std::string_view::npos)
                    {
                        // No further tag: the remainder is copied unchanged.
                        aTmpStream.WriteOString(aStreamView);
                        break;
                    }
                }
                rStream.WriteBytes(aTmpStream.GetData(), aTmpStream.GetSize());
            }
        }

        if (!bHasArtifact)
            rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
    }

    rCompressed = PDFWriterImpl::compressStream(&rStream);

    return rStream.Tell();
}
415
}
416
417
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */