Coverage Report

Created: 2025-12-31 10:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/sdext/source/pdfimport/pdfparse/pdfentries.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
21
#include <pdfparse.hxx>
22
23
#include <comphelper/hash.hxx>
24
25
#include <rtl/strbuf.hxx>
26
#include <rtl/ustring.hxx>
27
#include <rtl/ustrbuf.hxx>
28
#include <rtl/digest.h>
29
#include <rtl/cipher.h>
30
#include <sal/log.hxx>
31
32
#include <zlib.h>
33
34
#include <math.h>
35
#include <map>
36
37
#include <string.h>
38
39
40
namespace pdfparse
41
{
42
43
struct EmitImplData
44
{
45
    // xref table: maps object number to a pair of (generation, buffer offset)
46
    typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47
    XRefTable m_aXRefTable;
48
    // container of all indirect objects (usually a PDFFile*)
49
    const PDFContainer* m_pObjectContainer;
50
    unsigned int m_nDecryptObject;
51
    unsigned int m_nDecryptGeneration;
52
53
    // returns true if the xref table was updated
54
    bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55
0
    {
56
0
        XRefTable::iterator it = m_aXRefTable.find( nObject );
57
0
        if( it == m_aXRefTable.end() )
58
0
        {
59
            // new entry
60
0
            m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61
0
            return true;
62
0
        }
63
        // update old entry, if generation number is higher
64
0
        if( it->second.first < nGeneration )
65
0
        {
66
0
            it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67
0
            return true;
68
0
        }
69
0
        return false;
70
0
    }
71
72
    explicit EmitImplData( const PDFContainer* pTopContainer ) :
73
0
        m_pObjectContainer( pTopContainer ),
74
0
        m_nDecryptObject( 0 ),
75
0
        m_nDecryptGeneration( 0 )
76
0
    {}
77
    void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78
                  unsigned int nObject, unsigned int nGeneration ) const
79
0
    {
80
0
        const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81
0
        pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82
0
    }
83
84
    void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85
0
    {
86
0
        m_nDecryptObject = nObject;
87
0
        m_nDecryptGeneration = nGeneration;
88
0
    }
89
};
90
91
}
92
93
using namespace pdfparse;
94
95
// Creates an emit context with deflating and decrypting switched off.
// Implementation data is only allocated when a top level container is given.
EmitContext::EmitContext( const PDFContainer* pTop ) :
    m_bDeflate( false ),
    m_bDecrypt( false )
{
    if( !pTop )
        return;
    m_pImplData.reset( new EmitImplData( pTop ) );
}
102
103
// Defined in this file, where EmitImplData is a complete type.
EmitContext::~EmitContext() = default;
106
107
// Nothing to release explicitly.
PDFEntry::~PDFEntry() = default;
110
111
// Returns the implementation data attached to rContext (may be null);
// ownership stays with the context.
EmitImplData* PDFEntry::getEmitData( EmitContext const & rContext )
{
    EmitImplData* pData = rContext.m_pImplData.get();
    return pData;
}
115
116
// Makes rContext the owner of pNewEmitData (which may be null), releasing
// whatever implementation data the context held before.
void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
{
    // Handing in the pointer the context already owns must be a no-op:
    // calling reset() with the currently held pointer would destroy the
    // object and leave the context with a dangling pointer.
    if( rContext.m_pImplData.get() == pNewEmitData )
        return;
    rContext.m_pImplData.reset( pNewEmitData );
}
122
123
// Nothing to release explicitly.
PDFValue::~PDFValue() = default;
126
127
// Nothing to release explicitly.
PDFComment::~PDFComment() = default;
130
131
// Writes the stored comment text unchanged; returns the result of the write.
bool PDFComment::emit( EmitContext& rWriteContext ) const
{
    const char* pText = m_aComment.getStr();
    const sal_Int32 nTextLen = m_aComment.getLength();
    return rWriteContext.write( pText, nTextLen );
}
135
136
// Returns a newly allocated copy of this comment; the caller takes ownership.
PDFEntry* PDFComment::clone() const
{
    PDFEntry* pCopy = new PDFComment( m_aComment );
    return pCopy;
}
140
141
// Nothing to release explicitly.
PDFName::~PDFName() = default;
144
145
// Writes " /" followed by the raw (still escaped) name; stops at the first
// failing write.
bool PDFName::emit( EmitContext& rWriteContext ) const
{
    return rWriteContext.write( " /", 2 )
        && rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
}
151
152
// Returns a newly allocated copy of this name; the caller takes ownership.
PDFEntry* PDFName::clone() const
{
    PDFEntry* pCopy = new PDFName( m_aName );
    return pCopy;
}
156
157
// Returns the name with every "#XX" escape (a '#' followed by two
// characters read as hexadecimal digits) replaced by the byte it encodes.
// The resulting byte sequence is converted from UTF-8.
// A '#' that is not followed by at least two characters is copied as is.
OUString PDFName::getFilteredName() const
{
    // value of a hexadecimal digit; any other character counts as 0
    const auto hexValue = []( char c ) -> int
    {
        if( c >= '0' && c <= '9' )
            return c - '0';
        if( c >= 'a' && c <= 'f' )
            return c - 'a' + 10;
        if( c >= 'A' && c <= 'F' )
            return c - 'A' + 10;
        return 0;
    };

    const char* pStr = m_aName.getStr();
    const unsigned int nLen = m_aName.getLength();
    OStringBuffer aFilter( m_aName.getLength() );
    for( unsigned int i = 0; i < nLen; i++ )
    {
        // "nLen - i > 2" cannot wrap because i < nLen holds here. The
        // previous test "i < nLen - 3" wrapped around for names shorter
        // than three characters and ignored an escape at the very end.
        if( pStr[i] == '#' && nLen - i > 2 )
        {
            const int nHigh = hexValue( pStr[i+1] );
            const int nLow = hexValue( pStr[i+2] );
            aFilter.append( char( (nHigh << 4) | nLow ) );
            i += 2; // skip the two digits just consumed
        }
        else
            aFilter.append( pStr[i] );
    }
    return OStringToOUString( aFilter, RTL_TEXTENCODING_UTF8 );
}
188
189
// Nothing to release explicitly.
PDFString::~PDFString() = default;
192
193
// Writes a blank followed by the string.
// Without decryption the string is written exactly as it was parsed.
// With decryption active (and an object to decrypt set) the unescaped
// string is decrypted and written as a hex string "<...>" if it starts
// with a UTF-16 byte order mark, otherwise as "(...)".
bool PDFString::emit( EmitContext& rWriteContext ) const
{
    if( ! rWriteContext.write( " ", 1 ) )
        return false;

    EmitImplData* pEData = getEmitData( rWriteContext );
    const bool bDecrypt = rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject;
    if( !bDecrypt )
        return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );

    OString aFiltered( getFilteredString() );
    const char* pStr = aFiltered.getStr();
    const sal_Int32 nFiltered = aFiltered.getLength();
    // decrypt inplace (evil since OString is supposed to be const
    // however in this case we know that getFilteredString returned a singular string instance
    pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(pStr), nFiltered,
                     reinterpret_cast<sal_uInt8 *>(const_cast<char *>(pStr)),
                     pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );

    // check for string or hex string: a leading FF FE or FE FF selects hex
    bool bHasBOM = false;
    if( nFiltered > 1 )
    {
        const unsigned char c0 = static_cast<unsigned char>(pStr[0]);
        const unsigned char c1 = static_cast<unsigned char>(pStr[1]);
        bHasBOM = ( c0 == 0xff && c1 == 0xfe ) || ( c0 == 0xfe && c1 == 0xff );
    }

    if( !bHasBOM )
    {
        return rWriteContext.write( "(", 1 )
            && rWriteContext.write( pStr, nFiltered )
            && rWriteContext.write( ")", 1 );
    }

    static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
    if( ! rWriteContext.write( "<", 1 ) )
        return false;
    for( sal_Int32 i = 0; i < nFiltered; i++ )
    {
        // high nibble first, then low nibble
        const unsigned char nByte = static_cast<unsigned char>(pStr[i]);
        if( ! rWriteContext.write( &pHexTab[ nByte >> 4 ], 1 ) )
            return false;
        if( ! rWriteContext.write( &pHexTab[ nByte & 0x0f ], 1 ) )
            return false;
    }
    return rWriteContext.write( ">", 1 );
}
239
240
// Returns a newly allocated copy of this string; the caller takes ownership.
PDFEntry* PDFString::clone() const
{
    PDFEntry* pCopy = new PDFString( m_aString );
    return pCopy;
}
244
245
// Returns the raw bytes denoted by the string token.
// "(...)" literal strings get their backslash escapes resolved; the last
// character of the token is taken to be the closing parenthesis.
// "<...>" hex strings are converted two digits at a time; a missing final
// digit counts as 0 and characters that are no hex digits count as 0.
// Any other token yields an empty string.
OString PDFString::getFilteredString() const
{
    const int nLen = m_aString.getLength();
    OStringBuffer aBuf( nLen );

    const char* pStr = m_aString.getStr();
    if( *pStr == '(' )
    {
        const char* pRun = pStr+1;
        while( pRun - pStr < nLen-1 )
        {
            if( *pRun == '\\' )
            {
                pRun++;
                if( pRun - pStr < nLen )
                {
                    char aEsc = 0;
                    switch( *pRun )
                    {
                        case 'n':  aEsc = '\n'; break;
                        case 'r':  aEsc = '\r'; break;
                        case 't':  aEsc = '\t'; break;
                        case 'b':  aEsc = '\b'; break;
                        case 'f':  aEsc = '\f'; break;
                        case '(':  aEsc = '(';  break;
                        case ')':  aEsc = ')';  break;
                        case '\\': aEsc = '\\'; break;
                        case '\n':
                            // escaped line end: line continuation, emits nothing
                            pRun++;
                            continue;
                        case '\r':
                            // escaped CR or CR LF: line continuation as well
                            pRun++;
                            if( *pRun == '\n' )
                                pRun++;
                            continue;
                        default:
                        {
                            // up to three octal digits
                            int nDigits = 0;
                            while( nDigits < 3 && *pRun >= '0' && *pRun <= '7' )
                            {
                                aEsc = 8*aEsc + (*pRun++ - '0');
                                nDigits++;
                            }
                            if( nDigits == 0 )
                            {
                                // Unknown escape: only the backslash is
                                // dropped, the character itself is handled
                                // by the next round of the loop. Previously
                                // a spurious NUL byte was emitted here.
                                continue;
                            }
                            // move pointer back to last character of octal sequence
                            pRun--;
                        }
                    }
                    aBuf.append( aEsc );
                }
            }
            else
                aBuf.append( *pRun );
            // move pointer to next character
            pRun++;
        }
    }
    else if( *pStr == '<' )
    {
        // value of a hexadecimal digit; any other character counts as 0
        const auto hexValue = []( char c ) -> int
        {
            if( c >= '0' && c <= '9' )
                return c - '0';
            if( c >= 'a' && c <= 'f' )
                return c - 'a' + 10;
            if( c >= 'A' && c <= 'F' )
                return c - 'A' + 10;
            return 0;
        };

        const char* pRun = pStr+1;
        // The range check has to come before the dereference: with an
        // unterminated hex string pRun can end up beyond the terminating
        // NUL after the second increment below.
        while( pRun - pStr < nLen && *pRun != '>' )
        {
            char rResult = char( hexValue( *pRun ) << 4 );
            pRun++;
            if( pRun - pStr < nLen && *pRun != '>' )
                rResult |= char( hexValue( *pRun ) );
            pRun++;
            aBuf.append( rResult );
        }
    }

    return aBuf.makeStringAndClear();
}
336
337
// Nothing to release explicitly.
PDFNumber::~PDFNumber() = default;
340
341
// Writes a blank followed by the number in plain decimal notation with at
// most five fractional digits. Further digits are cut off, trailing zeros
// are not written, and a fraction within 1e-5 of the next integer is
// rounded up to that integer.
bool PDFNumber::emit( EmitContext& rWriteContext ) const
{
    const int nPrecision = 5;

    // split the magnitude into integer part and fraction
    double fRest = m_fValue;
    const bool bNeg = fRest < 0.0;
    if( bNeg )
        fRest = -fRest;

    sal_Int64 nInt = static_cast<sal_Int64>(fRest);
    fRest -= static_cast<double>(nInt);
    // optimizing hardware may lead to a value of 1.0 after the subtraction
    if( fRest == 1.0 || log10( 1.0-fRest ) <= -nPrecision )
    {
        nInt++;
        fRest = 0.0;
    }

    // fraction scaled to an integer of nPrecision digits (truncated)
    sal_Int64 nFrac = 0;
    if( fRest )
    {
        fRest *= pow( 10.0, static_cast<double>(nPrecision) );
        nFrac = static_cast<sal_Int64>(fRest);
    }

    OStringBuffer aBuf( 32 );
    aBuf.append( ' ' );
    // no sign for a value that is written as zero
    if( bNeg && ( nInt || nFrac ) )
        aBuf.append( '-' );
    aBuf.append( nInt );
    if( nFrac )
    {
        aBuf.append( '.' );
        // peel off digits from the most significant one downwards and stop
        // as soon as only zeros would follow
        sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
        for( int nDigit = 0; ( nDigit < nPrecision ) && nFrac; nDigit++, nBound /= 10 )
        {
            const sal_Int64 nNumb = nFrac / nBound;
            nFrac -= nNumb * nBound;
            aBuf.append( nNumb );
        }
    }

    return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
}
388
389
// Returns a newly allocated copy of this number; the caller takes ownership.
PDFEntry* PDFNumber::clone() const
{
    PDFEntry* pCopy = new PDFNumber( m_fValue );
    return pCopy;
}
393
394
395
// Nothing to release explicitly.
PDFBool::~PDFBool() = default;
398
399
// Writes " true" or " false" depending on the stored value.
bool PDFBool::emit( EmitContext& rWriteContext ) const
{
    if( m_bValue )
        return rWriteContext.write( " true", 5 );
    return rWriteContext.write( " false", 6 );
}
403
404
// Returns a newly allocated copy of this boolean; the caller takes ownership.
PDFEntry* PDFBool::clone() const
{
    PDFEntry* pCopy = new PDFBool( m_bValue );
    return pCopy;
}
408
409
// Nothing to release explicitly.
PDFNull::~PDFNull() = default;
412
413
// Writes the keyword " null" (with leading blank).
bool PDFNull::emit( EmitContext& rWriteContext ) const
{
    const bool bWritten = rWriteContext.write( " null", 5 );
    return bWritten;
}
417
418
// Returns a newly allocated null object; the caller takes ownership.
PDFEntry* PDFNull::clone() const
{
    PDFEntry* pCopy = new PDFNull();
    return pCopy;
}
422
423
424
// Nothing to release explicitly.
PDFObjectRef::~PDFObjectRef() = default;
427
428
// Writes the indirect reference as " <number> <generation> R".
bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
{
    const OString aNumber( OString::number( sal_Int32( m_nNumber ) ) );
    const OString aGeneration( OString::number( sal_Int32( m_nGeneration ) ) );
    const OString aRef = " " + aNumber + " " + aGeneration + " R";
    return rWriteContext.write( aRef.getStr(), aRef.getLength() );
}
438
439
// Returns a newly allocated copy of this reference; the caller takes ownership.
PDFEntry* PDFObjectRef::clone() const
{
    PDFEntry* pCopy = new PDFObjectRef( m_nNumber, m_nGeneration );
    return pCopy;
}
443
444
// The sub elements are held by unique_ptr and go away with the vector.
PDFContainer::~PDFContainer() = default;
447
448
// Emits all sub elements in order and stops with false at the first one
// that fails. While decrypting, a name "Encrypt" is left out together with
// the element that directly follows it.
bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
{
    const int nCount = m_aSubElements.size();
    for( int nIdx = 0; nIdx < nCount; nIdx++ )
    {
        PDFEntry* pEntry = m_aSubElements[nIdx].get();
        if( rWriteContext.m_bDecrypt )
        {
            const PDFName* pName = dynamic_cast<PDFName*>(pEntry);
            if (pName && pName->m_aName == "Encrypt")
            {
                // step over the value as well; the loop increment then
                // moves on to the element after it
                nIdx++;
                continue;
            }
        }
        if( ! pEntry->emit( rWriteContext ) )
            return false;
    }
    return true;
}
467
468
void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469
0
{
470
0
    int nEle = m_aSubElements.size();
471
0
    for( int i = 0; i < nEle; i++ )
472
0
        rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473
0
}
474
475
// Returns the first direct sub element that is a PDFObject with the given
// number and generation, or nullptr if there is none.
PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
{
    for( const auto& rpEntry : m_aSubElements )
    {
        PDFObject* pCandidate = dynamic_cast<PDFObject*>(rpEntry.get());
        if( !pCandidate )
            continue;
        if( pCandidate->m_nNumber == nNumber && pCandidate->m_nGeneration == nGeneration )
            return pCandidate;
    }
    return nullptr;
}
490
491
// Nothing to release explicitly.
PDFArray::~PDFArray() = default;
494
495
// Writes "[", all sub elements and "]"; stops at the first failing step.
bool PDFArray::emit( EmitContext& rWriteContext ) const
{
    return rWriteContext.write( "[", 1 )
        && emitSubElements( rWriteContext )
        && rWriteContext.write( "]", 1 );
}
503
504
// Returns a newly allocated array holding clones of all sub elements; the
// caller takes ownership.
PDFEntry* PDFArray::clone() const
{
    PDFArray* pCopy = new PDFArray();
    cloneSubElements( pCopy->m_aSubElements );
    return pCopy;
}
510
511
// Nothing to release explicitly; the map only holds non-owning pointers
// into the sub elements.
PDFDict::~PDFDict() = default;
514
515
// Writes "<<\n", all sub elements and "\n>>\n"; stops at the first failing
// step.
bool PDFDict::emit( EmitContext& rWriteContext ) const
{
    return rWriteContext.write( "<<\n", 3 )
        && emitSubElements( rWriteContext )
        && rWriteContext.write( "\n>>\n", 4 );
}
523
524
void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525
0
{
526
0
    if( ! pValue )
527
0
        eraseValue( rName );
528
529
0
    PDFEntry* pValueTmp = nullptr;
530
0
    std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531
0
    if( it == m_aMap.end() )
532
0
    {
533
        // new name/value, pair, append it
534
0
        m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535
0
        m_aSubElements.emplace_back( std::move(pValue) );
536
0
        pValueTmp = m_aSubElements.back().get();
537
0
    }
538
0
    else
539
0
    {
540
0
        unsigned int nSub = m_aSubElements.size();
541
0
        for( unsigned int i = 0; i < nSub; i++ )
542
0
            if( m_aSubElements[i].get() == it->second )
543
0
            {
544
0
                m_aSubElements[i] = std::move(pValue);
545
0
                pValueTmp = m_aSubElements[i].get();
546
0
                break;
547
0
            }
548
0
    }
549
0
    assert(pValueTmp);
550
0
    m_aMap[ rName ] = pValueTmp;
551
0
}
552
553
void PDFDict::eraseValue( std::string_view rName )
554
0
{
555
0
    unsigned int nEle = m_aSubElements.size();
556
0
    for( unsigned int i = 0; i < nEle; i++ )
557
0
    {
558
0
        PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
559
0
        if( pName && pName->m_aName == rName )
560
0
        {
561
0
            for( unsigned int j = i+1; j < nEle; j++ )
562
0
            {
563
0
                if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
564
0
                {
565
                    // remove and free subelements from vector
566
0
                    m_aSubElements.erase( m_aSubElements.begin()+j );
567
0
                    m_aSubElements.erase( m_aSubElements.begin()+i );
568
0
                    buildMap();
569
0
                    return;
570
0
                }
571
0
            }
572
0
        }
573
0
    }
574
0
}
575
576
// Rebuilds the name -> value map from the sub elements, ignoring comments.
// Returns nullptr if the elements form proper name/value pairs. Otherwise
// returns the offending element: the first element found in name position
// that is not a name, or the last name if it has no value.
PDFEntry* PDFDict::buildMap()
{
    // clear map
    m_aMap.clear();
    // build map
    PDFName* pPendingName = nullptr;
    for( const auto& rpEntry : m_aSubElements )
    {
        PDFEntry* pEntry = rpEntry.get();
        if( dynamic_cast<PDFComment*>(pEntry) != nullptr )
            continue;

        if( pPendingName )
        {
            // this element is the value for the name seen before
            m_aMap[ pPendingName->m_aName ] = pEntry;
            pPendingName = nullptr;
            continue;
        }

        // a name is expected here
        pPendingName = dynamic_cast<PDFName*>(pEntry);
        if( !pPendingName )
            return pEntry;
    }
    return pPendingName;
}
598
599
// Returns a newly allocated dictionary holding clones of all sub elements,
// with its lookup map built for the clones; the caller takes ownership.
PDFEntry* PDFDict::clone() const
{
    PDFDict* pCopy = new PDFDict();
    cloneSubElements( pCopy->m_aSubElements );
    pCopy->buildMap();
    return pCopy;
}
606
607
// Nothing to release explicitly.
PDFStream::~PDFStream() = default;
610
611
// Copies the bytes between begin and end offset from the original file.
bool PDFStream::emit( EmitContext& rWriteContext ) const
{
    const auto nLength = m_nEndOffset-m_nBeginOffset;
    return rWriteContext.copyOrigBytes( m_nBeginOffset, nLength );
}
615
616
// Returns a newly allocated stream with the same offsets but without a
// dictionary (nullptr is passed for it); the caller takes ownership.
PDFEntry* PDFStream::clone() const
{
    PDFEntry* pCopy = new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
    return pCopy;
}
620
621
unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
622
0
{
623
0
    if( ! m_pDict )
624
0
        return 0;
625
    // find /Length entry, can either be a direct or indirect number object
626
0
    std::unordered_map<OString,PDFEntry*>::const_iterator it =
627
0
        m_pDict->m_aMap.find( "Length"_ostr );
628
0
    if( it == m_pDict->m_aMap.end() )
629
0
        return 0;
630
0
    PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
631
0
    if( ! pNum && pContainer )
632
0
    {
633
0
        PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
634
0
        if( pRef )
635
0
        {
636
0
            int nEle = pContainer->m_aSubElements.size();
637
0
            for (int i = 0; i < nEle; i++)
638
0
            {
639
0
                PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
640
0
                if( pObj &&
641
0
                    pObj->m_nNumber == pRef->m_nNumber &&
642
0
                    pObj->m_nGeneration == pRef->m_nGeneration )
643
0
                {
644
0
                    if( pObj->m_pObject )
645
0
                        pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
646
0
                    break;
647
0
                }
648
0
            }
649
0
        }
650
0
    }
651
0
    return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
652
0
}
653
654
// Nothing to release explicitly.
PDFObject::~PDFObject() = default;
657
658
bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
659
0
{
660
0
    bool bIsDeflated = false;
661
0
    if( m_pStream && m_pStream->m_pDict &&
662
0
        m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
663
0
        )
664
0
    {
665
0
        unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
666
0
        rpStream.reset(new char[ nOuterStreamLen ]);
667
0
        unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
668
0
        if( nRead != nOuterStreamLen )
669
0
        {
670
0
            rpStream.reset();
671
0
            *pBytes = 0;
672
0
            return false;
673
0
        }
674
        // is there a filter entry ?
675
0
        std::unordered_map<OString,PDFEntry*>::const_iterator it =
676
0
            m_pStream->m_pDict->m_aMap.find( "Filter"_ostr );
677
0
        if( it != m_pStream->m_pDict->m_aMap.end() )
678
0
        {
679
0
            PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
680
0
            if( ! pFilter )
681
0
            {
682
0
                PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
683
0
                if( pArray && ! pArray->m_aSubElements.empty() )
684
0
                {
685
0
                    pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
686
0
                }
687
0
            }
688
689
            // is the (first) filter FlateDecode ?
690
0
            if (pFilter && pFilter->m_aName == "FlateDecode")
691
0
            {
692
0
                bIsDeflated = true;
693
0
            }
694
0
        }
695
        // prepare compressed data section
696
0
        char* pStream = rpStream.get();
697
0
        if( pStream[0] == 's' )
698
0
            pStream += 6; // skip "stream"
699
        // skip line end after "stream"
700
0
        while( *pStream == '\r' || *pStream == '\n' )
701
0
            pStream++;
702
        // get the compressed length
703
0
        *pBytes = m_pStream->getDictLength( pObjectContainer );
704
0
        if( pStream != rpStream.get() )
705
0
            memmove( rpStream.get(), pStream, *pBytes );
706
0
        if( rContext.m_bDecrypt )
707
0
        {
708
0
            EmitImplData* pEData = getEmitData( rContext );
709
0
            pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
710
0
                             *pBytes,
711
0
                             reinterpret_cast<sal_uInt8*>(rpStream.get()),
712
0
                             m_nNumber,
713
0
                             m_nGeneration
714
0
                             ); // decrypt inplace
715
0
        }
716
0
    }
717
0
    else
718
0
    {
719
0
        *pBytes = 0;
720
0
    }
721
0
    return bIsDeflated;
722
0
}
723
724
static void unzipToBuffer( char* pBegin, unsigned int nLen,
725
                           sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
726
0
{
727
0
    z_stream aZStr;
728
0
    aZStr.next_in       = reinterpret_cast<Bytef *>(pBegin);
729
0
    aZStr.avail_in      = nLen;
730
0
    aZStr.total_out = aZStr.total_in = 0;
731
0
    aZStr.zalloc        = nullptr;
732
0
    aZStr.zfree         = nullptr;
733
0
    aZStr.opaque        = nullptr;
734
735
0
    int err = inflateInit(&aZStr);
736
737
0
    const unsigned int buf_increment_size = 16384;
738
739
0
    if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
740
0
    {
741
0
        *pOutBuf = p;
742
0
        aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
743
0
        aZStr.avail_out = buf_increment_size;
744
0
        *pOutLen = buf_increment_size;
745
0
    }
746
0
    else
747
0
        err = Z_MEM_ERROR;
748
0
    while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
749
0
    {
750
0
        err = inflate( &aZStr, Z_NO_FLUSH );
751
0
        if( aZStr.avail_out == 0 )
752
0
        {
753
0
            if( err != Z_STREAM_END )
754
0
            {
755
0
                const int nNewAlloc = *pOutLen + buf_increment_size;
756
0
                if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
757
0
                {
758
0
                    *pOutBuf = p;
759
0
                    aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
760
0
                    aZStr.avail_out = buf_increment_size;
761
0
                    *pOutLen = nNewAlloc;
762
0
                }
763
0
                else
764
0
                    err = Z_MEM_ERROR;
765
0
            }
766
0
        }
767
0
    }
768
0
    if( err == Z_STREAM_END )
769
0
    {
770
0
        if( aZStr.avail_out > 0 )
771
0
            *pOutLen -= aZStr.avail_out;
772
0
    }
773
0
    inflateEnd(&aZStr);
774
0
    if( err < Z_OK )
775
0
    {
776
0
        std::free( *pOutBuf );
777
0
        *pOutBuf = nullptr;
778
0
        *pOutLen = 0;
779
0
    }
780
0
}
781
782
// Writes the payload of this object's stream to rWriteContext: inflated if
// the stream is FlateDecode encoded and the context asks for deflating,
// otherwise as delivered by getDeflatedStream. Does nothing without a
// stream; the result of the write is not reported.
void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
{
    if( !m_pStream )
        return;

    std::unique_ptr<char[]> pStream;
    unsigned int nBytes = 0;
    const bool bFlate = getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext );
    const bool bInflate = bFlate && nBytes && rWriteContext.m_bDeflate;
    if( !bInflate )
    {
        if( pStream && nBytes )
            rWriteContext.write( pStream.get(), nBytes );
        return;
    }

    sal_uInt8* pOutBytes = nullptr;
    sal_uInt32 nOutBytes = 0;
    unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
    rWriteContext.write( pOutBytes, nOutBytes );
    std::free( pOutBytes );
}
800
801
// Writes the object as "\n<number> <generation> obj\n ... endobj\n" and, if
// the context has implementation data, records its position in the xref
// table.
// If the context asks for deflating or decrypting and the object has
// stream data, a clone of the object is written instead: its /Length is
// set to the size of the processed data, a leading FlateDecode filter is
// removed if the data was inflated, and the processed data follows between
// "stream" and "endstream". In all other cases the sub elements are
// written as they are.
// Returns false as soon as a write fails.
bool PDFObject::emit( EmitContext& rWriteContext ) const
{
    if( ! rWriteContext.write( "\n", 1 ) )
        return false;

    EmitImplData* pEData = getEmitData( rWriteContext );
    if( pEData )
        pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );

    OString aBuf =
        OString::number( sal_Int32( m_nNumber ) ) +
        " " +
        OString::number( sal_Int32( m_nGeneration ) ) +
        " obj\n";
    if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
        return false;

    // strings emitted below are decrypted with this object's key
    if( pEData )
        pEData->setDecryptObject( m_nNumber, m_nGeneration );
    if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
    {
        std::unique_ptr<char[]> pStream;
        unsigned int nBytes = 0;
        bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
        if( pStream && nBytes )
        {
            // unzip the stream
            sal_uInt8* pOutBytes = nullptr;
            sal_uInt32 nOutBytes = 0;
            const bool bInflate = bDeflate && rWriteContext.m_bDeflate;
            if( bInflate )
                unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
            else
            {
                // nothing to deflate, but decryption has happened
                pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
                nOutBytes = static_cast<sal_uInt32>(nBytes);
            }

            // clone this object
            std::unique_ptr<PDFObject> pClone;
            if( nOutBytes )
                pClone.reset( static_cast<PDFObject*>(clone()) );
            // clone() leaves the stream or its dictionary unset if it could
            // not pair them up; in that case fall through to the plain
            // output below instead of dereferencing a null pointer
            if( pClone && pClone->m_pStream && pClone->m_pStream->m_pDict )
            {
                PDFDict* pDict = pClone->m_pStream->m_pDict;
                // set length in the dictionary to new stream length
                std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
                pDict->insertValue( "Length"_ostr, std::move(pNewLen) );

                if( bInflate )
                {
                    // delete flatedecode filter
                    std::unordered_map<OString,PDFEntry*>::const_iterator it =
                        pDict->m_aMap.find( "Filter"_ostr );
                    if( it != pDict->m_aMap.end() )
                    {
                        PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
                        if (pFilter && pFilter->m_aName == "FlateDecode")
                            pDict->eraseValue( "Filter" );
                        else
                        {
                            PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
                            if( pArray && ! pArray->m_aSubElements.empty() )
                            {
                                pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
                                if (pFilter && pFilter->m_aName == "FlateDecode")
                                {
                                    pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
                                }
                            }
                        }
                    }
                }

                // write sub elements except stream
                bool bRet = true;
                unsigned int nEle = pClone->m_aSubElements.size();
                for( unsigned int i = 0; i < nEle && bRet; i++ )
                {
                    if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
                        bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
                }
                pClone.reset();
                // write stream
                if( bRet )
                    bRet = rWriteContext.write("stream\n", 7)
                           && rWriteContext.write(pOutBytes, nOutBytes)
                           && rWriteContext.write("\nendstream\nendobj\n", 18);
                // only a buffer from unzipToBuffer is ours to free
                if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
                    std::free( pOutBytes );
                pEData->setDecryptObject( 0, 0 );
                return bRet;
            }
            if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
                std::free( pOutBytes );
        }
    }

    bool bRet = emitSubElements( rWriteContext ) &&
                rWriteContext.write( "\nendobj\n", 8 );
    if( pEData )
        pEData->setDecryptObject( 0, 0 );
    return bRet;
}
902
903
// Returns a newly allocated copy of this object with cloned sub elements;
// the caller takes ownership. m_pObject and m_pStream of the copy point to
// the clones at the same positions. The stream is only linked if the
// object content was found before it, and it gets the content as its
// dictionary if the content is a PDFDict.
PDFEntry* PDFObject::clone() const
{
    PDFObject* pCopy = new PDFObject( m_nNumber, m_nGeneration );
    cloneSubElements( pCopy->m_aSubElements );

    const unsigned int nCount = m_aSubElements.size();
    for( unsigned int nIdx = 0; nIdx < nCount; nIdx++ )
    {
        const PDFEntry* pOrig = m_aSubElements[nIdx].get();
        PDFEntry* pTwin = pCopy->m_aSubElements[nIdx].get();

        if( pOrig == m_pObject )
        {
            pCopy->m_pObject = pTwin;
            continue;
        }
        if( pOrig != m_pStream || !pCopy->m_pObject )
            continue;

        pCopy->m_pStream = dynamic_cast<PDFStream*>(pTwin);
        PDFDict* pDict = dynamic_cast<PDFDict*>(pCopy->m_pObject);
        if (pDict && pCopy->m_pStream)
            pCopy->m_pStream->m_pDict = pDict;
    }
    return pCopy;
}
922
923
// Nothing to release explicitly.
PDFTrailer::~PDFTrailer() = default;
926
927
/** Write the cross reference section, the trailer and the file end marker.
 *
 *  The xref subsections are only produced when emit data (and thus an xref
 *  table) is attached to the write context; the fixed entry for object 0 is
 *  always written.
 *
 *  @param rWriteContext  sink receiving the output
 *  @return false as soon as one write fails, true otherwise
 */
bool PDFTrailer::emit( EmitContext& rWriteContext ) const
{
    // startxref has to point at the beginning of the xref section
    const unsigned int nXRefStart = rWriteContext.getCurPos();

    // open the xref section; object 0 is always the free entry
    if( ! rWriteContext.write( "xref\r\n"
                               "0 1\r\n"
                               "0000000000 65535 f\r\n", 31 ) )
        return false;

    // an xref table exists only while a complete PDF file is being emitted
    if( EmitImplData* pEData = getEmitData( rWriteContext ) )
    {
        const EmitImplData::XRefTable& rTable = pEData->m_aXRefTable;
        auto aCur = rTable.begin();
        while( aCur != rTable.end() )
        {
            // determine the run [aCur, aRunEnd) of consecutive object numbers
            const unsigned int nFirst = aCur->first;
            unsigned int nLast = nFirst;
            auto aRunEnd = aCur;
            for( ++aRunEnd;
                 aRunEnd != rTable.end() && aRunEnd->first == nLast+1;
                 ++aRunEnd )
            {
                nLast = aRunEnd->first;
            }

            // subsection header: first object number and entry count
            const OString aHeader =
                OString::number(sal_Int32( nFirst ) )
                + " "
                + OString::number(sal_Int32(nLast - nFirst + 1))
                + "\r\n";
            if( ! rWriteContext.write( aHeader.getStr(), aHeader.getLength() ) )
                return false;

            for( ; aCur != aRunEnd; ++aCur )
            {
                // each entry is 20 chars of the form
                // 0000offset 00gen n\r\n
                const OString aOffset( OString::number( aCur->second.second ) );
                const OString aGeneration( OString::number( aCur->second.first ) );

                OStringBuffer aEntry;
                for( int nPad = 10 - aOffset.getLength(); nPad > 0; --nPad )
                    aEntry.append( '0' );
                aEntry.append( aOffset +  " " );
                for( int nPad = 5 - aGeneration.getLength(); nPad > 0; --nPad )
                    aEntry.append( '0' );
                aEntry.append( aGeneration + " n\r\n" );

                if( ! rWriteContext.write( aEntry.getStr(), 20 ) )
                    return false;
            }
        }
    }

    const bool bTrailerWritten =
        rWriteContext.write( "trailer\n", 8 )
        && emitSubElements( rWriteContext )
        && rWriteContext.write( "startxref\n", 10 );
    if( ! bTrailerWritten )
        return false;

    const OString aStart( OString::number( nXRefStart ) );
    return rWriteContext.write( aStart.getStr(), aStart.getLength() )
        && rWriteContext.write( "\n%%EOF\n", 7 );
}
992
993
/** Create a deep copy of this trailer.
 *
 *  After cloning the sub elements, the copy's m_pDict is set to the cloned
 *  element at the position where this trailer's dictionary is stored.
 *
 *  @return the newly allocated copy
 */
PDFEntry* PDFTrailer::clone() const
{
    PDFTrailer* pCopy = new PDFTrailer();
    cloneSubElements( pCopy->m_aSubElements );

    // locate the position of our dictionary among the sub elements
    const unsigned int nCount = m_aSubElements.size();
    unsigned int nPos = 0;
    while( nPos < nCount && m_aSubElements[nPos].get() != m_pDict )
        ++nPos;

    if( nPos < nCount )
        pCopy->m_pDict = dynamic_cast<PDFDict*>(pCopy->m_aSubElements[nPos].get());

    return pCopy;
}
1008
1009
#define ENCRYPTION_KEY_LEN 16
1010
#define ENCRYPTION_BUF_LEN 32
1011
1012
namespace pdfparse {
1013
struct PDFFileImplData
1014
{
1015
    bool        m_bIsEncrypted;
1016
    bool        m_bStandardHandler;
1017
    sal_uInt32  m_nAlgoVersion;
1018
    sal_uInt32  m_nStandardRevision;
1019
    sal_uInt32  m_nKeyLength;
1020
    sal_uInt8   m_aOEntry[32] = {};
1021
    sal_uInt8   m_aUEntry[32] = {};
1022
    sal_uInt32  m_nPEntry;
1023
    OString     m_aDocID;
1024
    rtlCipher   m_aCipher;
1025
1026
    sal_uInt8   m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1027
1028
    PDFFileImplData() :
1029
0
        m_bIsEncrypted( false ),
1030
0
        m_bStandardHandler( false ),
1031
0
        m_nAlgoVersion( 0 ),
1032
0
        m_nStandardRevision( 0 ),
1033
0
        m_nKeyLength( 0 ),
1034
0
        m_nPEntry( 0 ),
1035
0
        m_aCipher( nullptr )
1036
0
    {
1037
0
    }
1038
1039
    ~PDFFileImplData()
1040
0
    {
1041
0
        if( m_aCipher )
1042
0
            rtl_cipher_destroyARCFOUR( m_aCipher );
1043
0
    }
1044
};
1045
}
1046
1047
// A fresh file object starts out with PDF version 0.0.
PDFFile::PDFFile()
    : m_nMajor( 0 )
    , m_nMinor( 0 )
{
}
1051
1052
// Defined here, after PDFFileImplData, so that m_pData can be destroyed.
PDFFile::~PDFFile() = default;
1055
1056
bool PDFFile::isEncrypted() const
1057
0
{
1058
0
    return impl_getData()->m_bIsEncrypted;
1059
0
}
1060
1061
bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1062
                       unsigned int nObject, unsigned int nGeneration ) const
1063
0
{
1064
0
    if( ! isEncrypted() )
1065
0
        return false;
1066
1067
0
    if( ! m_pData->m_aCipher )
1068
0
        m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1069
1070
    // modify encryption key
1071
0
    sal_uInt32 i = m_pData->m_nKeyLength;
1072
0
    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1073
0
    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1074
0
    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1075
0
    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1076
0
    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1077
1078
0
    ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1079
0
                m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1080
1081
0
    if( i > 16 )
1082
0
        i = 16;
1083
1084
0
    rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1085
0
                                                  rtl_Cipher_DirectionDecode,
1086
0
                                                  aSum.data(), i,
1087
0
                                                  nullptr, 0 );
1088
0
    if( aErr == rtl_Cipher_E_None )
1089
0
        aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1090
0
                                         pInBuffer, nLen,
1091
0
                                         pOutBuffer, nLen );
1092
0
    return aErr == rtl_Cipher_E_None;
1093
0
}
1094
1095
// Fixed 32 byte padding string used to fill up passwords shorter than
// 32 bytes and as the plain text for the user password check.
const sal_uInt8 nPadString[32] =
{
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
};
1100
1101
static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1102
0
{
1103
0
    int nLen = rStr.getLength();
1104
0
    if( nLen > 32 )
1105
0
        nLen = 32;
1106
0
    const char* pStr = rStr.getStr();
1107
0
    memcpy( pBuffer, pStr, nLen );
1108
0
    int i = 0;
1109
0
    while( nLen < 32 )
1110
0
        pBuffer[nLen++] = nPadString[i++];
1111
0
}
1112
1113
// pass at least pData->m_nKeyLength bytes in
1114
static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1115
0
{
1116
    // see PDF reference 1.4 Algorithm 3.2
1117
    // encrypt pad string
1118
0
    char aPadPwd[ENCRYPTION_BUF_LEN];
1119
0
    pad_or_truncate_to_32( rPwd, aPadPwd );
1120
0
    ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1121
0
    aDigest.update(aPadPwd, sizeof(aPadPwd));
1122
0
    if( ! bComputeO )
1123
0
    {
1124
0
        aDigest.update(pData->m_aOEntry, 32);
1125
0
        sal_uInt8 aPEntry[4];
1126
0
        aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1127
0
        aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1128
0
        aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1129
0
        aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1130
0
        aDigest.update(aPEntry, sizeof(aPEntry));
1131
0
        aDigest.update(pData->m_aDocID.getStr(), pData->m_aDocID.getLength());
1132
0
    }
1133
0
    ::std::vector<unsigned char> nSum(aDigest.finalize());
1134
0
    if( pData->m_nStandardRevision == 3 )
1135
0
    {
1136
0
        for( int i = 0; i < 50; i++ )
1137
0
        {
1138
0
            nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1139
0
                    ::comphelper::HashType::MD5);
1140
0
        }
1141
0
    }
1142
0
    sal_uInt32 nLen = pData->m_nKeyLength;
1143
0
    if( nLen > RTL_DIGEST_LENGTH_MD5 )
1144
0
        nLen = RTL_DIGEST_LENGTH_MD5;
1145
0
    memcpy( pOutKey, nSum.data(), nLen );
1146
0
    return nLen;
1147
0
}
1148
1149
static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1150
0
{
1151
    // see PDF reference 1.4 Algorithm 3.6
1152
0
    bool bValid = false;
1153
0
    sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1154
0
    sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1155
    // save (at this time potential) decryption key for later use
1156
0
    memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1157
0
    if( pData->m_nStandardRevision == 2 )
1158
0
    {
1159
0
        sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1160
        // see PDF reference 1.4 Algorithm 3.4
1161
        // encrypt pad string
1162
0
        if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1163
0
                                    aKey, nKeyLen,
1164
0
                                    nullptr, 0 )
1165
0
            != rtl_Cipher_E_None)
1166
0
        {
1167
0
            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1168
0
        }
1169
0
        rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1170
0
                                  nEncryptedEntry, sizeof( nEncryptedEntry ) );
1171
0
        bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1172
0
    }
1173
0
    else if( pData->m_nStandardRevision == 3 )
1174
0
    {
1175
        // see PDF reference 1.4 Algorithm 3.5
1176
0
        ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1177
0
        aDigest.update(nPadString, sizeof(nPadString));
1178
0
        aDigest.update(pData->m_aDocID.getStr(), pData->m_aDocID.getLength());
1179
0
        ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1180
0
        if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1181
0
                                    aKey, sizeof(aKey), nullptr, 0 )
1182
0
            != rtl_Cipher_E_None)
1183
0
        {
1184
0
            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1185
0
        }
1186
0
        rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1187
0
                                  nEncryptedEntry.data(), 16,
1188
0
                                  nEncryptedEntry.data(), 16 ); // encrypt in place
1189
0
        for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1190
0
        {
1191
0
            sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1192
0
            for( size_t j = 0; j < sizeof(aTempKey); j++ )
1193
0
                aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1194
1195
0
            if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1196
0
                                        aTempKey, sizeof(aTempKey), nullptr, 0 )
1197
0
                != rtl_Cipher_E_None)
1198
0
            {
1199
0
                return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1200
0
            }
1201
0
            rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1202
0
                                      nEncryptedEntry.data(), 16,
1203
0
                                      nEncryptedEntry.data(), 16 ); // encrypt in place
1204
0
        }
1205
0
        bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1206
0
    }
1207
0
    return bValid;
1208
0
}
1209
1210
bool PDFFile::usesSupportedEncryptionFormat() const
1211
0
{
1212
0
    return m_pData->m_bStandardHandler &&
1213
0
        m_pData->m_nAlgoVersion >= 1 &&
1214
0
        m_pData->m_nAlgoVersion <= 2 &&
1215
0
        m_pData->m_nStandardRevision >= 2 &&
1216
0
        m_pData->m_nStandardRevision <= 3;
1217
0
}
1218
1219
bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1220
0
{
1221
0
    if( !impl_getData()->m_bIsEncrypted )
1222
0
        return rPwd.isEmpty();
1223
1224
    // check if we can handle this encryption at all
1225
0
    if( ! usesSupportedEncryptionFormat() )
1226
0
        return false;
1227
1228
0
    if( ! m_pData->m_aCipher )
1229
0
        m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1230
1231
    // first try user password
1232
0
    bool bValid = check_user_password( rPwd, m_pData.get() );
1233
1234
0
    if( ! bValid )
1235
0
    {
1236
        // try owner password
1237
        // see PDF reference 1.4 Algorithm 3.7
1238
0
        sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1239
0
        sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1240
0
        sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1241
0
        if( m_pData->m_nStandardRevision == 2 )
1242
0
        {
1243
0
            if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1244
0
                                        aKey, nKeyLen, nullptr, 0 )
1245
0
                != rtl_Cipher_E_None)
1246
0
            {
1247
0
                return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1248
0
            }
1249
0
            rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1250
0
                                      m_pData->m_aOEntry, 32,
1251
0
                                      nPwd, 32 );
1252
0
        }
1253
0
        else if( m_pData->m_nStandardRevision == 3 )
1254
0
        {
1255
0
            memcpy( nPwd, m_pData->m_aOEntry, 32 );
1256
0
            for( int i = 19; i >= 0; i-- )
1257
0
            {
1258
0
                sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1259
0
                for( size_t j = 0; j < sizeof(nTempKey); j++ )
1260
0
                    nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1261
0
                if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1262
0
                                            nTempKey, nKeyLen, nullptr, 0 )
1263
0
                    != rtl_Cipher_E_None)
1264
0
                {
1265
0
                    return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1266
0
                }
1267
0
                rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1268
0
                                          nPwd, 32,
1269
0
                                          nPwd, 32 ); // decrypt inplace
1270
0
            }
1271
0
        }
1272
0
        bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1273
0
    }
1274
1275
0
    return bValid;
1276
0
}
1277
1278
/** Return the encryption data of this file, creating it on first use.
 *
 *  On the first call the sub elements are searched from last to first for a
 *  trailer with a dictionary. The first string of its ID array becomes the
 *  document id, and if its Encrypt entry (given directly or as an object
 *  reference) is a dictionary containing a Filter entry, the encryption
 *  parameters are read from it and the search stops.
 *
 *  @return the (possibly freshly created) data; never null
 */
PDFFileImplData* PDFFile::impl_getData() const
{
    if( m_pData )
        return m_pData.get();
    m_pData.reset( new PDFFileImplData );
    // check for encryption dict in a trailer
    unsigned int nElements = m_aSubElements.size();
    while( nElements-- > 0 )
    {
        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
        if( pTrailer && pTrailer->m_pDict )
        {
            // search doc id
            PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID"_ostr );
            if( doc_id != pTrailer->m_pDict->m_aMap.end() )
            {
                PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
                if( pArr && !pArr->m_aSubElements.empty() )
                {
                    PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
                    if( pStr )
                        m_pData->m_aDocID = pStr->getFilteredString();
#if OSL_DEBUG_LEVEL > 0
                    OUStringBuffer aTmp;
                    for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
                        aTmp.append(static_cast<sal_Int32>(sal_uInt8(m_pData->m_aDocID[i])), 16);
                    SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
#endif
                }
            }
            // search Encrypt entry
            PDFDict::Map::iterator enc =
                pTrailer->m_pDict->m_aMap.find( "Encrypt"_ostr );
            if( enc != pTrailer->m_pDict->m_aMap.end() )
            {
                PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
                if( ! pDict )
                {
                    // the entry may be an indirect reference to the dictionary
                    PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
                    if( pRef )
                    {
                        PDFObject* pObj = findObject( pRef );
                        if( pObj && pObj->m_pObject )
                            pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
                    }
                }
                if( pDict )
                {
                    PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter"_ostr );
                    PDFDict::Map::iterator version = pDict->m_aMap.find( "V"_ostr );
                    PDFDict::Map::iterator len = pDict->m_aMap.find( "Length"_ostr );
                    PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O"_ostr );
                    PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U"_ostr );
                    PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R"_ostr );
                    PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P"_ostr );
                    if( filter != pDict->m_aMap.end() )
                    {
                        m_pData->m_bIsEncrypted = true;
                        // default key length: 5 bytes, or 16 for V >= 3
                        m_pData->m_nKeyLength = 5;
                        if( version != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
                            if( pNum )
                                m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
                        }
                        if( m_pData->m_nAlgoVersion >= 3 )
                            m_pData->m_nKeyLength = 16;
                        if( len != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
                            if( pNum )
                            {
                                // Length is given in bits and comes straight
                                // from the file: converting an out-of-range
                                // double is undefined, and a key longer than
                                // ENCRYPTION_KEY_LEN bytes would not fit the
                                // key buffer, so validate before converting
                                const double fBits = pNum->m_fValue;
                                if( fBits >= 8.0 * ENCRYPTION_KEY_LEN )
                                    m_pData->m_nKeyLength = ENCRYPTION_KEY_LEN;
                                else if( fBits > -1.0 )
                                    m_pData->m_nKeyLength = static_cast<sal_uInt32>(fBits) / 8;
                                else
                                    SAL_WARN("sdext.pdfimport.pdfparse",
                                             "ignoring invalid encryption key Length " << fBits );
                            }
                        }
                        PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
                        if( pFilter && pFilter->getFilteredName() == "Standard" )
                            m_pData->m_bStandardHandler = true;
                        if( o_ent != pDict->m_aMap.end() )
                        {
                            PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
                            if( pString )
                            {
                                // only an entry of exactly 32 bytes is taken over
                                OString aEnt = pString->getFilteredString();
                                if( aEnt.getLength() == 32 )
                                    memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
#if OSL_DEBUG_LEVEL > 0
                                else
                                {
                                    OUStringBuffer aTmp;
                                    for( int i = 0; i < aEnt.getLength(); i++ )
                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
                                    SAL_WARN("sdext.pdfimport.pdfparse",
                                             "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
                                }
#endif
                            }
                        }
                        if( u_ent != pDict->m_aMap.end() )
                        {
                            PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
                            if( pString )
                            {
                                // only an entry of exactly 32 bytes is taken over
                                OString aEnt = pString->getFilteredString();
                                if( aEnt.getLength() == 32 )
                                    memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
#if OSL_DEBUG_LEVEL > 0
                                else
                                {
                                    OUStringBuffer aTmp;
                                    for( int i = 0; i < aEnt.getLength(); i++ )
                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
                                    SAL_WARN("sdext.pdfimport.pdfparse",
                                             "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
                                }
#endif
                            }
                        }
                        if( r_ent != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
                            if( pNum )
                                m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
                        }
                        if( p_ent != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
                            if( pNum )
                                m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
                            SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
                        }

                        SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : u"<unknown>"_ustr) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
                        break;
                    }
                }
            }
        }
    }

    return m_pData.get();
}
1418
1419
/** Write the whole file: the "%PDF-major.minor" header line followed by all
 *  sub elements.
 *
 *  Fresh emit data referring to this file is attached to the write context
 *  before anything is written.
 *
 *  @param rWriteContext  sink receiving the output
 *  @return true if the header and all sub elements were written
 */
bool PDFFile::emit( EmitContext& rWriteContext ) const
{
    setEmitData(  rWriteContext, new EmitImplData( this ) );

    const OString aHeader =
        "%PDF-" +
        OString::number( sal_Int32( m_nMajor ) ) +
        "." +
        OString::number( sal_Int32( m_nMinor ) ) +
        "\n";

    return rWriteContext.write( aHeader.getStr(), aHeader.getLength() )
        && emitSubElements( rWriteContext );
}
1433
1434
/** Create a copy of this file carrying the same version numbers and clones
 *  of all sub elements.
 *
 *  @return the newly allocated copy
 */
PDFEntry* PDFFile::clone() const
{
    PDFFile* pCopy = new PDFFile();
    cloneSubElements( pCopy->m_aSubElements );
    pCopy->m_nMinor = m_nMinor;
    pCopy->m_nMajor = m_nMajor;
    return pCopy;
}
1442
1443
// Out-of-line destructor; nothing to release beyond what the members do.
PDFPart::~PDFPart() = default;
1446
1447
/** Write this part by writing its sub elements.
 *
 *  @param rWriteContext  sink receiving the output
 *  @return result of emitSubElements()
 */
bool PDFPart::emit( EmitContext& rWriteContext ) const
{
    const bool bWritten = emitSubElements( rWriteContext );
    return bWritten;
}
1451
1452
/** Create a copy of this part holding clones of all sub elements.
 *
 *  @return the newly allocated copy
 */
PDFEntry* PDFPart::clone() const
{
    PDFPart* pCopy = new PDFPart();
    cloneSubElements( pCopy->m_aSubElements );
    return pCopy;
}
1458
1459
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */