Coverage Report

Created: 2025-07-18 06:59

/src/qpdf/include/qpdf/QPDFObjectHandle.hh
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License");
7
// you may not use this file except in compliance with the License.
8
// You may obtain a copy of the License at
9
//
10
//   http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
// Versions of qpdf prior to version 7 were released under the terms
19
// of version 2.0 of the Artistic License. At your option, you may
20
// continue to consider qpdf to be licensed under those terms. Please
21
// see the manual for additional information.
22
23
#ifndef QPDFOBJECTHANDLE_HH
24
#define QPDFOBJECTHANDLE_HH
25
26
#include <qpdf/ObjectHandle.hh>
27
28
#include <qpdf/Constants.h>
29
#include <qpdf/DLL.h>
30
#include <qpdf/Types.h>
31
32
#include <functional>
33
#include <map>
34
#include <memory>
35
#include <set>
36
#include <string>
37
#include <vector>
38
39
#include <qpdf/Buffer.hh>
40
#include <qpdf/InputSource.hh>
41
#include <qpdf/JSON.hh>
42
#include <qpdf/QPDFObjGen.hh>
43
#include <qpdf/QPDFTokenizer.hh>
44
45
class Pipeline;
46
class QPDF;
47
class QPDF_Array;
48
class QPDF_Bool;
49
class QPDF_Dictionary;
50
class QPDF_InlineImage;
51
class QPDF_Integer;
52
class QPDF_Name;
53
class QPDF_Null;
54
class QPDF_Operator;
55
class QPDF_Real;
56
class QPDF_Reserved;
57
class QPDF_Stream;
58
class QPDF_String;
59
class QPDFObject;
60
class QPDFObjectHandle;
61
class QPDFTokenizer;
62
class QPDFExc;
63
class Pl_QPDFTokenizer;
64
class QPDFMatrix;
65
class QPDFParser;
66
67
class QPDFObjectHandle: public qpdf::BaseHandle
68
{
69
    friend class QPDFParser;
70
71
  public:
72
    // This class is used by replaceStreamData.  It provides an alternative way of associating
73
    // stream data with a stream.  See comments on replaceStreamData and newStream for additional
74
    // details.
75
    class QPDF_DLL_CLASS StreamDataProvider
76
    {
77
      public:
78
        QPDF_DLL
79
        StreamDataProvider(bool supports_retry = false); // pass true only when the retry-capable provideStreamData overload is implemented (see below)
80
81
        QPDF_DLL
82
        virtual ~StreamDataProvider();
83
        // The implementation of this function must write stream data to the given pipeline. The
84
        // stream data must conform to whatever filters are explicitly associated with the stream.
85
        // QPDFWriter may, in some cases, add compression, but if it does, it will update the
86
        // filters as needed. Every call to provideStreamData for a given stream must write the same
87
        // data. Note that, when writing linearized files, qpdf will call your provideStreamData
88
        // twice, and if it generates different output, you risk generating invalid output or having
89
        // qpdf throw an exception. The object ID and generation passed to this method are those
90
        // that belong to the stream on behalf of which the provider is called. They may be ignored
91
        // or used by the implementation for indexing or other purposes. This information is made
92
        // available just to make it more convenient to use a single StreamDataProvider object to
93
        // provide data for multiple streams.
94
95
        // A few things to keep in mind:
96
        //
97
        // * Stream data providers must not modify any objects since they may be called after some
98
        //   parts of the file have already been written.
99
        //
100
        // * Since qpdf may call provideStreamData multiple times when writing linearized files, if
101
        //   the work done by your stream data provider is slow or computationally intensive, you
102
        //   might want to implement your own cache.
103
        //
104
        // * Once you have called replaceStreamData, the original stream data is no longer directly
105
        //   accessible from the stream, but this is easy to work around by copying the stream to
106
        //   a separate QPDF object. The qpdf library implements this very efficiently without
107
        //   actually making a copy of the stream data. You can find examples of this pattern in
108
        //   some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc.
109
110
        // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or
111
        // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by
112
        // allowing an alternative provideStreamData to be implemented. You must implement at least
113
        // one of the versions of provideStreamData below. If you implement the version that
114
        // supports retry and returns a value, you should pass true as the value of supports_retry
115
        // in the base class constructor. This will cause the library to call that version of the
116
        // method, which should also return a boolean indicating whether it ran without errors.
117
        QPDF_DLL
118
        virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline);
119
        QPDF_DLL
120
        virtual bool provideStreamData(
121
            QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry);
122
        QPDF_DLL virtual void provideStreamData(int objid, int generation, Pipeline* pipeline); // NOTE(review): int objid/generation form of the overload above; presumably an older equivalent of the QPDFObjGen version - confirm
123
        QPDF_DLL virtual bool provideStreamData(
124
            int objid, int generation, Pipeline* pipeline, bool suppress_warnings, bool will_retry); // NOTE(review): retry-capable counterpart of the int overload - confirm which form the library calls
125
        QPDF_DLL
126
        bool supportsRetry(); // NOTE(review): presumably reports the supports_retry flag given to the constructor; definition not visible here
127
128
      private:
129
        bool supports_retry; // NOTE(review): presumably set from the constructor argument of the same name - confirm in the .cc file
130
    };
131
132
    // The TokenFilter class provides a way to filter content streams in a lexically aware fashion.
133
    // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter
134
    // methods or can be applied on the spot by filterPageContents. You may also use
135
    // Pl_QPDFTokenizer directly if you need full control.
136
    //
137
    // The handleToken method is called for each token, including the eof token, and then handleEOF
138
    // is called at the very end. Handlers may call write (or writeToken) to pass data downstream.
139
    // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of
140
    // using TokenFilters.
141
    //
142
    // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you
143
    // get the canonical, "parsed" representation of the token. For a string, this means that there
144
    // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits)
145
    // has been resolved. qpdf's internal representation of a name includes the leading slash. As
146
    // such, you can't write the value of token.getValue() directly to output that is supposed to be
147
    // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can
148
    // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new
149
    // string or name token from a canonical representation, use
150
    // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or
151
    // QPDFTokenizer::Token(QPDFTokenizer::tt_name,
152
    // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can
153
    // still write them with writeToken(). Example:
154
    // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain"))
155
    // would write `/text#2fplain`, and
156
    // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`.
157
    class QPDF_DLL_CLASS TokenFilter
158
    {
159
      public:
160
0
        TokenFilter() = default;
161
0
        virtual ~TokenFilter() = default;
162
        virtual void handleToken(QPDFTokenizer::Token const&) = 0; // pure virtual: every filter must implement this
163
        QPDF_DLL
164
        virtual void handleEOF(); // optional override; called at the very end (see class comment)
165
166
        class PipelineAccessor // exposes the private setPipeline() to Pl_QPDFTokenizer, its only friend
167
        {
168
            friend class Pl_QPDFTokenizer;
169
170
          private:
171
            static void
172
            setPipeline(TokenFilter* f, Pipeline* p)
173
0
            {
174
0
                f->setPipeline(p);
175
0
            }
176
        };
177
178
      protected:
179
        QPDF_DLL
180
        void write(char const* data, size_t len); // for use by handlers to pass data downstream
181
        QPDF_DLL
182
        void write(std::string const& str);
183
        QPDF_DLL
184
        void writeToken(QPDFTokenizer::Token const&);
185
186
      private:
187
        QPDF_DLL_PRIVATE
188
        void setPipeline(Pipeline*);
189
190
        Pipeline* pipeline{nullptr}; // null until setPipeline() runs; the defaulted constructor would otherwise leave it indeterminate
191
    };
192
193
    // This class is used by parse to decrypt strings when reading an object that contains encrypted
194
    // strings.
195
    class StringDecrypter
196
    {
197
      public:
198
0
        virtual ~StringDecrypter() = default; // virtual so implementations can be destroyed through a base pointer
199
        virtual void decryptString(std::string& val) = 0; // pure virtual; NOTE(review): presumably rewrites val in place with the decrypted bytes - confirm against implementers
200
    };
201
202
    // This class is used by parsePageContents. Callers must instantiate a subclass of this with
203
    // handlers defined to accept QPDFObjectHandles that are parsed from the stream.
204
    class QPDF_DLL_CLASS ParserCallbacks
205
    {
206
      public:
207
0
        virtual ~ParserCallbacks() = default;
208
        // One of the handleObject methods must be overridden.
209
        QPDF_DLL
210
        virtual void handleObject(QPDFObjectHandle);
211
        QPDF_DLL
212
        virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length); // NOTE(review): offset/length presumably locate the object within the parsed content data - confirm
213
214
        virtual void handleEOF() = 0; // pure virtual: every subclass must implement this
215
216
        // Override this if you want to know the full size of the contents, possibly after
217
        // concatenation of multiple streams. This is called before the first call to handleObject.
218
        QPDF_DLL
219
        virtual void contentSize(size_t);
220
221
      protected:
222
        // Implementors may call this method during parsing to terminate parsing early. This method
223
        // throws an exception that is caught by parsePageContents, so its effect is immediate.
224
        QPDF_DLL
225
        void terminateParsing();
226
    };
227
228
    // Convenience object for rectangles
229
    class Rectangle
230
    {
231
      public:
232
        Rectangle() : // default: all four coordinates are 0.0
233
0
            llx(0.0),
234
0
            lly(0.0),
235
0
            urx(0.0),
236
0
            ury(0.0)
237
0
        {
238
0
        }
239
        Rectangle(double llx, double lly, double urx, double ury) : // stores the four values as given; no validation or reordering
240
0
            llx(llx),
241
0
            lly(lly),
242
0
            urx(urx),
243
0
            ury(ury)
244
0
        {
245
0
        }
246
247
        double llx; // NOTE(review): presumably lower-left x - confirm
248
        double lly; // NOTE(review): presumably lower-left y - confirm
249
        double urx; // NOTE(review): presumably upper-right x - confirm
250
        double ury; // NOTE(review): presumably upper-right y - confirm
251
    };
252
253
    // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't
254
    // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity
255
    // transform matrix and this one is all zeroes.
256
    class Matrix
257
    {
258
      public:
259
        Matrix() : // default: all six elements are 0.0 (not the identity transform)
260
0
            a(0.0),
261
0
            b(0.0),
262
0
            c(0.0),
263
0
            d(0.0),
264
0
            e(0.0),
265
0
            f(0.0)
266
0
        {
267
0
        }
268
        Matrix(double a, double b, double c, double d, double e, double f) : // stores the six values as given
269
0
            a(a),
270
0
            b(b),
271
0
            c(c),
272
0
            d(d),
273
0
            e(e),
274
0
            f(f)
275
0
        {
276
0
        }
277
278
        double a; // NOTE(review): a-f presumably follow the PDF [a b c d e f] transformation-matrix order - confirm
279
        double b;
280
        double c;
281
        double d;
282
        double e;
283
        double f;
284
    };
285
286
2.33M
    QPDFObjectHandle() = default;
287
2.33M
    QPDFObjectHandle(QPDFObjectHandle const&) = default;
288
151k
    QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default;
289
2.91M
    QPDFObjectHandle(QPDFObjectHandle&&) = default;
290
2.21M
    QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default;
291
292
    // This method is provided for backward compatibility only. New code should convert to bool
293
    // instead.
294
    inline bool isInitialized() const;
295
296
    // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying
297
    // object, meaning that changes to one are reflected in the other, or "if you paint one, the
298
    // other one changes color." This does not perform a structural comparison of the contents of
299
    // the objects.
300
    QPDF_DLL
301
    bool isSameObjectAs(QPDFObjectHandle const&) const;
302
303
    // Return type code and type name of underlying object.  These are useful for doing rapid type
304
    // tests (like switch statements) or for testing and debugging.
305
    QPDF_DLL
306
    qpdf_object_type_e getTypeCode() const;
307
    QPDF_DLL
308
    char const* getTypeName() const;
309
310
    // Exactly one of these will return true for any initialized object. Operator and InlineImage
311
    // are only allowed in content streams.
312
    QPDF_DLL
313
    bool isBool() const;
314
    QPDF_DLL
315
    bool isNull() const;
316
    QPDF_DLL
317
    bool isInteger() const;
318
    QPDF_DLL
319
    bool isReal() const;
320
    QPDF_DLL
321
    bool isName() const;
322
    QPDF_DLL
323
    bool isString() const;
324
    QPDF_DLL
325
    bool isOperator() const;
326
    QPDF_DLL
327
    bool isInlineImage() const;
328
    QPDF_DLL
329
    bool isArray() const;
330
    QPDF_DLL
331
    bool isDictionary() const;
332
    QPDF_DLL
333
    bool isStream() const;
334
    QPDF_DLL
335
    bool isReserved() const;
336
337
    // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended
338
    // for internal use, but it can be used as an efficient way to check for nulls that are not
339
    // indirect objects.
340
    QPDF_DLL
341
    bool isDirectNull() const;
342
343
    // This returns true in addition to the query for the specific type for indirect objects.
344
    QPDF_DLL
345
    bool isIndirect() const;
346
347
    // This returns true for indirect objects from a QPDF that has been destroyed. Trying to unparse
348
    // such an object will throw a logic_error.
349
    QPDF_DLL
350
    bool isDestroyed() const;
351
352
    // True for everything except array, dictionary, stream, word, and inline image.
353
    QPDF_DLL
354
    bool isScalar() const;
355
356
    // True if the object is a name object representing the provided name.
357
    QPDF_DLL
358
    bool isNameAndEquals(std::string const& name) const;
359
360
    // True if the object is a dictionary of the specified type and subtype, if any.
361
    QPDF_DLL
362
    bool isDictionaryOfType(std::string const& type, std::string const& subtype = "") const;
363
364
    // True if the object is a stream of the specified type and subtype, if any.
365
    QPDF_DLL
366
    bool isStreamOfType(std::string const& type, std::string const& subtype = "") const;
367
368
    // Public factory methods
369
370
    // Wrap an object in an array if it is not already an array. This is a helper for cases in which
371
    // something in a PDF may either be a single item or an array of items, which is a common idiom.
372
    QPDF_DLL
373
    QPDFObjectHandle wrapInArray();
374
375
    // Construct an object of any type from a string representation of the object.  Throws QPDFExc
376
    // with an empty filename and an offset into the string if there is an error.  Any indirect
377
    // object syntax (obj gen R) will cause a logic_error exception to be thrown.  If
378
    // object_description is provided, it will appear in the message of any QPDFExc exception thrown
379
    // for invalid syntax. See also the global `operator ""_qpdf` defined below.
380
    QPDF_DLL
381
    static QPDFObjectHandle
382
    parse(std::string const& object_str, std::string const& object_description = "");
383
384
    // Construct an object of any type from a string representation of the object. Indirect object
385
    // syntax (obj gen R) is allowed and will create indirect references within the passed-in
386
    // context. If object_description is provided, it will appear in the message of any QPDFExc
387
    // exception thrown for invalid syntax. Note that you can't parse an indirect object reference
388
    // all by itself as parse will stop at the end of the first complete object, which will just be
389
    // the first number and will report that there is trailing data at the end of the string.
390
    QPDF_DLL
391
    static QPDFObjectHandle
392
    parse(QPDF* context, std::string const& object_str, std::string const& object_description = "");
393
394
    // Construct an object as above by reading from the given InputSource at its current position
395
    // and using the tokenizer you supply.  Indirect objects and encrypted strings are permitted.
396
    // This method was intended to be called by QPDF for parsing objects that are read from the
397
    // object's input stream. To be removed in qpdf 13. See
398
    // <https://manual.qpdf.org/release-notes.html#r12-0-0-deprecate>.
399
    [[deprecated("to be removed in qpdf 13")]] QPDF_DLL static QPDFObjectHandle parse(
400
        std::shared_ptr<InputSource> input,
401
        std::string const& object_description,
402
        QPDFTokenizer&,
403
        bool& empty,
404
        StringDecrypter* decrypter,
405
        QPDF* context);
406
407
    // Return the offset where the object was found when parsed. A negative value means that the
408
    // object was created without parsing. If the object is in a stream, the offset is from the
409
    // beginning of the stream. Otherwise, the offset is from the beginning of the file.
410
    QPDF_DLL
411
    qpdf_offset_t getParsedOffset() const;
412
413
    // Older method: stream_or_array should be the value of /Contents from a page object. It's more
414
    // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error
415
    // messages will also be more useful because the page object information will be known.
416
    QPDF_DLL
417
    static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks);
418
419
    // When called on a stream or stream array that is some page's content streams, do the same as
420
    // pipePageContents. This method is a lower level way to do what
421
    // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a
422
    // contents object that is disconnected from a page object. The description argument should
423
    // describe the containing page and is used in error messages. The all_description argument is
424
    // initialized to something that could be used to describe the result of the pipeline. It is the
425
    // description amended with the identifiers of the underlying objects. Please note that if there
426
    // is an array of content streams, p->finish() is called after each stream. If you pass a
427
    // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an
428
    // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the
429
    // end.
430
    QPDF_DLL
431
    void
432
    pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description);
433
434
    // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream
435
    // data is passed through the token filter after all original filters but before content stream
436
    // normalization if requested. This is a low-level interface to add it to a stream. You will
437
    // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be
438
    // applied to a page object, and which will automatically handle the case of pages whose
439
    // contents are split across multiple streams.
440
    QPDF_DLL
441
    void addTokenFilter(std::shared_ptr<TokenFilter> token_filter);
442
443
    // Legacy helpers for parsing content streams. These methods are not going away, but newer code
444
    // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and
445
    // behavior of these methods are the same as the identically named methods in that class, but
446
    // newer functionality will be added there.
447
    QPDF_DLL
448
    void parsePageContents(ParserCallbacks* callbacks);
449
    QPDF_DLL
450
    void filterPageContents(TokenFilter* filter, Pipeline* next = nullptr);
451
    // See comments for QPDFPageObjectHelper::pipeContents.
452
    QPDF_DLL
453
    void pipePageContents(Pipeline* p);
454
    QPDF_DLL
455
    void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter);
456
    // End legacy content stream helpers
457
458
    // Called on a stream to filter the stream as if it were page contents. This can be used to
459
    // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream.
460
    QPDF_DLL
461
    void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr);
462
    // Called on a stream to parse the stream as page contents. This can be used to parse a form
463
    // XObject.
464
    QPDF_DLL
465
    void parseAsContents(ParserCallbacks* callbacks);
466
467
    // Type-specific factories
468
    QPDF_DLL
469
    static QPDFObjectHandle newNull();
470
    QPDF_DLL
471
    static QPDFObjectHandle newBool(bool value);
472
    QPDF_DLL
473
    static QPDFObjectHandle newInteger(long long value);
474
    QPDF_DLL
475
    static QPDFObjectHandle newReal(std::string const& value);
476
    QPDF_DLL
477
    static QPDFObjectHandle
478
    newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true);
479
    // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes,
480
    // excluding the NUL character, and starting with a slash. Name objects as represented in the
481
    // PDF specification can contain characters escaped with #, but such escaping is not of concern
482
    // when calling QPDFObjectHandle methods not directly relating to parsing. For example,
483
    // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return
484
    // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse()
485
    // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and
486
    // modifying objects, you want to work with the internal, canonical representation. For names
487
    // containing alphanumeric characters, dashes, and underscores, there is no difference between
488
    // the two representations. For a lengthy discussion, see
489
    // https://github.com/qpdf/qpdf/discussions/625.
490
    QPDF_DLL
491
    static QPDFObjectHandle newName(std::string const& name);
492
    QPDF_DLL
493
    static QPDFObjectHandle newString(std::string const& str);
494
    // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in
495
    // PDF files outside of content streams, such as in document metadata, form field values, page
496
    // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16
497
    // as needed to successfully encode all the characters.
498
    QPDF_DLL
499
    static QPDFObjectHandle newUnicodeString(std::string const& utf8_str);
500
    QPDF_DLL
501
    static QPDFObjectHandle newOperator(std::string const&);
502
    QPDF_DLL
503
    static QPDFObjectHandle newInlineImage(std::string const&);
504
    QPDF_DLL
505
    static QPDFObjectHandle newArray();
506
    QPDF_DLL
507
    static QPDFObjectHandle newArray(std::vector<QPDFObjectHandle> const& items);
508
    QPDF_DLL
509
    static QPDFObjectHandle newArray(Rectangle const&);
510
    QPDF_DLL
511
    static QPDFObjectHandle newArray(Matrix const&);
512
    QPDF_DLL
513
    static QPDFObjectHandle newArray(QPDFMatrix const&);
514
    QPDF_DLL
515
    static QPDFObjectHandle newDictionary();
516
    QPDF_DLL
517
    static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items);
518
519
    // Create an array from a rectangle. Equivalent to the rectangle form of newArray.
520
    QPDF_DLL
521
    static QPDFObjectHandle newFromRectangle(Rectangle const&);
522
    // Create an array from a matrix. Equivalent to the matrix form of newArray.
523
    QPDF_DLL
524
    static QPDFObjectHandle newFromMatrix(Matrix const&);
525
    QPDF_DLL
526
    static QPDFObjectHandle newFromMatrix(QPDFMatrix const&);
527
528
    // Note: new stream creation methods were added to the QPDF class starting with
529
    // version 11.2.0. The ones in this class are here for backward compatibility.
530
531
    // Create a new stream and associate it with the given qpdf object. A subsequent call must be
532
    // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be
533
    // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively,
534
    // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can
535
    // call QPDF::newStream() instead.
536
    QPDF_DLL
537
    static QPDFObjectHandle newStream(QPDF* qpdf);
538
539
    // Create a new stream and associate it with the given qpdf object. Use the given buffer as the
540
    // stream data. The stream dictionary's /Length key will automatically be set to the size of the
541
    // data buffer. If additional keys are required, the stream's dictionary may be retrieved by
542
    // calling getDict(), and the resulting dictionary may be modified. This method is just a
543
    // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience
544
    // method for streams that require no parameters beyond the stream length. Note that you don't
545
    // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will
546
    // automatically compress uncompressed stream data. Example programs are provided that
547
    // illustrate this. From QPDF 11.2, you can call QPDF::newStream()
548
    // instead.
549
    QPDF_DLL
550
    static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data);
551
552
    // Create new stream with data from string. This method will create a copy of the data rather
553
    // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
554
    // From QPDF 11.2, you can call QPDF::newStream() instead.
555
    QPDF_DLL
556
    static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data);
557
558
    // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
559
    // going to be added to the QPDF object.  Normally you don't have to use this type since you can
560
    // just call QPDF::makeIndirectObject.  However, in some cases, if you have to create objects
561
    // with circular references, you may need to create a reserved object so that you can have a
562
    // reference to it and then replace the object later.  Reserved objects have the special
563
    // property that they can't be resolved to direct objects.  This makes it possible to replace a
564
    // reserved object with a new object while preserving existing references to them.  When you are
565
    // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
566
    // purpose rather than the more general QPDF::replaceObject.  It is an error to try to write a
567
    // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call
568
    // QPDF::newReserved() instead.
569
    QPDF_DLL
570
    static QPDFObjectHandle newReserved(QPDF* qpdf);
571
572
    // Provide an owning qpdf and object description. The library does this automatically with
573
    // objects that are read from the input PDF and with objects that are created programmatically
574
    // and inserted into the QPDF as a new indirect object. Most end user code will not need to call
575
    // this. If an object has an owning qpdf and object description, it enables qpdf to give
576
    // warnings with proper context in some cases where it would otherwise raise exceptions. It is
577
    // okay to add objects without an owning_qpdf to objects that have one, but it is an error to
578
    // have a QPDF contain objects with owning_qpdf set to something else. To add objects from
579
    // another qpdf, use copyForeignObject instead.
580
    QPDF_DLL
581
    void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description);
582
    QPDF_DLL
583
    bool hasObjectDescription() const;
584
585
    // Accessor methods
586
    //
587
    // (Note: this comment is referenced in qpdf-c.h and the manual.)
588
    //
589
    // In PDF files, objects have specific types, but there is nothing that prevents PDF files from
590
    // containing objects of types that aren't expected by the specification.
591
    //
592
    // There are two flavors of accessor methods:
593
    //
594
    // * getSomethingValue() returns the value and issues a type warning if the type is incorrect.
595
    //
596
    // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns
597
    //   true and initializes a reference of the appropriate type. These methods never issue type
598
    //   warnings.
599
    //
600
    // The getSomethingValue() accessors and some of the other methods expect objects of a
601
    // particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as
602
    // trying to get a dictionary key from an array, trying to get the string value of a number,
603
    // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using
604
    // the following behavior:
605
    //
606
    // * Requesting a value of the wrong type (int value from string, array item from a scalar or
607
    //   dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for
608
    //   number, the empty string for string, or the null object for an object handle.
609
    //
610
    // * Accessing an array item that is out of bounds will return a null object.
611
    //
612
    // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to
613
    //   a scalar or array) will be ignored.
614
    //
615
    // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5,
616
    // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code
617
    // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case
618
    // for all objects whose origin was a PDF file), the warning is issued using the normal warning
619
    // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them.
620
    // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created
621
    // programmatically), an exception will be thrown.
622
    //
623
    // The way to avoid getting any type warnings or exceptions, even when working with malformed
624
    // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for
625
    // example, make sure that isString() returns true before calling getStringValue()) and to
626
    // always be sure that any array indices are in bounds.
627
    //
628
    // For additional discussion and rationale for this behavior, see the section in the QPDF manual
629
    // entitled "Object Accessor Methods".
630
631
    // Methods for bool objects
    //
    // getBoolValue() follows the accessor fallback rules described earlier in this header: when
    // the object is not a boolean, the zero-like value false is returned.
632
    QPDF_DLL
633
    bool getBoolValue() const;
634
    // NOTE(review): presumably stores the boolean in the reference argument and returns whether
    // the object actually was a boolean -- confirm against the implementation.
    QPDF_DLL
635
    bool getValueAsBool(bool&) const;
636
637
    // Methods for integer objects. Note: if an integer value is too big (too far away from zero in
638
    // either direction) to fit in the requested return type, the maximum or minimum value for that
639
    // return type may be returned. For example, on a system with 32-bit int, a numeric object with
640
    // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX.
641
    QPDF_DLL
642
    long long getIntValue() const;
643
    QPDF_DLL
644
    bool getValueAsInt(long long&) const;
645
    QPDF_DLL
646
    int getIntValueAsInt() const;
647
    QPDF_DLL
648
    bool getValueAsInt(int&) const;
649
    QPDF_DLL
650
    unsigned long long getUIntValue() const;
651
    QPDF_DLL
652
    bool getValueAsUInt(unsigned long long&) const;
653
    QPDF_DLL
654
    unsigned int getUIntValueAsUInt() const;
655
    QPDF_DLL
656
    bool getValueAsUInt(unsigned int&) const;
657
658
    // Methods for real objects
659
    QPDF_DLL
660
    std::string getRealValue() const;
661
    QPDF_DLL
662
    bool getValueAsReal(std::string&) const;
663
664
    // Methods that work for both integer and real objects
665
    QPDF_DLL
666
    bool isNumber() const;
667
    QPDF_DLL
668
    double getNumericValue() const;
669
    QPDF_DLL
670
    bool getValueAsNumber(double&) const;
671
672
    // Methods for name objects. The returned name value is in qpdf's canonical form with all
673
    // escaping resolved. See comments for newName() for details.
674
    QPDF_DLL
675
    std::string getName() const;
676
    QPDF_DLL
677
    bool getValueAsName(std::string&) const;
678
679
    // Methods for string objects
680
    QPDF_DLL
681
    std::string getStringValue() const;
682
    QPDF_DLL
683
    bool getValueAsString(std::string&) const;
684
685
    // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise,
686
    // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to
687
    // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters
688
    // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range
689
    // unmapped.
690
    QPDF_DLL
691
    std::string getUTF8Value() const;
692
    QPDF_DLL
693
    bool getValueAsUTF8(std::string&) const;
694
695
    // Methods for content stream objects
696
    QPDF_DLL
697
    std::string getOperatorValue() const;
698
    QPDF_DLL
699
    bool getValueAsOperator(std::string&) const;
700
    QPDF_DLL
701
    std::string getInlineImageValue() const;
702
    QPDF_DLL
703
    bool getValueAsInlineImage(std::string&) const;
704
705
    // Methods for array objects; see also name and array objects.
706
707
    // Return an object that enables iteration over members. You can do
708
    //
709
    // for (auto iter: obj.aitems())
710
    // {
711
    //     // iter is an array element
712
    // }
713
    class QPDFArrayItems;
714
    QPDF_DLL
715
    QPDFArrayItems aitems();
716
717
    // NOTE(review): presumably the number of items in the array -- confirm against the
    // implementation.
    QPDF_DLL
718
    int getArrayNItems() const;
719
    // Return the array item at index n. As described in the accessor notes earlier in this
    // header, an out-of-bounds index yields a null object.
    QPDF_DLL
720
    QPDFObjectHandle getArrayItem(int n) const;
721
    // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling
722
    // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of
723
    // nulls.
724
    QPDF_DLL
725
    std::vector<QPDFObjectHandle> getArrayAsVector() const;
726
    // NOTE(review): presumably true exactly when getArrayAsRectangle() can succeed, i.e. the
    // object is an array of four numeric values -- confirm against the implementation.
    QPDF_DLL
727
    bool isRectangle() const;
728
    // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the
729
    // rectangle [0, 0, 0, 0]
730
    QPDF_DLL
731
    Rectangle getArrayAsRectangle() const;
732
    // NOTE(review): presumably true exactly when getArrayAsMatrix() can succeed, i.e. the object
    // is an array of six numeric values -- confirm against the implementation.
    QPDF_DLL
733
    bool isMatrix() const;
734
    // If the array is an array of six numeric values, return as a matrix. Otherwise, return the
735
    // matrix [1, 0, 0, 1, 0, 0]
736
    QPDF_DLL
737
    Matrix getArrayAsMatrix() const;
738
739
    // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as
740
    // canonical name strings starting with a leading slash and not containing any PDF syntax
741
    // escaping. See comments for getName() for details.
742
743
    // Return an object that enables iteration over members. You can do
744
    //
745
    // for (auto iter: obj.ditems())
746
    // {
747
    //     // iter.first is the key
748
    //     // iter.second is the value
749
    // }
750
    class QPDFDictItems;
751
    QPDF_DLL
752
    QPDFDictItems ditems();
753
754
    // Return true if key is present.  Keys with null values are treated as if they are not present.
755
    // This is as per the PDF spec.
756
    QPDF_DLL
757
    bool hasKey(std::string const&) const;
758
    // Return the value for the key.  If the key is not present, null is returned.
759
    QPDF_DLL
760
    QPDFObjectHandle getKey(std::string const&) const;
761
    // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access
762
    // lower-level dictionaries, as in
763
    // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font");
764
    QPDF_DLL
765
    QPDFObjectHandle getKeyIfDict(std::string const&) const;
766
    // Return all keys.  Keys with null values are treated as if they are not present.  This is as
767
    // per the PDF spec.
768
    QPDF_DLL
769
    std::set<std::string> getKeys() const;
770
    // Return dictionary as a map.  Entries with null values are included.
771
    QPDF_DLL
772
    std::map<std::string, QPDFObjectHandle> getDictAsMap() const;
773
774
    // Methods for name and array objects. The name value is in qpdf's canonical form with all
775
    // escaping resolved. See comments for newName() for details.
776
    QPDF_DLL
777
    bool isOrHasName(std::string const&) const;
778
779
    // Make all resources in a resource dictionary indirect. This just goes through all entries of
780
    // top-level subdictionaries and converts any direct objects to indirect objects. This can be
781
    // useful to call before mergeResources if it is going to be called multiple times to prevent
782
    // resources from being copied multiple times.
783
    QPDF_DLL
784
    void makeResourcesIndirect(QPDF& owning_qpdf);
785
786
    // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in
787
    // dictionary subitems are resolved, and "conflicts" is initialized to a map such that
788
    // conflicts[resource_type][old_key] == [new_key]
789
    //
790
    // See also makeResourcesIndirect, which can be useful to call before calling this.
791
    //
792
    // This method does nothing if both this object and the other object are not dictionaries.
793
    // Otherwise, it has the following behavior, where "object" refers to the object whose method is
794
    // invoked, and "other" refers to the argument:
795
    //
796
    // * For each key in "other" whose value is an array:
797
    //   * If "object" does not have that entry, shallow copy it.
798
    //   * Otherwise, if "object" has an array in the same place, append to that array any objects
799
    //     in "other"'s array that are not already present.
800
    // * For each key in "other" whose value is a dictionary:
801
    //   * If "object" does not have that entry, shallow copy it.
802
    //   * Otherwise, for each key in the subdictionary:
803
    //     * If key is not present in "object"'s entry, shallow copy it if direct or just add it if
804
    //       indirect.
805
    //     * Otherwise, if conflicts are being detected:
806
    //       * If there is a key (oldkey) already in the dictionary that points to the same indirect
807
    //         destination as key, indicate that key was replaced by oldkey. This would happen if
808
    //         these two resource dictionaries have previously been merged.
809
    //       * Otherwise pick a new key (newkey) that is unique within the resource dictionary,
810
    //         store that in the resource dictionary with key's destination as its destination, and
811
    //         indicate that key was replaced by newkey.
812
    //
813
    // The primary purpose of this method is to facilitate merging of resource dictionaries that are
814
    // supposed to have the same scope as each other. For example, this can be used to merge a form
815
    // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The
816
    // "conflicts" parameter may be previously initialized. This method adds to whatever is already
817
    // there, which can be useful when merging with multiple things.
818
    QPDF_DLL
819
    void mergeResources(
820
        QPDFObjectHandle other,
821
        std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr);
822
823
    // Get all resource names from a resource dictionary. If this object is a dictionary, this
824
    // method returns a set of all the keys in all top-level subdictionaries. For resources
825
    // dictionaries, this is the collection of names that may be referenced in the content stream.
826
    QPDF_DLL
827
    std::set<std::string> getResourceNames() const;
828
829
    // Find a unique name within a resource dictionary starting with a given prefix. This method
830
    // works by appending a number to the given prefix. It searches starting with min_suffix and
831
    // sets min_suffix to the selected value upon return. This can be used to increase efficiency if
832
    // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next
833
    // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling
834
    // this multiple times on the same resource dictionary, you can initialize resource_names by
835
    // calling getResourceNames(), incrementally update it as you add resources, and keep passing it
836
    // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time
837
    // it's called.
838
    QPDF_DLL
839
    std::string getUniqueResourceName(
840
        std::string const& prefix,
841
        int& min_suffix,
842
        std::set<std::string>* resource_names = nullptr) const;
843
844
    // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF
845
    // object. Indirect objects always have an owning QPDF. Direct objects that are read from the
846
    // input source will also have an owning QPDF. Programmatically created objects will only have
847
    // one if setObjectDescription was called.
848
    //
849
    // When the QPDF object that owns an object is destroyed, the object is changed into a null, and
850
    // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the
851
    // life of the QPDF. If in doubt, ask for it each time you need it.
852
853
    // getOwningQPDF returns a pointer to the owning QPDF if the object has one. Otherwise, it
854
    // returns a null pointer. Use this when you are able to handle the case of an object that
855
    // doesn't have an owning QPDF.
856
    QPDF_DLL
857
    QPDF* getOwningQPDF() const;
858
    // getQPDF, new in qpdf 11, returns a reference to the owning QPDF. If there is none, it throws a
859
    // runtime_error. Use this when you know the object has to have an owning QPDF, such as when
860
    // it's a known indirect object. Since streams are always indirect objects, this method can be
861
    // used safely for streams. If error_msg is specified, it will be used as the contents of the
862
    // runtime_error if there is no owner.
863
    QPDF_DLL
864
    QPDF& getQPDF(std::string const& error_msg = "") const;
865
866
    // Create a shallow copy of an object as a direct object, but do not traverse across indirect
867
    // object boundaries. That means that, for dictionaries and arrays, any keys or items that were
868
    // indirect objects will still be indirect objects that point to the same place. In the
869
    // strictest sense, this is not a shallow copy because it recursively descends arrays and
870
    // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You
871
    // can't copy a stream this way. See copyStream() instead.
872
    QPDF_DLL
873
    QPDFObjectHandle shallowCopy();
874
875
    // Create a true shallow copy of an array or dictionary, just copying the immediate items
876
    // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in
877
    // the copy, you are modifying the original, which is almost never what you want. However, if
878
    // your intention is merely to *replace* top-level items or keys and not to modify lower-level
879
    // items in the copy, this method is much faster than shallowCopy().
880
    QPDF_DLL
881
    QPDFObjectHandle unsafeShallowCopy();
882
883
    // Create a copy of this stream. The new stream and the old stream are independent: after the
884
    // copy, either the original or the copy's dictionary or data can be modified without affecting
885
    // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's
886
    // data are made. If the source stream's data is already being provided by a StreamDataProvider,
887
    // the new stream will use the same one, so you have to make sure your StreamDataProvider can
888
    // handle that case. But if you're already using a StreamDataProvider, you probably don't need
889
    // to call this method.
890
    QPDF_DLL
891
    QPDFObjectHandle copyStream();
892
893
    // Mutator methods.
894
895
    // Since qpdf 11: for mutators that may add or remove an item, there are additional versions
896
    // whose names contain "AndGet" that return the added or removed item. For example:
897
    //
898
    //   auto new_dict = dict.replaceKeyAndGetNew(
899
    //       "/New", QPDFObjectHandle::newDictionary());
900
    //
901
    //   auto old_value = dict.replaceKeyAndGetOld(
902
    //       "/New", "(something)"_qpdf);
903
904
    // Recursively copy this object, making it direct. An exception is thrown if a loop is detected.
905
    // With allow_streams true, keep indirect object references to streams. Otherwise, throw an
906
    // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream
907
    // is found, the resulting object is still associated with the containing qpdf. When
908
    // allow_streams is false, the object will no longer be connected to the original QPDF object
909
    // after this call completes successfully.
910
    QPDF_DLL
911
    void makeDirect(bool allow_streams = false);
912
913
    // Mutator methods for array objects
914
    QPDF_DLL
915
    void setArrayItem(int, QPDFObjectHandle const&);
916
    QPDF_DLL
917
    void setArrayFromVector(std::vector<QPDFObjectHandle> const& items);
918
    // Insert an item before the item at the given position ("at") so that it has that position
919
    // after insertion. If "at" is equal to the size of the array, insert the item at the end.
920
    QPDF_DLL
921
    void insertItem(int at, QPDFObjectHandle const& item);
922
    // Like insertItem but return the item that was inserted.
923
    QPDF_DLL
924
    QPDFObjectHandle insertItemAndGetNew(int at, QPDFObjectHandle const& item);
925
    // Append an item to an array.
926
    QPDF_DLL
927
    void appendItem(QPDFObjectHandle const& item);
928
    // Append an item, and return the newly added item.
929
    QPDF_DLL
930
    QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item);
931
    // Remove the item at that position, reducing the size of the array by one.
932
    QPDF_DLL
933
    void eraseItem(int at);
934
    // Erase an item and return the item that was removed.
935
    QPDF_DLL
936
    QPDFObjectHandle eraseItemAndGetOld(int at);
937
938
    // Mutator methods for dictionary objects
939
940
    // Replace value of key, adding it if it does not exist. If value is null, remove the key.
941
    QPDF_DLL
942
    void replaceKey(std::string const& key, QPDFObjectHandle const& value);
943
    // Replace value of key and return the value.
944
    QPDF_DLL
945
    QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value);
946
    // Replace value of key and return the old value, or null if the key was previously not present.
947
    QPDF_DLL
948
    QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value);
949
    // Remove key, doing nothing if key does not exist.
950
    QPDF_DLL
951
    void removeKey(std::string const& key);
952
    // Remove key and return the old value. If the old value didn't exist, return a null object.
953
    QPDF_DLL
954
    QPDFObjectHandle removeKeyAndGetOld(std::string const& key);
955
956
    // Methods for stream objects
957
    QPDF_DLL
958
    QPDFObjectHandle getDict() const;
959
960
    // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode
961
    // level, whether compression is enabled, and its ability to filter. Passing false will prevent
962
    // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding
963
    // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and
964
    // recompressing a stream that it knows how to operate on for any application-specific reason,
965
    // such as that you have already optimized its filtering. Note that this doesn't affect any
966
    // other ways to get the stream's data, such as pipeStreamData or getStreamData.
967
    QPDF_DLL
968
    void setFilterOnWrite(bool);
969
    // NOTE(review): presumably returns the setting last established by setFilterOnWrite(), which
    // the comment above describes as enabled by default -- confirm against the implementation.
    QPDF_DLL
970
    bool getFilterOnWrite();
971
972
    // If addTokenFilter has been called for this stream, then the original data should be
973
    // considered to be modified. This means we should avoid optimizations such as not filtering a
974
    // stream that is already compressed.
975
    QPDF_DLL
976
    bool isDataModified();
977
978
    // Returns filtered (uncompressed) stream data.  Throws an exception if the stream is filtered
979
    // and we can't decode it.
980
    QPDF_DLL
981
    std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized);
982
983
    // Returns unfiltered (raw) stream data.
984
    QPDF_DLL
985
    std::shared_ptr<Buffer> getRawStreamData();
986
987
    // Write stream data through the given pipeline. A null pipeline value may be used if all you
988
    // want to do is determine whether a stream is filterable and would be filtered based on the
989
    // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags
990
    // alter the behavior in the following way:
991
    //
992
    // encode_flags:
993
    //
994
    // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are
995
    // applied.
996
    //
997
    // qpdf_sf_normalize -- tokenize as content stream and normalize tokens
998
    //
999
    // decode_level:
1000
    //
1001
    // qpdf_dl_none -- do not decode any streams.
1002
    //
1003
    // qpdf_dl_generalized -- decode supported general-purpose filters. This includes
1004
    // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode.
1005
    //
1006
    // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy
1007
    // specialized filters. This includes /RunLengthDecode.
1008
    //
1009
    // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported
1010
    // lossy filters. This includes /DCTDecode.
1011
    //
1012
    // If, based on the flags and the filters and decode parameters, we determine that we know how
1013
    // to apply all requested filters, do so and return true if we are successful.
1014
    //
1015
    // The exact meaning of the return value differs among the different versions of this function, but
1016
    // for any version, the meaning has been the same. For the main version, added in qpdf 10, the
1017
    // return value indicates whether the overall operation succeeded. The filter parameter, if
1018
    // specified, will be set to whether or not filtering was attempted. If filtering was not
1019
    // requested, this value will be false even if the overall operation succeeded.
1020
    //
1021
    // If filtering is requested but this method returns false, it means there was some error in the
1022
    // filtering, in which case the resulting data is likely partially filtered and/or incomplete
1023
    // and may not be consistent with the configured filters. QPDFWriter handles this by attempting
1024
    // to get the stream data without filtering, but callers should consider a false return value
1025
    // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry
1026
    // in that case, pass true as the value of will_retry. This changes the warning issued by the
1027
    // library to indicate that the operation will be retried without filtering to avoid data loss.
1028
1029
    // Return value is overall success, even if filtering is not requested.
1030
    QPDF_DLL
1031
    bool pipeStreamData(
1032
        Pipeline*,
1033
        bool* filtering_attempted,
1034
        int encode_flags,
1035
        qpdf_stream_decode_level_e decode_level,
1036
        bool suppress_warnings = false,
1037
        bool will_retry = false);
1038
1039
    // Legacy version. Return value is whether filtering was attempted. There is no way to determine
1040
    // success if filtering was not attempted.
1041
    QPDF_DLL
1042
    bool pipeStreamData(
1043
        Pipeline*,
1044
        int encode_flags,
1045
        qpdf_stream_decode_level_e decode_level,
1046
        bool suppress_warnings = false,
1047
        bool will_retry = false);
1048
1049
    // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows:
1050
    //  filter = false                  -> encode_flags = 0
1051
    //  filter = true                   -> decode_level = qpdf_dl_generalized
1052
    //    normalize = true -> encode_flags |= qpdf_sf_normalize
1053
    //    compress = true  -> encode_flags |= qpdf_sf_compress
1054
    // Return value is whether filtering was attempted.
1055
    QPDF_DLL
1056
    bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress);
1057
1058
    // Replace a stream's dictionary.  The new dictionary must be consistent with the stream's data.
1059
    // This is most appropriately used when creating streams from scratch that will use a stream
1060
    // data provider and therefore start with an empty dictionary.  It may be more convenient in
1061
    // this case than calling getDict and modifying it for each key.  The pdf-create example does
1062
    // this.
1063
    QPDF_DLL
1064
    void replaceDict(QPDFObjectHandle const&);
1065
1066
    // Test whether a stream is the root XMP /Metadata object of its owning QPDF.
1067
    QPDF_DLL
1068
    bool isRootMetadata() const;
1069
1070
    // REPLACING STREAM DATA
1071
1072
    // Note about all replaceStreamData methods: whatever values are passed as filter and
1073
    // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object
1074
    // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf
1075
    // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing
1076
    // values untouched.
1077
1078
    // Replace this stream's stream data with the given data buffer. The stream's /Length key is
1079
    // replaced with the length of the data buffer. The stream is interpreted as if the data read
1080
    // from the file, after any decryption filters have been applied, is as presented.
1081
    QPDF_DLL
1082
    void replaceStreamData(
1083
        std::shared_ptr<Buffer> data,
1084
        QPDFObjectHandle const& filter,
1085
        QPDFObjectHandle const& decode_parms);
1086
1087
    // Replace the stream's stream data with the given string. This method will create a copy of the
1088
    // data rather than using the user-provided buffer as in the std::shared_ptr<Buffer> version of
1089
    // replaceStreamData.
1090
    QPDF_DLL
1091
    void replaceStreamData(
1092
        std::string const& data,
1093
        QPDFObjectHandle const& filter,
1094
        QPDFObjectHandle const& decode_parms);
1095
1096
    // As above, replace this stream's stream data.  Instead of directly providing a buffer with the
1097
    // stream data, call the given provider's provideStreamData method.  See comments on the
1098
    // StreamDataProvider class (defined above) for details on the method.  The data must be
1099
    // consistent with filter and decode_parms as provided.  Although it is more complex to use this
1100
    // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid
1101
    // allocating memory for the stream data.  Example programs are provided that use both forms of
1102
    // replaceStreamData.
1103
1104
    // Note about stream length: for any given stream, the provider must provide the same amount of
1105
    // data each time it is called. This is critical for making linearization work properly.
1106
    // Versions of qpdf before 3.0.0 required a length to be specified here.  Starting with
1107
    // version 3.0.0, this is no longer necessary (or permitted).  The first time the stream data
1108
    // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is
1109
    // enforced that the length be the same as the first time.
1110
1111
    // If you have gotten a compile error here while building code that worked with older versions
1112
    // of qpdf, just omit the length parameter.  You can also simplify your code by not having to
1113
    // compute the length in advance.
1114
    QPDF_DLL
1115
    void replaceStreamData(
1116
        std::shared_ptr<StreamDataProvider> provider,
1117
        QPDFObjectHandle const& filter,
1118
        QPDFObjectHandle const& decode_parms);
1119
1120
    // Starting in qpdf 10.2, you can use C++11 function objects instead of StreamDataProvider.
1121
1122
    // The provider should write the stream data to the pipeline. For a one-liner to replace stream
1123
    // data with the contents of a file, pass QUtil::file_provider(filename) as provider.
1124
    QPDF_DLL
1125
    void replaceStreamData(
1126
        std::function<void(Pipeline*)> provider,
1127
        QPDFObjectHandle const& filter,
1128
        QPDFObjectHandle const& decode_parms);
1129
    // The provider should write the stream data to the pipeline, returning true if it succeeded
1130
    // without errors.
1131
    QPDF_DLL
1132
    void replaceStreamData(
1133
        std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider,
1134
        QPDFObjectHandle const& filter,
1135
        QPDFObjectHandle const& decode_parms);
1136
1137
    // Access object ID and generation.  For direct objects, return object ID 0.
1138
1139
    // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to
1140
    // the pattern of depending on object ID or generation without the other.  In general, when
1141
    // keeping track of object IDs, it's better to use QPDFObjGen instead.
1142
1143
    QPDF_DLL
1144
    QPDFObjGen getObjGen() const;
1145
    QPDF_DLL
1146
    int getObjectID() const;
1147
    QPDF_DLL
1148
    int getGeneration() const;
1149
1150
    QPDF_DLL
1151
    std::string unparse() const;
1152
    QPDF_DLL
1153
    std::string unparseResolved() const;
1154
    // For strings only, force binary representation. Otherwise, same as unparse.
1155
    QPDF_DLL
1156
    std::string unparseBinary() const;
1157
1158
    // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available
1159
    // JSON version. The JSON is generated as follows:
1160
    // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their
1161
    //   native JSON types.
1162
    // * Names are encoded as strings representing the canonical representation (after parsing #xx)
1163
    //   and preceded by a slash, just as unparse() returns. For example, the JSON for the
1164
    //   PDF-syntax name /Text#2fPlain would be "/Text/Plain".
1165
    // * Indirect references are encoded as strings containing "obj gen R"
1166
    // * Strings
1167
    //   * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters
1168
    //     encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode
1169
    //     mappings are not reversible. There is no way to tell the difference between a string that
1170
    //     looks like a name or indirect object from an actual name or indirect object.
1171
    //   * JSON v2:
1172
    //     * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally
1173
    //       mapped to Unicode (which is all strings without undefined characters) are represented
1174
    //       as "u:" followed by the UTF-8 encoded string. Example:
1175
    //       "u:potato".
1176
    //     * All other strings are represented as "b:" followed by a hexadecimal encoding of the
1177
    //       string. Example: "b:0102cacb"
1178
    // * Streams
1179
    //   * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a
1180
    //     dictionary other than context.
1181
    //   * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the
1182
    //     stream's dictionary. Since "dict" does not otherwise represent anything, this is
1183
    //     unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data.
1184
    // * Object types that are only valid in content streams (inline image, operator) are serialized
1185
    //   as "null". Attempting to serialize a "reserved" object is an error.
1186
    // If dereference_indirect is true and this is an indirect object, show the actual contents of
1187
    // the object. The effect of dereference_indirect applies only to this object. It is not
1188
    // recursive.
1189
    QPDF_DLL
1190
    JSON getJSON(int json_version, bool dereference_indirect = false) const;
1191
1192
    // Write the object encoded as JSON to a pipeline. This is equivalent to, but more efficient
1193
    // than, calling getJSON(json_version, dereference_indirect).write(p, depth). See the
1194
    // documentation for getJSON and JSON::write for further detail.
1195
    QPDF_DLL
1196
    void writeJSON(
1197
        int json_version, Pipeline* p, bool dereference_indirect = false, size_t depth = 0) const;
1198
1199
    // This method can be called on a stream to get a more extended JSON representation of the
1200
    // stream that includes the stream's data. The JSON object returned is always a dictionary whose
1201
    // "dict" key is an encoding of the stream's dictionary. The representation of the data is
1202
    // determined by the json_data field.
1203
    //
1204
    // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file.
1205
    //
1206
    // If json_data is qpdf_sj_none, stream data is not represented.
1207
    //
1208
    // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on
1209
    // the value of decode_level, which has the same meaning as with pipeStreamData.
1210
    //
1211
    // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data"
1212
    // field of the dictionary that is returned.
1213
    //
1214
    // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be
1215
    // supplied. The value of data_filename is stored in the resulting json in the "datafile" key
1216
    // but is not otherwise used. The stream data itself (raw or filtered depending on decode level)
1217
    // is written to the pipeline via pipeStreamData().
1218
    //
1219
    // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must
1220
    // remain valid until after the JSON object is written.
1221
    QPDF_DLL
1222
    JSON getStreamJSON(
1223
        int json_version,
1224
        qpdf_json_stream_data_e json_data,
1225
        qpdf_stream_decode_level_e decode_level,
1226
        Pipeline* p,
1227
        std::string const& data_filename);
1228
1229
    // Legacy helper methods for commonly performed operations on pages. Newer code should use
1230
    // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as
1231
    // the identically named methods in that class, but newer functionality will be added there.
1232
    QPDF_DLL
1233
    std::map<std::string, QPDFObjectHandle> getPageImages();
1234
    QPDF_DLL
1235
    std::vector<QPDFObjectHandle> getPageContents();
1236
    QPDF_DLL
1237
    void addPageContents(QPDFObjectHandle contents, bool first);
1238
    QPDF_DLL
1239
    void rotatePage(int angle, bool relative);
1240
    QPDF_DLL
1241
    void coalesceContentStreams();
1242
    // End legacy page helpers
1243
1244
    // Issue a warning about this object if possible. If the object has a description, a warning
1245
    // will be issued using the owning QPDF as context. Otherwise, a message will be written to the
1246
    // default logger's error stream, which is standard error if not overridden. Objects read
1247
    // normally from the file have descriptions. See comments on setObjectDescription for additional
1248
    // details.
1249
    QPDF_DLL
1250
    void warnIfPossible(std::string const& warning) const;
1251
1252
    // Convenience routine: Throws if the assumption is violated. Your code will be better if you
1253
    // call one of the isType methods and handle the case of the type being wrong, but these can be
1254
    // convenient if you have already verified the type.
1255
    QPDF_DLL
1256
    void assertInitialized() const;
1257
1258
    QPDF_DLL
1259
    void assertNull() const;
1260
    QPDF_DLL
1261
    void assertBool() const;
1262
    QPDF_DLL
1263
    void assertInteger() const;
1264
    QPDF_DLL
1265
    void assertReal() const;
1266
    QPDF_DLL
1267
    void assertName() const;
1268
    QPDF_DLL
1269
    void assertString() const;
1270
    QPDF_DLL
1271
    void assertOperator() const;
1272
    QPDF_DLL
1273
    void assertInlineImage() const;
1274
    QPDF_DLL
1275
    void assertArray() const;
1276
    QPDF_DLL
1277
    void assertDictionary() const;
1278
    QPDF_DLL
1279
    void assertStream() const;
1280
    QPDF_DLL
1281
    void assertReserved() const;
1282
1283
    QPDF_DLL
1284
    void assertIndirect() const;
1285
    QPDF_DLL
1286
    void assertScalar() const;
1287
    QPDF_DLL
1288
    void assertNumber() const;
1289
1290
    // The isPageObject method checks the /Type key of the object. This is not completely reliable
1291
    // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is
1292
    // slightly more accepting but may still return false here when treating the object as a page
1293
    // would work. Use this sparingly.
1294
    QPDF_DLL
1295
    bool isPageObject() const;
1296
    QPDF_DLL
1297
    bool isPagesObject() const;
1298
    QPDF_DLL
1299
    void assertPageObject() const;
1300
1301
    QPDF_DLL
1302
    bool isFormXObject() const;
1303
1304
    // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as
1305
    // images.
1306
    QPDF_DLL
1307
    bool isImage(bool exclude_imagemask = true) const;
1308
1309
    // The following methods do not form part of the public API and are for internal use only.
1310
1311
    QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) :
1312
35.5k
        qpdf::BaseHandle(obj)
1313
35.5k
    {
1314
35.5k
    }
1315
    QPDFObjectHandle(std::shared_ptr<QPDFObject>&& obj) :
1316
2.14M
        qpdf::BaseHandle(std::move(obj))
1317
2.14M
    {
1318
2.14M
    }
1319
    std::shared_ptr<QPDFObject>
1320
    getObj()
1321
19.0k
    {
1322
19.0k
        return obj;
1323
19.0k
    }
1324
    std::shared_ptr<QPDFObject>
1325
    getObj() const
1326
0
    {
1327
0
        return obj;
1328
0
    }
1329
    // Internal use only: return the raw pointer managed by this handle's shared pointer. The
    // handle keeps its reference; nothing is transferred to the caller.
    QPDFObject* getObjectPtr()
    {
        return this->obj.get();
    }
1334
    // Internal use only (const overload): return the raw pointer managed by this handle's shared
    // pointer. The handle keeps its reference; nothing is transferred to the caller.
    //
    // The return type is deliberately written without a top-level const. A const qualifier on a
    // pointer returned by value is discarded for every call expression, so it gave callers no
    // protection and only produced ignored-qualifier warnings. The pointee is not const-qualified,
    // exactly as before.
    QPDFObject*
    getObjectPtr() const
    {
        return obj.get();
    }
1339
1340
    // Internal overload of writeJSON: takes a JSON::Writer& in place of the Pipeline* and depth
    // arguments accepted by the public overload. json_version and dereference_indirect keep the
    // same names and default as in the public overload.
    void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const;
1341
1342
    // Internal accessors returning a qpdf::Array, qpdf::Dictionary or qpdf::Stream for this handle.
    // Note the differing defaults: as_array and as_dictionary default to qpdf::typed::any, whereas
    // as_stream defaults to qpdf::typed::strict.
    // NOTE(review): the meaning of the qpdf::typed options is defined elsewhere -- confirm before
    // relying on either default.
    inline qpdf::Array as_array(qpdf::typed options = qpdf::typed::any) const;
1343
    inline qpdf::Dictionary as_dictionary(qpdf::typed options = qpdf::typed::any) const;
1344
    inline qpdf::Stream as_stream(qpdf::typed options = qpdf::typed::strict) const;
1345
1346
  private:
1347
    void typeWarning(char const* expected_type, std::string const& warning) const;
1348
    void objectWarning(std::string const& warning) const;
1349
    void assertType(char const* type_name, bool istype) const;
1350
    void makeDirect(QPDFObjGen::set& visited, bool stop_at_streams);
1351
    void setParsedOffset(qpdf_offset_t offset);
1352
    void parseContentStream_internal(std::string const& description, ParserCallbacks* callbacks);
1353
    static void parseContentStream_data(
1354
        std::shared_ptr<Buffer>,
1355
        std::string const& description,
1356
        ParserCallbacks* callbacks,
1357
        QPDF* context);
1358
    std::vector<QPDFObjectHandle>
1359
    arrayOrStreamToStreamArray(std::string const& description, std::string& all_description);
1360
    static void warn(QPDF*, QPDFExc const&);
1361
    void checkOwnership(QPDFObjectHandle const&) const;
1362
};
1363
1364
#ifndef QPDF_NO_QPDF_STRING
1365
// This is short for QPDFObjectHandle::parse, so you can do
1366
1367
// auto oh = "<< /Key (value) >>"_qpdf;
1368
1369
// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration
1370
// from being here.
1371
1372
/* clang-format off */
1373
  // Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest
1374
  // of the file as a string if clang-format removes the space after "operator", and as of
1375
  // clang-format 15, there's no way to prevent it from doing so.
1376
  QPDF_DLL
1377
  QPDFObjectHandle operator ""_qpdf(char const* v, size_t len);
1378
/* clang-format on */
1379
1380
#endif // QPDF_NO_QPDF_STRING
1381
1382
class QPDFObjectHandle::QPDFDictItems
1383
{
1384
    // This class allows C++-style iteration, including range-for iteration, around dictionaries.
1385
    // You can write
1386
1387
    // for (auto iter: QPDFDictItems(dictionary_obj))
1388
    // {
1389
    //     // iter.first is a string
1390
    //     // iter.second is a QPDFObjectHandle
1391
    // }
1392
1393
    // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
1394
1395
  public:
1396
    QPDF_DLL
1397
    QPDFDictItems(QPDFObjectHandle const& oh);
1398
1399
    class iterator
1400
    {
1401
        friend class QPDFDictItems;
1402
1403
      public:
1404
        typedef std::pair<std::string, QPDFObjectHandle> T;
1405
        using iterator_category = std::bidirectional_iterator_tag;
1406
        using value_type = T;
1407
        using difference_type = long;
1408
        using pointer = T*;
1409
        using reference = T&;
1410
1411
0
        virtual ~iterator() = default;
1412
        QPDF_DLL
1413
        iterator& operator++();
1414
        iterator
1415
        operator++(int)
1416
0
        {
1417
0
            iterator t = *this;
1418
0
            ++(*this);
1419
0
            return t;
1420
0
        }
1421
        QPDF_DLL
1422
        iterator& operator--();
1423
        iterator
1424
        operator--(int)
1425
0
        {
1426
0
            iterator t = *this;
1427
0
            --(*this);
1428
0
            return t;
1429
0
        }
1430
        QPDF_DLL
1431
        reference operator*();
1432
        QPDF_DLL
1433
        pointer operator->();
1434
        QPDF_DLL
1435
        bool operator==(iterator const& other) const;
1436
        bool
1437
        operator!=(iterator const& other) const
1438
0
        {
1439
0
            return !operator==(other);
1440
0
        }
1441
1442
      private:
1443
        iterator(QPDFObjectHandle& oh, bool for_begin);
1444
        void updateIValue();
1445
1446
        class Members
1447
        {
1448
            friend class QPDFDictItems::iterator;
1449
1450
          public:
1451
0
            ~Members() = default;
1452
1453
          private:
1454
            Members(QPDFObjectHandle& oh, bool for_begin);
1455
            Members() = delete;
1456
            Members(Members const&) = delete;
1457
1458
            QPDFObjectHandle& oh;
1459
            std::set<std::string> keys;
1460
            std::set<std::string>::iterator iter;
1461
            bool is_end;
1462
        };
1463
        std::shared_ptr<Members> m;
1464
        value_type ivalue;
1465
    };
1466
1467
    QPDF_DLL
1468
    iterator begin();
1469
    QPDF_DLL
1470
    iterator end();
1471
1472
  private:
1473
    QPDFObjectHandle oh;
1474
};
1475
1476
class QPDFObjectHandle::QPDFArrayItems
1477
{
1478
    // This class allows C++-style iteration, including range-for iteration, around arrays. You can
1479
    // write
1480
1481
    // for (auto iter: QPDFArrayItems(array_obj))
1482
    // {
1483
    //     // iter is a QPDFObjectHandle
1484
    // }
1485
1486
    // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
1487
1488
  public:
1489
    QPDF_DLL
1490
    QPDFArrayItems(QPDFObjectHandle const& oh);
1491
1492
    class iterator
1493
    {
1494
        friend class QPDFArrayItems;
1495
1496
      public:
1497
        typedef QPDFObjectHandle T;
1498
        using iterator_category = std::bidirectional_iterator_tag;
1499
        using value_type = T;
1500
        using difference_type = long;
1501
        using pointer = T*;
1502
        using reference = T&;
1503
1504
0
        virtual ~iterator() = default;
1505
        QPDF_DLL
1506
        iterator& operator++();
1507
        iterator
1508
        operator++(int)
1509
0
        {
1510
0
            iterator t = *this;
1511
0
            ++(*this);
1512
0
            return t;
1513
0
        }
1514
        QPDF_DLL
1515
        iterator& operator--();
1516
        iterator
1517
        operator--(int)
1518
0
        {
1519
0
            iterator t = *this;
1520
0
            --(*this);
1521
0
            return t;
1522
0
        }
1523
        QPDF_DLL
1524
        reference operator*();
1525
        QPDF_DLL
1526
        pointer operator->();
1527
        QPDF_DLL
1528
        bool operator==(iterator const& other) const;
1529
        bool
1530
        operator!=(iterator const& other) const
1531
0
        {
1532
0
            return !operator==(other);
1533
0
        }
1534
1535
      private:
1536
        iterator(QPDFObjectHandle& oh, bool for_begin);
1537
        void updateIValue();
1538
1539
        class Members
1540
        {
1541
            friend class QPDFArrayItems::iterator;
1542
1543
          public:
1544
            ~Members() = default;
1545
1546
          private:
1547
            Members(QPDFObjectHandle& oh, bool for_begin);
1548
            Members() = delete;
1549
            Members(Members const&) = delete;
1550
1551
            QPDFObjectHandle& oh;
1552
            int item_number;
1553
            bool is_end;
1554
        };
1555
        std::shared_ptr<Members> m;
1556
        value_type ivalue;
1557
    };
1558
1559
    QPDF_DLL
1560
    iterator begin();
1561
    QPDF_DLL
1562
    iterator end();
1563
1564
  private:
1565
    QPDFObjectHandle oh;
1566
};
1567
1568
namespace qpdf
1569
{
1570
    inline BaseHandle::
1571
    operator bool() const
1572
6.43M
    {
1573
6.43M
        return static_cast<bool>(obj);
1574
6.43M
    }
1575
1576
    inline BaseHandle::
1577
    operator QPDFObjectHandle() const
1578
3.89k
    {
1579
3.89k
        return {obj};
1580
3.89k
    }
1581
1582
} // namespace qpdf
1583
1584
inline bool
1585
QPDFObjectHandle::isInitialized() const
1586
0
{
1587
0
    return obj != nullptr;
1588
0
}
1589
1590
#endif // QPDFOBJECTHANDLE_HH