Coverage Report

Created: 2025-10-10 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/include/qpdf/QPDFAcroFormDocumentHelper.hh
Line
Count
Source
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
7
// in compliance with the License. You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software distributed under the License
12
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
// or implied. See the License for the specific language governing permissions and limitations under
14
// the License.
15
//
16
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
17
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
18
// Please see the manual for additional information.
19
20
#ifndef QPDFACROFORMDOCUMENTHELPER_HH
21
#define QPDFACROFORMDOCUMENTHELPER_HH
22
23
#include <qpdf/QPDFDocumentHelper.hh>
24
25
#include <qpdf/DLL.h>
26
27
#include <qpdf/QPDFAnnotationObjectHelper.hh>
28
#include <qpdf/QPDFFormFieldObjectHelper.hh>
29
#include <qpdf/QPDFPageObjectHelper.hh>
30
31
#include <map>
32
#include <set>
33
#include <vector>
34
35
// This document helper is intended to help with operations on interactive forms. Here are the key
36
// things to know:
37
38
// * The PDF specification talks about interactive forms and also about form XObjects. While form
39
//   XObjects appear in parts of interactive forms, this class is concerned with interactive forms,
40
//   not form XObjects.
41
//
42
// * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7.
43
//   Also relevant is the section about Widget annotations. Annotations are discussed in section
44
//   12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed
45
//   specifically in section 12.5.6.19.
46
//
47
// * What you need to know about the structure of interactive forms in PDF files:
48
//
49
//   - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are
50
//     represented as a tree structure much like pages. Nodes in the fields tree may contain other
51
//     fields. Fields may inherit values of many of their attributes from ancestors in the tree.
52
//
53
//   - Fields may also have children that are widget annotations. As a special case, and a cause of
54
//     considerable confusion, if a field has a single annotation as a child, the annotation
55
//     dictionary may be merged with the field dictionary. In that case, the field and the
56
//     annotation are in the same object. Note that, while field dictionary attributes are
57
//     inherited, annotation dictionary attributes are not.
58
//
59
//   - A page dictionary contains a key called "/Annots" which contains a simple list of
60
//     annotations. For any given annotation of subtype "/Widget", you should encounter that
61
//     annotation in the "/Annots" array of a page, and you should also be able to reach it by
62
//     traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case
63
//     (and also a very common case), a form field's widget annotation will be merged with the field
64
//     object, and the object will appear directly both under "/Annots" in the page dictionary and
65
//     under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace
66
//     through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation
67
//     dictionary.
68
class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
69
{
70
  public:
71
    // Get a shared document helper for a given QPDF object.
72
    //
73
    // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated
74
    // validation of the AcroForm structure, which can be expensive.
75
    QPDF_DLL
76
    static QPDFAcroFormDocumentHelper& get(QPDF& qpdf);
77
78
    // Re-validate the AcroForm structure. This is useful if you have modified the structure of the
79
    // AcroForm dictionary in a way that would invalidate the cache.
80
    //
81
    // If repair is true and the validation encounters errors, the document will be repaired if
82
    // possible.
83
    QPDF_DLL
84
    void validate(bool repair = true);
85
    // Construct a helper for the given QPDF directly; see get() above for a shared helper.
86
    QPDF_DLL
87
    QPDFAcroFormDocumentHelper(QPDF&);
88
89
0
    ~QPDFAcroFormDocumentHelper() override = default;
90
91
    // This class lazily creates an internal cache of the mapping among form fields, annotations,
92
    // and pages. Methods within this class preserve the validity of this cache. However, if you
93
    // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form
94
    // fields manually in a way that alters the association between forms, fields, annotations, and
95
    // pages, it may cause this cache to become invalid. This method marks the cache invalid and
96
    // forces it to be regenerated the next time it is needed.
97
    QPDF_DLL
98
    void invalidateCache();
99
    // NOTE(review): presumably reports whether the document has an /AcroForm -- confirm.
100
    QPDF_DLL
101
    bool hasAcroForm();
102
103
    // Add a form field, initializing the document's AcroForm dictionary if needed, updating the
104
    // cache if necessary. Note that if you are adding fields that are copies of other fields,
105
    // this method may result in multiple fields existing with the same qualified name, which can
106
    // have unexpected side effects. In that case, you should use addAndRenameFormFields() instead.
107
    QPDF_DLL
108
    void addFormField(QPDFFormFieldObjectHelper);
109
110
    // Add a collection of form fields making sure that their fully qualified names don't conflict
111
    // with already present form fields. Fields within the collection of new fields that have the
112
    // same name as each other will continue to do so.
113
    QPDF_DLL
114
    void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
115
116
    // Remove fields from the fields array
117
    QPDF_DLL
118
    void removeFormFields(std::set<QPDFObjGen> const&);
119
120
    // Set the name of a field, updating internal records of field names. Name should be UTF-8
121
    // encoded.
122
    QPDF_DLL
123
    void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
124
125
    // Return a vector of all terminal fields in a document. Terminal fields are fields that have no
126
    // children that are also fields. Terminal fields may still have children that are annotations.
127
    // Intermediate nodes in the fields tree are not included in this list, but you can still reach
128
    // them through the getParent method of the field object helper.
129
    QPDF_DLL
130
    std::vector<QPDFFormFieldObjectHelper> getFormFields();
131
132
    // Return all the form fields that have the given fully-qualified name and also have an explicit
133
    // "/T" attribute. For this information to be accurate, any changes to field names must be done
134
    // through setFormFieldName() above.
135
    QPDF_DLL
136
    std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name);
137
138
    // Return the annotations associated with a terminal field. Note that in the case of a field
139
    // having a single annotation, the underlying object will typically be the same as the
140
    // underlying object for the field.
141
    QPDF_DLL
142
    std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper);
143
144
    // Return annotations of subtype /Widget for a page.
145
    QPDF_DLL
146
    std::vector<QPDFAnnotationObjectHelper> getWidgetAnnotationsForPage(QPDFPageObjectHelper);
147
148
    // Return top-level form fields for a page.
149
    QPDF_DLL
150
    std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper);
151
152
    // Return the terminal field that is associated with this annotation. If the annotation
153
    // dictionary is merged with the field dictionary, the underlying object will be the same, but
154
    // this is not always the case. Note that if you call this method with an annotation that is not
155
    // a widget annotation, there will not be an associated field, and this method will return a
156
    // helper associated with a null object (isNull() == true).
157
    QPDF_DLL
158
    QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper);
159
160
    // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as
161
    // that is how PDF viewers are supposed to interpret it.
162
    QPDF_DLL
163
    bool getNeedAppearances();
164
165
    // Indicate whether appearance streams must be regenerated. If you modify a field value, you
166
    // should call setNeedAppearances(true) unless you also generate an appearance stream for the
167
    // corresponding annotation at the same time. If you generate appearance streams for all fields,
168
    // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will
169
    // automatically call this method unless you tell it not to.
170
    QPDF_DLL
171
    void setNeedAppearances(bool);
172
173
    // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all
174
    // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for
175
    // generateAppearance for limitations. For checkbox and radio button fields, this code ensures
176
    // that appearance state is consistent with the field's value and uses any pre-existing
177
    // appearance streams.
178
    QPDF_DLL
179
    void generateAppearancesIfNeeded();
180
181
    // Disable Digital Signature Fields. Remove all digital signature fields from the document,
182
    // leaving any annotation showing the content of the field intact. This also calls
183
    // QPDF::removeSecurityRestrictions.
184
    QPDF_DLL
185
    void disableDigitalSignatures();
186
187
    // Note: this method works on all annotations, not just ones with associated fields. For each
188
    // annotation in old_annots, apply the given transformation matrix to create a new annotation.
189
    // New annotations are appended to new_annots. If the annotation is associated with a form
190
    // field, a new form field is created that points to the new annotation and is appended to
191
    // new_fields, and the old field is added to old_fields.
192
    //
193
    // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf,
194
    // and copyForeignObject will be called automatically. If this is the case, for efficiency, you
195
    // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of
196
    // creating one for each call to transformAnnotations. New fields and annotations are not added
197
    // to the document or pages. You have to do that yourself after calling transformAnnotations. If
198
    // this operation will leave orphaned fields behind, such as if you are replacing the old
199
    // annotations with the new ones on the same page and the fields and annotations are not shared,
200
    // you will also need to remove the old fields to prevent them from hanging around unreferenced.
201
    QPDF_DLL
202
    void transformAnnotations(
203
        QPDFObjectHandle old_annots,
204
        std::vector<QPDFObjectHandle>& new_annots,
205
        std::vector<QPDFObjectHandle>& new_fields,
206
        std::set<QPDFObjGen>& old_fields,
207
        QPDFMatrix const& cm,
208
        QPDF* from_qpdf = nullptr,
209
        QPDFAcroFormDocumentHelper* from_afdh = nullptr);
210
211
    // Copy form fields and annotations from one page to another, allowing the from page to be in a
212
    // different QPDF or in the same QPDF. This would typically be called after calling addPage to
213
    // add field/annotation awareness. When just copying the page by itself, annotations end up
214
    // being shared, and fields end up being omitted because there is no reference to the field from
215
    // the page. This method ensures that each separate copy of a page has private annotations and
216
    // that fields and annotations are properly updated to resolve conflicts that may occur from
217
    // common resource and field names across documents. It is basically a wrapper around
218
    // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any
219
    // newly created fields are added to it.
220
    QPDF_DLL
221
    void fixCopiedAnnotations(
222
        QPDFObjectHandle to_page,
223
        QPDFObjectHandle from_page,
224
        QPDFAcroFormDocumentHelper& from_afdh,
225
        std::set<QPDFObjGen>* new_fields = nullptr);
226
227
  private:
228
    void analyze();
229
    void traverseField(
230
        QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited);
231
    QPDFObjectHandle getOrCreateAcroForm();
232
    void adjustInheritedFields(
233
        QPDFObjectHandle obj,
234
        bool override_da,
235
        std::string const& from_default_da,
236
        bool override_q,
237
        int from_default_q);
238
    void adjustDefaultAppearances(
239
        QPDFObjectHandle obj,
240
        std::map<std::string, std::map<std::string, std::string>> const& dr_map);
241
    void adjustAppearanceStream(
242
        QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map);
243
    // Members is only forward-declared here; presumably it holds the helper's private state.
244
    class Members;
245
246
    std::shared_ptr<Members> m;
247
};
248
249
#endif // QPDFACROFORMDOCUMENTHELPER_HH