/src/qpdf/include/qpdf/QPDFAcroFormDocumentHelper.hh
Line | Count | Source |
1 | | // Copyright (c) 2005-2021 Jay Berkenbilt |
2 | | // Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger |
3 | | // |
4 | | // This file is part of qpdf. |
5 | | // |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
7 | | // in compliance with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
12 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
13 | | // or implied. See the License for the specific language governing permissions and limitations under |
14 | | // the License. |
15 | | // |
16 | | // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
17 | | // License. At your option, you may continue to consider qpdf to be licensed under those terms. |
18 | | // Please see the manual for additional information. |
19 | | |
20 | | #ifndef QPDFACROFORMDOCUMENTHELPER_HH |
21 | | #define QPDFACROFORMDOCUMENTHELPER_HH |
22 | | |
23 | | #include <qpdf/QPDFDocumentHelper.hh> |
24 | | |
25 | | #include <qpdf/DLL.h> |
26 | | |
27 | | #include <qpdf/QPDFAnnotationObjectHelper.hh> |
28 | | #include <qpdf/QPDFFormFieldObjectHelper.hh> |
29 | | #include <qpdf/QPDFPageObjectHelper.hh> |
30 | | |
31 | | #include <map> |
32 | | #include <set> |
33 | | #include <vector> |
34 | | |
35 | | // This document helper is intended to help with operations on interactive forms. Here are the key |
36 | | // things to know: |
37 | | |
38 | | // * The PDF specification talks about interactive forms and also about form XObjects. While form |
39 | | // XObjects appear in parts of interactive forms, this class is concerned with interactive forms, |
40 | | // not form XObjects. |
41 | | // |
42 | | // * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7. |
43 | | // Also relevant is the section about Widget annotations. Annotations are discussed in section |
44 | | // 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed |
45 | | // specifically in section 12.5.6.19. |
46 | | // |
47 | | // * What you need to know about the structure of interactive forms in PDF files: |
48 | | // |
49 | | // - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are |
50 | | // represented as a tree structure much like pages. Nodes in the fields tree may contain other |
51 | | // fields. Fields may inherit values of many of their attributes from ancestors in the tree. |
52 | | // |
53 | | // - Fields may also have children that are widget annotations. As a special case, and a cause of |
54 | | // considerable confusion, if a field has a single annotation as a child, the annotation |
55 | | // dictionary may be merged with the field dictionary. In that case, the field and the |
56 | | // annotation are in the same object. Note that, while field dictionary attributes are |
57 | | // inherited, annotation dictionary attributes are not. |
58 | | // |
59 | | // - A page dictionary contains a key called "/Annots" which contains a simple list of |
60 | | // annotations. For any given annotation of subtype "/Widget", you should encounter that |
61 | | // annotation in the "/Annots" array of a page, and you should also be able to reach it by |
62 | | // traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case |
63 | | // (and also a very common case), a form field's widget annotation will be merged with the field |
64 | | // object, and the object will appear directly both under "/Annots" in the page dictionary and |
65 | | // under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace |
66 | | // through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation |
67 | | // dictionary. |
68 | | class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper |
69 | | { |
70 | | public: |
71 | | // Get a shared document helper for a given QPDF object. |
72 | | // |
73 | | // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated |
74 | | // validation of the AcroForm structure, which can be expensive. |
75 | | QPDF_DLL |
76 | | static QPDFAcroFormDocumentHelper& get(QPDF& qpdf); |
77 | | |
78 | | // Re-validate the AcroForm structure. This is useful if you have modified the structure of the |
79 | | // AcroForm dictionary in a way that would invalidate the cache. |
80 | | // |
81 | | // If repair is true, the document will be repaired, where possible, when validation encounters |
82 | | // errors. |
83 | | QPDF_DLL |
84 | | void validate(bool repair = true); |
85 | | |
86 | | QPDF_DLL |
87 | | QPDFAcroFormDocumentHelper(QPDF&); |
88 | | |
89 | 0 | ~QPDFAcroFormDocumentHelper() override = default; |
90 | | |
91 | | // This class lazily creates an internal cache of the mapping among form fields, annotations, |
92 | | // and pages. Methods within this class preserve the validity of this cache. However, if you |
93 | | // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form |
94 | | // fields manually in a way that alters the association between forms, fields, annotations, and |
95 | | // pages, it may cause this cache to become invalid. This method marks the cache invalid and |
96 | | // forces it to be regenerated the next time it is needed. |
97 | | QPDF_DLL |
98 | | void invalidateCache(); |
99 | | |
100 | | QPDF_DLL |
101 | | bool hasAcroForm(); |
102 | | |
103 | | // Add a form field, initializing the document's AcroForm dictionary if needed, updating the |
104 | | // cache if necessary. Note that if you are adding fields that are copies of other fields, this |
105 | | // method may result in multiple fields existing with the same qualified name, which can have |
106 | | // unexpected side effects. In that case, you should use addAndRenameFormFields() instead. |
107 | | QPDF_DLL |
108 | | void addFormField(QPDFFormFieldObjectHelper); |
109 | | |
110 | | // Add a collection of form fields making sure that their fully qualified names don't conflict |
111 | | // with already present form fields. Fields within the collection of new fields that have the |
112 | | // same name as each other will continue to do so. |
113 | | QPDF_DLL |
114 | | void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); |
115 | | |
116 | | // Remove the fields identified by the given set of QPDFObjGen from the fields array. |
117 | | QPDF_DLL |
118 | | void removeFormFields(std::set<QPDFObjGen> const&); |
119 | | |
120 | | // Set the name of a field, updating internal records of field names. Name should be UTF-8 |
121 | | // encoded. |
122 | | QPDF_DLL |
123 | | void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); |
124 | | |
125 | | // Return a vector of all terminal fields in a document. Terminal fields are fields that have no |
126 | | // children that are also fields. Terminal fields may still have children that are annotations. |
127 | | // Intermediate nodes in the fields tree are not included in this list, but you can still reach |
128 | | // them through the getParent method of the field object helper. |
129 | | QPDF_DLL |
130 | | std::vector<QPDFFormFieldObjectHelper> getFormFields(); |
131 | | |
132 | | // Return all the form fields that have the given fully-qualified name and also have an explicit |
133 | | // "/T" attribute. For this information to be accurate, any changes to field names must be done |
134 | | // through setFormFieldName() above. |
135 | | QPDF_DLL |
136 | | std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name); |
137 | | |
138 | | // Return the annotations associated with a terminal field. Note that in the case of a field |
139 | | // having a single annotation, the underlying object will typically be the same as the |
140 | | // underlying object for the field. |
141 | | QPDF_DLL |
142 | | std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper); |
143 | | |
144 | | // Return annotations of subtype /Widget for a page. |
145 | | QPDF_DLL |
146 | | std::vector<QPDFAnnotationObjectHelper> getWidgetAnnotationsForPage(QPDFPageObjectHelper); |
147 | | |
148 | | // Return top-level form fields for a page. |
149 | | QPDF_DLL |
150 | | std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper); |
151 | | |
152 | | // Return the terminal field that is associated with this annotation. If the annotation |
153 | | // dictionary is merged with the field dictionary, the underlying object will be the same, but |
154 | | // this is not always the case. Note that if you call this method with an annotation that is not |
155 | | // a widget annotation, there will not be an associated field, and this method will return a |
156 | | // helper associated with a null object (isNull() == true). |
157 | | QPDF_DLL |
158 | | QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); |
159 | | |
160 | | // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as |
161 | | // that is how PDF viewers are supposed to interpret it. |
162 | | QPDF_DLL |
163 | | bool getNeedAppearances(); |
164 | | |
165 | | // Indicate whether appearance streams must be regenerated. If you modify a field value, you |
166 | | // should call setNeedAppearances(true) unless you also generate an appearance stream for the |
167 | | // corresponding annotation at the same time. If you generate appearance streams for all fields, |
168 | | // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will |
169 | | // automatically call this method unless you tell it not to. |
170 | | QPDF_DLL |
171 | | void setNeedAppearances(bool); |
172 | | |
173 | | // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all |
174 | | // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for |
175 | | // generateAppearance for limitations. For checkbox and radio button fields, this code ensures |
176 | | // that appearance state is consistent with the field's value and uses any pre-existing |
177 | | // appearance streams. |
178 | | QPDF_DLL |
179 | | void generateAppearancesIfNeeded(); |
180 | | |
181 | | // Disable Digital Signature Fields. Remove all digital signature fields from the document, |
182 | | // leaving any annotation showing the content of the field intact. This also calls |
183 | | // QPDF::removeSecurityRestrictions. |
184 | | QPDF_DLL |
185 | | void disableDigitalSignatures(); |
186 | | |
187 | | // Note: this method works on all annotations, not just ones with associated fields. For each |
188 | | // annotation in old_annots, apply the given transformation matrix to create a new annotation. |
189 | | // New annotations are appended to new_annots. If the annotation is associated with a form |
190 | | // field, a new form field is created that points to the new annotation and is appended to |
191 | | // new_fields, and the old field is added to old_fields. |
192 | | // |
193 | | // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf, |
194 | | // and copyForeignObject will be called automatically. If this is the case, for efficiency, you |
195 | | // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of |
196 | | // creating one for each call to transformAnnotations. New fields and annotations are not added |
197 | | // to the document or pages. You have to do that yourself after calling transformAnnotations. If |
198 | | // this operation will leave orphaned fields behind, such as if you are replacing the old |
199 | | // annotations with the new ones on the same page and the fields and annotations are not shared, |
200 | | // you will also need to remove the old fields to prevent them from hanging around unreferenced. |
201 | | QPDF_DLL |
202 | | void transformAnnotations( |
203 | | QPDFObjectHandle old_annots, |
204 | | std::vector<QPDFObjectHandle>& new_annots, |
205 | | std::vector<QPDFObjectHandle>& new_fields, |
206 | | std::set<QPDFObjGen>& old_fields, |
207 | | QPDFMatrix const& cm, |
208 | | QPDF* from_qpdf = nullptr, |
209 | | QPDFAcroFormDocumentHelper* from_afdh = nullptr); |
210 | | |
211 | | // Copy form fields and annotations from one page to another, allowing the from page to be in a |
212 | | // different QPDF or in the same QPDF. This would typically be called after calling addPage to |
213 | | // add field/annotation awareness. When just copying the page by itself, annotations end up |
214 | | // being shared, and fields end up being omitted because there is no reference to the field from |
215 | | // the page. This method ensures that each separate copy of a page has private annotations and |
216 | | // that fields and annotations are properly updated to resolve conflicts that may occur from |
217 | | // common resource and field names across documents. It is basically a wrapper around |
218 | | // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any |
219 | | // newly created fields are added to it. |
220 | | QPDF_DLL |
221 | | void fixCopiedAnnotations( |
222 | | QPDFObjectHandle to_page, |
223 | | QPDFObjectHandle from_page, |
224 | | QPDFAcroFormDocumentHelper& from_afdh, |
225 | | std::set<QPDFObjGen>* new_fields = nullptr); |
226 | | |
227 | | private: |
228 | | void analyze(); |
229 | | void traverseField( |
230 | | QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited); |
231 | | QPDFObjectHandle getOrCreateAcroForm(); |
232 | | void adjustInheritedFields( |
233 | | QPDFObjectHandle obj, |
234 | | bool override_da, |
235 | | std::string const& from_default_da, |
236 | | bool override_q, |
237 | | int from_default_q); |
238 | | void adjustDefaultAppearances( |
239 | | QPDFObjectHandle obj, |
240 | | std::map<std::string, std::map<std::string, std::string>> const& dr_map); |
241 | | void adjustAppearanceStream( |
242 | | QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map); |
243 | | |
244 | | class Members; |
245 | | |
246 | | std::shared_ptr<Members> m; |
247 | | }; |
248 | | |
249 | | #endif // QPDFACROFORMDOCUMENTHELPER_HH |