/src/clamav/libclamav/pdf.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  *  Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved.  | 
3  |  |  *  Copyright (C) 2007-2013 Sourcefire, Inc.  | 
4  |  |  *  | 
5  |  |  *  Authors: Nigel Horne, Török Edvin  | 
6  |  |  *  | 
7  |  |  *  Also based on Matt Olney's pdf parser in snort-nrt.  | 
8  |  |  *  | 
9  |  |  *  This program is free software; you can redistribute it and/or modify  | 
10  |  |  *  it under the terms of the GNU General Public License version 2 as  | 
11  |  |  *  published by the Free Software Foundation.  | 
12  |  |  *  | 
13  |  |  *  This program is distributed in the hope that it will be useful,  | 
14  |  |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
15  |  |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
16  |  |  *  GNU General Public License for more details.  | 
17  |  |  *  | 
18  |  |  *  You should have received a copy of the GNU General Public License  | 
19  |  |  *  along with this program; if not, write to the Free Software  | 
20  |  |  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,  | 
21  |  |  *  MA 02110-1301, USA.  | 
22  |  |  *  | 
23  |  |  * TODO: Embedded fonts  | 
24  |  |  * TODO: Predictor image handling  | 
25  |  |  */  | 
26  |  |  | 
27  |  | #if HAVE_CONFIG_H  | 
28  |  | #include "clamav-config.h"  | 
29  |  | #endif  | 
30  |  |  | 
31  |  | #include <stdio.h>  | 
32  |  | #include <sys/types.h>  | 
33  |  | #include <sys/stat.h>  | 
34  |  | #include <ctype.h>  | 
35  |  | #include <string.h>  | 
36  |  | #include <fcntl.h>  | 
37  |  | #include <stdlib.h>  | 
38  |  | #include <errno.h>  | 
39  |  | #ifdef HAVE_LIMITS_H  | 
40  |  | #include <limits.h>  | 
41  |  | #endif  | 
42  |  | #ifdef HAVE_UNISTD_H  | 
43  |  | #include <unistd.h>  | 
44  |  | #endif  | 
45  |  | #include <zlib.h>  | 
46  |  |  | 
47  |  | #if HAVE_ICONV  | 
48  |  | #include <iconv.h>  | 
49  |  | #endif  | 
50  |  |  | 
51  |  | #ifdef _WIN32  | 
52  |  | #include <stdint.h>  | 
53  |  | #endif  | 
54  |  |  | 
55  |  | #include "clamav.h"  | 
56  |  | #include "others.h"  | 
57  |  | #include "pdf.h"  | 
58  |  | #include "pdfdecode.h"  | 
59  |  | #include "scanners.h"  | 
60  |  | #include "fmap.h"  | 
61  |  | #include "str.h"  | 
62  |  | #include "entconv.h"  | 
63  |  | #include "bytecode.h"  | 
64  |  | #include "bytecode_api.h"  | 
65  |  | #include "arc4.h"  | 
66  |  | #include "rijndael.h"  | 
67  |  | #include "textnorm.h"  | 
68  |  | #include "conv.h"  | 
69  |  | #include "json_api.h"  | 
70  |  |  | 
71  |  | #ifdef CL_DEBUG  | 
72  |  | /*#define SAVE_TMP  | 
73  |  |  *Save the file being worked on in tmp */  | 
74  |  | #endif  | 
75  |  |  | 
76  | 2.33M  | #define MAX_PDF_OBJECTS (64 * 1024)  | 
77  |  |  | 
78  |  | struct pdf_struct;  | 
79  |  |  | 
80  |  | static const char *pdf_nextlinestart(const char *ptr, size_t len);  | 
81  |  | static const char *pdf_nextobject(const char *ptr, size_t len);  | 
82  |  |  | 
83  |  | /* PDF statistics callbacks and related */  | 
84  |  | struct pdfname_action;  | 
85  |  |  | 
86  |  | static void pdf_export_json(struct pdf_struct *);  | 
87  |  |  | 
88  |  | static void ASCIIHexDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
89  |  | static void ASCII85Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
90  |  | static void EmbeddedFile_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
91  |  | static void FlateDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
92  |  | static void Image_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
93  |  | static void LZWDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
94  |  | static void RunLengthDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
95  |  | static void CCITTFaxDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
96  |  | static void JBIG2Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
97  |  | static void DCTDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
98  |  | static void JPXDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
99  |  | static void Crypt_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
100  |  | static void Standard_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
101  |  | static void Sig_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
102  |  | static void JavaScript_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
103  |  | static void OpenAction_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
104  |  | static void Launch_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
105  |  | static void Page_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
106  |  | static void Author_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
107  |  | static void Creator_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
108  |  | static void Producer_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
109  |  | static void CreationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
110  |  | static void ModificationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
111  |  | static void Title_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
112  |  | static void Subject_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
113  |  | static void Keywords_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
114  |  | static void Pages_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);  | 
115  |  | static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);  | 
116  |  | static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);  | 
117  |  | static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);  | 
118  |  | static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);  | 
119  |  |  | 
120  |  | /* End PDF statistics callbacks and related */  | 
121  |  |  | 
122  |  | static int pdf_readint(const char *q0, int len, const char *key);  | 
123  |  | static const char *pdf_getdict(const char *q0, int *len, const char *key);  | 
124  |  | static char *pdf_readval(const char *q, int len, const char *key);  | 
125  |  | static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, bool noescape);  | 
126  |  |  | 
/**
 * @brief   Look for a cross-reference marker at or after `xref`.
 *
 * Leading spaces, line feeds and carriage returns are skipped. The check then
 * succeeds if the data begins with the keyword "xref", or if "/XRef" occurs
 * anywhere in the remaining data (which could be a cross-reference stream).
 *
 * More than the keyword itself must remain before `eof` for either form to
 * be recognized.
 *
 * @param xref  Where to start looking.
 * @param eof   End of the search space.
 *
 * @return 0 if a marker was found, -1 otherwise.
 */
static int xrefCheck(const char *xref, const char *eof)
{
    static const char table_kw[]  = "xref";
    static const char stream_kw[] = "/XRef";
    const char *cursor;

    /* Step over leading blanks and line breaks. */
    for (; xref < eof; xref++) {
        if (*xref != ' ' && *xref != '\n' && *xref != '\r')
            break;
    }

    if ((eof - xref) <= 4)
        return -1;

    if (0 == memcmp(xref, table_kw, sizeof(table_kw) - 1)) {
        cli_dbgmsg("cli_pdf: found xref\n");
        return 0;
    }

    /* could be xref stream */
    cursor = xref;
    while ((eof - cursor) > 5) {
        if (0 == memcmp(cursor, stream_kw, sizeof(stream_kw) - 1)) {
            cli_dbgmsg("cli_pdf: found /XRef\n");
            return 0;
        }
        cursor++;
    }

    return -1;
}
152  |  |  | 
153  |  | /* define this to be noisy about things that we can't parse properly */  | 
154  |  | #undef NOISY  | 
155  |  |  | 
156  |  | #ifdef NOISY  | 
157  |  | #define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)  | 
158  |  | #define noisy_warnmsg(...) cli_warnmsg(__VA_ARGS__)  | 
159  |  | #else  | 
160  |  | #define noisy_msg(pdf, ...)  | 
161  |  | #define noisy_warnmsg(...)  | 
162  |  | #endif  | 
163  |  |  | 
/**
 * @brief   Walk BACKwards over whitespace.
 *
 * Whitespace here means NUL, horizontal tab, line feed, form feed, carriage
 * return and space. The byte at `start` itself is never examined.
 *
 * @param q         Position to begin at (the far end of the search space).
 * @param start     Lower bound of the search space.
 *
 * @return const char*  Address of the first non-whitespace byte met while
 *                      walking back from `q`, or `start` if only whitespace
 *                      lies in between. If `q` is not above `start`, `q` is
 *                      returned unchanged.
 */
static const char *findNextNonWSBack(const char *q, const char *start)
{
    for (; q > start; q--) {
        switch (*q) {
            case '\0':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case ' ':
                /* still whitespace, keep walking back */
                continue;
            default:
                return q;
        }
    }

    return q;
}
179  |  |  | 
/**
 * @brief   Walk FORwards over whitespace.
 *
 * Whitespace here means NUL, horizontal tab, line feed, form feed, carriage
 * return and space.
 *
 * @param q         Position to begin at (the start of the search space).
 * @param end       End of the search space; bytes at or past it are not read.
 *
 * @return const char*  Address of the first non-whitespace byte at or after
 *                      `q`, or `end` if only whitespace was found. If `q` is
 *                      not below `end`, `q` is returned unchanged.
 */
static const char *findNextNonWS(const char *q, const char *end)
{
    while (q < end) {
        const char c = *q;

        if (!(c == '\0' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' '))
            break;

        q++;
    }

    return q;
}
195  |  |  | 
196  |  | /**  | 
197  |  |  * @brief   Find bounds of stream.  | 
198  |  |  *  | 
199  |  |  * PDF streams are prefixed with "stream" and suffixed with "endstream".  | 
200  |  |  * Return value indicates success or failure.  | 
201  |  |  *  | 
202  |  |  * @param start             start address of search space.  | 
203  |  |  * @param size              size of search space  | 
204  |  |  * @param[out] stream       output param, address of start of stream data  | 
205  |  |  * @param[out] stream_size  output param, size of stream data  | 
206  |  |  * @param newline_hack      hack to support newlines that are \r\n, and not just \n or just \r.  | 
207  |  |  *  | 
208  |  |  * @return cl_error_t       CL_SUCCESS if stream bounds were found.  | 
209  |  |  * @return cl_error_t       CL_BREAK if stream bounds could not be found.  | 
210  |  |  * @return cl_error_t       CL_EFORMAT if stream start was found, but not end. (truncated)  | 
211  |  |  * @return cl_error_t       CL_EARG if invalid args were provided.  | 
212  |  |  */  | 
213  |  | static cl_error_t find_stream_bounds(  | 
214  |  |     const char *start,  | 
215  |  |     size_t size,  | 
216  |  |     const char **stream,  | 
217  |  |     size_t *stream_size,  | 
218  |  |     int newline_hack)  | 
219  | 1.35M  | { | 
220  | 1.35M  |     cl_error_t status = CL_BREAK;  | 
221  |  |  | 
222  | 1.35M  |     const char *idx;  | 
223  | 1.35M  |     const char *stream_begin;  | 
224  | 1.35M  |     const char *endstream_begin;  | 
225  | 1.35M  |     size_t bytesleft = size;  | 
226  |  |  | 
227  | 1.35M  |     if ((NULL == start) || (0 == bytesleft) || (NULL == stream) || (NULL == stream_size)) { | 
228  | 0  |         status = CL_EARG;  | 
229  | 0  |         return status;  | 
230  | 0  |     }  | 
231  |  |  | 
232  | 1.35M  |     *stream      = NULL;  | 
233  | 1.35M  |     *stream_size = 0;  | 
234  |  |  | 
235  |  |     /* Begin by finding the "stream" string that prefixes stream data. */  | 
236  | 1.35M  |     if ((stream_begin = cli_memstr(start, bytesleft, "stream", strlen("stream")))) { | 
237  | 713k  |         idx = stream_begin + strlen("stream"); | 
238  | 713k  |         if ((size_t)(idx - start) >= bytesleft)  | 
239  | 1.21k  |             goto done;  | 
240  | 712k  |         bytesleft -= idx - start;  | 
241  |  |  | 
242  |  |         /* Skip any new line characters. */  | 
243  | 712k  |         if (bytesleft >= 2 && idx[0] == '\xd' && idx[1] == '\xa') { | 
244  | 296k  |             idx += 2;  | 
245  | 296k  |             bytesleft -= 2;  | 
246  | 296k  |             if (newline_hack && (bytesleft > 2) && idx[0] == '\xa') { | 
247  | 592  |                 idx++;  | 
248  | 592  |                 bytesleft--;  | 
249  | 592  |             }  | 
250  | 415k  |         } else if (bytesleft && idx[0] == '\xa') { | 
251  | 68.8k  |             idx++;  | 
252  | 68.8k  |             bytesleft--;  | 
253  | 68.8k  |         }  | 
254  |  |  | 
255  |  |         /* Pass back start of the stream data. */  | 
256  | 712k  |         *stream = idx;  | 
257  |  |  | 
258  |  |         /* Now find the "endstream" string that suffixes stream data. */  | 
259  | 712k  |         endstream_begin = cli_memstr(idx, bytesleft, "endstream", strlen("endstream")); | 
260  | 712k  |         if (!endstream_begin) { | 
261  |  |             /* Couldn't find "endstream", but that's ok --  | 
262  |  |              * -- we'll just count the rest of the provided buffer. */  | 
263  | 520k  |             cli_dbgmsg("find_stream_bounds: Truncated stream found!\n"); | 
264  | 520k  |             endstream_begin = start + size;  | 
265  | 520k  |             status          = CL_EFORMAT;  | 
266  | 520k  |         }  | 
267  |  |  | 
268  |  |         /* Pass back end of the stream data, as offset from start. */  | 
269  | 712k  |         *stream_size = endstream_begin - *stream;  | 
270  |  |  | 
271  | 712k  |         if (CL_EFORMAT != status)  | 
272  | 191k  |             status = CL_SUCCESS;  | 
273  | 712k  |     }  | 
274  |  |  | 
275  | 1.35M  | done:  | 
276  |  |  | 
277  | 1.35M  |     return status;  | 
278  | 1.35M  | }  | 
279  |  |  | 
280  |  | /**  | 
281  |  |  * @brief Find the next *indirect* object in an object stream, adds it to our list of  | 
282  |  |  *        objects, and increments nobj.  | 
283  |  |  *  | 
284  |  |  * Indirect objects in a stream DON'T begin with "obj" and end with "endobj".  | 
285  |  |  * Instead, they have an objid and an offset from the first object to point you  | 
286  |  |  * right at them.  | 
287  |  |  *  | 
288  |  |  * If found, objstm->current will be updated to the next objid.  | 
289  |  |  *  | 
290  |  |  * All objects in an object stream are indirect and thus do not begin or start  | 
291  |  |  * with "obj" or "endobj".  Instead, the object stream takes the following  | 
292  |  |  * format.  | 
293  |  |  *  | 
294  |  |  *      <dictionary describing stream> objstm content endobjstm  | 
295  |  |  *  | 
296  |  |  * where content looks something like the following:  | 
297  |  |  *  | 
298  |  |  *      15 0 16 3 17 46 (ab)<</IDS 8 0 R/JavaScript 27 0 R/URLS 9 0 R>><</Names[(Test)28 0 R]>>  | 
299  |  |  *  | 
300  |  |  * In the above example, the literal string (ab) is indirect object # 15, and  | 
301  |  |  * begins at offset 0 of the set of objects.  The next object, # 16 begis at  | 
302  |  |  * offset 3 is a dictionary.  The final object is also a dictionary, beginning  | 
303  |  |  * at offset 46.  | 
304  |  |  *  | 
305  |  |  * @param pdf   Pdf struct that keeps track of all information found in the PDF.  | 
306  |  |  * @param objstm  | 
307  |  |  *  | 
308  |  |  * @return CL_SUCCESS  if success  | 
309  |  |  * @return CL_EPARSE   if parsing error  | 
310  |  |  * @return CL_EMEM     if error allocating memory  | 
311  |  |  * @return CL_EARG     if invalid arguments  | 
312  |  |  */  | 
313  |  | int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm, struct pdf_obj **obj_found)  | 
314  | 254k  | { | 
315  | 254k  |     cl_error_t status   = CL_EPARSE;  | 
316  | 254k  |     struct pdf_obj *obj = NULL;  | 
317  | 254k  |     unsigned long objid = 0, objoff = 0;  | 
318  | 254k  |     long temp_long         = 0;  | 
319  | 254k  |     const char *index      = NULL;  | 
320  | 254k  |     size_t bytes_remaining = 0;  | 
321  |  |  | 
322  | 254k  |     if (NULL == pdf || NULL == objstm) { | 
323  | 0  |         cli_warnmsg("pdf_findobj_in_objstm: invalid arguments\n"); | 
324  | 0  |         return CL_EARG;  | 
325  | 0  |     }  | 
326  |  |  | 
327  | 254k  |     if (pdf->nobjs >= MAX_PDF_OBJECTS) { | 
328  | 0  |         pdf->flags |= 1 << BAD_PDF_TOOMANYOBJS;  | 
329  |  | 
  | 
330  | 0  |         cli_dbgmsg("pdf_findobj_in_objstm: reached object maximum\n"); | 
331  | 0  |         status = CL_BREAK;  | 
332  | 0  |         goto done;  | 
333  | 0  |     }  | 
334  |  |  | 
335  | 254k  |     *obj_found = NULL;  | 
336  |  |  | 
337  | 254k  |     index           = objstm->streambuf + objstm->current_pair;  | 
338  | 254k  |     bytes_remaining = objstm->streambuf_len - objstm->current_pair;  | 
339  |  |  | 
340  | 254k  |     obj = calloc(sizeof(struct pdf_obj), 1);  | 
341  | 254k  |     if (!obj) { | 
342  | 0  |         cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n"); | 
343  | 0  |         status = CL_EMEM;  | 
344  | 0  |         goto done;  | 
345  | 0  |     }  | 
346  |  |  | 
347  |  |     /* This object is in a stream, not in the regular map buffer. */  | 
348  | 254k  |     obj->objstm = objstm;  | 
349  |  |  | 
350  |  |     /* objstm->current_pair points directly to the objid */  | 
351  | 254k  |     if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
352  |  |         /* Failed to find objid */  | 
353  | 4.23k  |         cli_dbgmsg("pdf_findobj_in_objstm: Failed to find objid for obj in object stream\n"); | 
354  | 4.23k  |         status = CL_EPARSE;  | 
355  | 4.23k  |         goto done;  | 
356  | 250k  |     } else if (temp_long < 0) { | 
357  | 122  |         cli_dbgmsg("pdf_findobj_in_objstm: Encountered invalid negative objid (%ld).\n", temp_long); | 
358  | 122  |         status = CL_EPARSE;  | 
359  | 122  |         goto done;  | 
360  | 122  |     }  | 
361  | 249k  |     objid = (unsigned long)temp_long;  | 
362  |  |  | 
363  |  |     /* Find the obj offset that appears just after the objid*/  | 
364  | 726k  |     while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { | 
365  | 476k  |         index++;  | 
366  | 476k  |         bytes_remaining--;  | 
367  | 476k  |     }  | 
368  | 249k  |     index           = findNextNonWS(index, objstm->streambuf + objstm->first);  | 
369  | 249k  |     bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;  | 
370  |  |  | 
371  | 249k  |     if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
372  |  |         /* Failed to find obj offset */  | 
373  | 495  |         cli_dbgmsg("pdf_findobj_in_objstm: Failed to find obj offset for obj in object stream\n"); | 
374  | 495  |         status = CL_EPARSE;  | 
375  | 495  |         goto done;  | 
376  | 249k  |     } else if (temp_long < 0) { | 
377  | 646  |         cli_dbgmsg("pdf_findobj_in_objstm: Encountered invalid negative obj offset (%ld).\n", temp_long); | 
378  | 646  |         status = CL_EPARSE;  | 
379  | 646  |         goto done;  | 
380  | 646  |     }  | 
381  | 248k  |     objoff = (unsigned long)temp_long;  | 
382  |  |  | 
383  | 248k  |     if ((size_t)objstm->first + (size_t)objoff > objstm->streambuf_len) { | 
384  |  |         /* Alleged obj location is further than the length of the stream */  | 
385  | 971  |         cli_dbgmsg("pdf_findobj_in_objstm: obj offset found is greater than the length of the stream.\n"); | 
386  | 971  |         status = CL_EPARSE;  | 
387  | 971  |         goto done;  | 
388  | 971  |     }  | 
389  |  |  | 
390  | 247k  |     objstm->current = objstm->first + objoff;  | 
391  |  |  | 
392  | 247k  |     obj->id    = (objid << 8) | (0 & 0xff);  | 
393  | 247k  |     obj->start = objstm->current;  | 
394  | 247k  |     obj->flags = 0;  | 
395  |  |  | 
396  | 247k  |     objstm->nobjs_found++;  | 
397  |  |  | 
398  | 1.08M  |     while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { | 
399  | 832k  |         index++;  | 
400  | 832k  |         bytes_remaining--;  | 
401  | 832k  |     }  | 
402  | 247k  |     objstm->current_pair = (uint32_t)(findNextNonWS(index, objstm->streambuf + objstm->first) - objstm->streambuf);  | 
403  |  |  | 
404  |  |     /* Update current_pair, if there are more */  | 
405  | 247k  |     if ((objstm->nobjs_found < objstm->n) &&  | 
406  | 247k  |         (index < objstm->streambuf + objstm->streambuf_len)) { | 
407  | 221k  |         unsigned long next_objoff = 0;  | 
408  |  |  | 
409  |  |         /*  | 
410  |  |          * While we're at it,  | 
411  |  |          *   lets record the size as running up to the next object offset.  | 
412  |  |          *  | 
413  |  |          * To do so, we will need to parse the next obj pair.  | 
414  |  |          */  | 
415  |  |         /* objstm->current_pair points directly to the objid */  | 
416  | 221k  |         index           = objstm->streambuf + objstm->current_pair;  | 
417  | 221k  |         bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;  | 
418  |  |  | 
419  |  |         /* We don't actually care about the object id at this point, so reading the object id is commented out.  | 
420  |  |            I didn't delete it entirely in case the object id is needed in the future. */  | 
421  |  |         // if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
422  |  |         //     /* Failed to find objid for next obj */  | 
423  |  |         //     cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next objid for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); | 
424  |  |         //     status = CL_EPARSE;  | 
425  |  |         //     goto done;  | 
426  |  |         // } else if (temp_long < 0) { | 
427  |  |         //     cli_dbgmsg("pdf_findobj_in_objstm: Encountered invalid negative objid (%ld).\n", temp_long); | 
428  |  |         //     status = CL_EPARSE;  | 
429  |  |         //     goto done;  | 
430  |  |         // }  | 
431  |  |         // next_objid = (unsigned long)temp_long;  | 
432  |  |  | 
433  |  |         /* Find the obj offset that appears just after the objid*/  | 
434  | 669k  |         while ((index < objstm->streambuf + objstm->streambuf_len) && isdigit(*index)) { | 
435  | 448k  |             index++;  | 
436  | 448k  |             bytes_remaining--;  | 
437  | 448k  |         }  | 
438  | 221k  |         index           = findNextNonWS(index, objstm->streambuf + objstm->first);  | 
439  | 221k  |         bytes_remaining = objstm->streambuf + objstm->streambuf_len - index;  | 
440  |  |  | 
441  | 221k  |         if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
442  |  |             /* Failed to find obj offset for next obj */  | 
443  | 4.10k  |             cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); | 
444  | 4.10k  |             status = CL_EPARSE;  | 
445  | 4.10k  |             goto done;  | 
446  | 217k  |         } else if (temp_long < 0) { | 
447  | 399  |             cli_dbgmsg("pdf_findobj_in_objstm: Encountered invalid negative obj offset (%ld).\n", temp_long); | 
448  | 399  |             status = CL_EPARSE;  | 
449  | 399  |             goto done;  | 
450  | 399  |         }  | 
451  | 216k  |         next_objoff = (unsigned long)temp_long;  | 
452  |  |  | 
453  | 216k  |         if (next_objoff <= objoff) { | 
454  |  |             /* Failed to find obj offset for next obj */  | 
455  | 938  |             cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's less than or equal to the current one!\n"); | 
456  | 938  |             status = CL_EPARSE;  | 
457  | 938  |             goto done;  | 
458  | 215k  |         } else if (objstm->first + next_objoff > objstm->streambuf_len) { | 
459  |  |             /* Failed to find obj offset for next obj */  | 
460  | 2.88k  |             cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's further out than the size of the stream!\n"); | 
461  | 2.88k  |             status = CL_EPARSE;  | 
462  | 2.88k  |             goto done;  | 
463  | 2.88k  |         }  | 
464  |  |  | 
465  | 212k  |         obj->size = next_objoff - objoff;  | 
466  | 212k  |     } else { | 
467  |  |         /*  | 
468  |  |          * Should be no more objects. We should verify.  | 
469  |  |          *  | 
470  |  |          * Either way...  | 
471  |  |          *   obj->size should be the rest of the buffer.  | 
472  |  |          */  | 
473  | 26.6k  |         if (objstm->nobjs_found < objstm->n) { | 
474  | 225  |             cli_warnmsg("pdf_findobj_in_objstm: Fewer objects found in object stream than expected!\n"); | 
475  | 225  |         }  | 
476  |  |  | 
477  | 26.6k  |         obj->size = objstm->streambuf_len - obj->start;  | 
478  | 26.6k  |     }  | 
479  |  |  | 
480  |  |     /* Success! Add the object to the list of all objects found. */  | 
481  | 239k  |     pdf->nobjs++;  | 
482  | 239k  |     CLI_MAX_REALLOC_OR_GOTO_DONE(pdf->objs, sizeof(struct pdf_obj *) * pdf->nobjs,  | 
483  | 239k  |                                  cli_warnmsg("pdf_findobj_in_objstm: out of memory finding objects in stream\n"), | 
484  | 239k  |                                  status = CL_EMEM);  | 
485  | 239k  |     pdf->objs[pdf->nobjs - 1] = obj;  | 
486  |  |  | 
487  | 239k  |     *obj_found = obj;  | 
488  |  |  | 
489  | 239k  |     status = CL_SUCCESS;  | 
490  |  |  | 
491  | 254k  | done:  | 
492  | 254k  |     if (CL_SUCCESS != status) { | 
493  | 14.7k  |         if (NULL != obj) { | 
494  | 14.7k  |             free(obj);  | 
495  | 14.7k  |         }  | 
496  | 14.7k  |     }  | 
497  | 254k  |     return status;  | 
498  | 239k  | }  | 
499  |  |  | 
500  |  | /**  | 
501  |  |  * @brief Find the next *indirect* object.  | 
502  |  |  *  | 
503  |  |  * Indirect objects located outside of an object stream are prefaced with:  | 
504  |  |  *      <objid> <genid> obj  | 
505  |  |  *  | 
506  |  |  * Each of the above are separated by whitespace of some sort.  | 
507  |  |  *  | 
508  |  |  * Indirect objects are postfaced with:  | 
509  |  |  *      endobj  | 
510  |  |  *  | 
511  |  |  * The specification does not say if whitespace is required before or after "endobj".  | 
512  |  |  *  | 
513  |  |  * Identify truncated objects.  | 
514  |  |  *  | 
515  |  |  * If found, pdf->offset will be updated to just after the "endobj".  | 
516  |  |  * If truncated, pdf->offset will == pdf->size.  | 
517  |  |  * If not found, pdf->offset will not be updated.  | 
518  |  |  *  | 
519  |  |  * @param pdf   Pdf context struct that keeps track of all information found in the PDF.  | 
520  |  |  *  | 
521  |  |  * @return CL_SUCCESS  if success  | 
522  |  |  * @return CL_BREAK    if no more objects  | 
523  |  |  * @return CL_EPARSE   if parsing error  | 
524  |  |  * @return CL_EMEM     if error allocating memory  | 
525  |  |  */  | 
526  |  | cl_error_t pdf_findobj(struct pdf_struct *pdf)  | 
527  | 2.08M  | { | 
528  | 2.08M  |     cl_error_t status = CL_EPARSE;  | 
529  | 2.08M  |     const char *start, *idx, *genid_search_index, *objid_search_index;  | 
530  |  |  | 
531  | 2.08M  |     const char *obj_begin = NULL, *obj_end = NULL;  | 
532  | 2.08M  |     const char *endobj_begin = NULL, *endobj_end = NULL;  | 
533  |  |  | 
534  | 2.08M  |     struct pdf_obj *obj = NULL;  | 
535  | 2.08M  |     size_t bytesleft;  | 
536  | 2.08M  |     unsigned long genid, objid;  | 
537  | 2.08M  |     long temp_long;  | 
538  |  |  | 
539  | 2.08M  |     if (pdf->nobjs >= MAX_PDF_OBJECTS) { | 
540  | 0  |         pdf->flags |= 1 << BAD_PDF_TOOMANYOBJS;  | 
541  |  | 
  | 
542  | 0  |         cli_dbgmsg("pdf_findobj: reached object maximum\n"); | 
543  | 0  |         status = CL_BREAK;  | 
544  | 0  |         goto done;  | 
545  | 0  |     }  | 
546  | 2.08M  |     pdf->nobjs++;  | 
547  | 2.08M  |     CLI_MAX_REALLOC_OR_GOTO_DONE(pdf->objs, sizeof(struct pdf_obj *) * pdf->nobjs, status = CL_EMEM);  | 
548  |  |  | 
549  | 2.08M  |     obj = malloc(sizeof(struct pdf_obj));  | 
550  | 2.08M  |     if (!obj) { | 
551  | 0  |         status = CL_EMEM;  | 
552  | 0  |         goto done;  | 
553  | 0  |     }  | 
554  | 2.08M  |     pdf->objs[pdf->nobjs - 1] = obj;  | 
555  |  |  | 
556  | 2.08M  |     memset(obj, 0, sizeof(*obj));  | 
557  |  |  | 
558  | 2.08M  |     start     = pdf->map + pdf->offset;  | 
559  | 2.08M  |     bytesleft = pdf->size - pdf->offset;  | 
560  |  |  | 
561  |  |     /*  | 
562  |  |      * Start by searching for "obj"  | 
563  |  |      */  | 
564  | 2.08M  |     idx = start + 1;  | 
565  | 2.27M  |     while (bytesleft > 1 + strlen("obj")) { | 
566  |  |         /* `- 1` accounts for size of white space before obj */  | 
567  | 1.91M  |         idx = cli_memstr(idx, bytesleft - 1, "obj", strlen("obj")); | 
568  | 1.91M  |         if (NULL == idx) { | 
569  | 70.7k  |             status = CL_BREAK;  | 
570  | 70.7k  |             goto done; /* No more objs. */  | 
571  | 70.7k  |         }  | 
572  |  |  | 
573  |  |         /* verify that the word has a whitespace before it, and is not the end of  | 
574  |  |          * a previous word */  | 
575  | 1.84M  |         idx--;  | 
576  | 1.84M  |         bytesleft = (pdf->size - pdf->offset) - (size_t)(idx - start);  | 
577  |  |  | 
578  | 1.84M  |         if (*idx != 0 && *idx != 9 && *idx != 0xa && *idx != 0xc && *idx != 0xd && *idx != 0x20) { | 
579  |  |             /* This instance of "obj" appears to be part of a longer string.  | 
580  |  |              * Skip it, and keep searching for an object. */  | 
581  | 195k  |             idx += 1 + strlen("obj"); | 
582  | 195k  |             bytesleft -= 1 + strlen("obj"); | 
583  | 195k  |             continue;  | 
584  | 195k  |         }  | 
585  |  |  | 
586  |  |         /* Found the beginning of the word */  | 
587  | 1.64M  |         obj_begin = idx;  | 
588  | 1.64M  |         obj_end   = idx + 1 + strlen("obj"); | 
589  |  |  | 
590  | 1.64M  |         break;  | 
591  | 1.84M  |     }  | 
592  |  |  | 
593  | 2.01M  |     if ((NULL == obj_begin) || (NULL == obj_end)) { | 
594  | 362k  |         status = CL_BREAK;  | 
595  | 362k  |         goto done; /* No more objs. */  | 
596  | 362k  |     }  | 
597  |  |  | 
598  |  |     /* Find the generation id (genid) that appears before the "obj" */  | 
599  | 1.64M  |     genid_search_index = findNextNonWSBack(obj_begin - 1, start);  | 
600  | 3.59M  |     while (genid_search_index > start && isdigit(*genid_search_index))  | 
601  | 1.94M  |         genid_search_index--;  | 
602  |  |  | 
603  | 1.64M  |     if (CL_SUCCESS != cli_strntol_wrap(genid_search_index, (size_t)((obj_begin)-genid_search_index), 0, 10, &temp_long)) { | 
604  | 109k  |         cli_dbgmsg("pdf_findobj: Failed to parse object genid (# objects found: %u)\n", pdf->nobjs); | 
605  |  |         /* Failed to parse, probably not a real object.  Skip past the "obj" thing, and continue. */  | 
606  | 109k  |         pdf->offset = obj_end - pdf->map;  | 
607  | 109k  |         status      = CL_EPARSE;  | 
608  | 109k  |         goto done;  | 
609  | 1.53M  |     } else if (temp_long < 0) { | 
610  | 26.4k  |         cli_dbgmsg("pdf_findobj: Encountered invalid negative obj genid (%ld).\n", temp_long); | 
611  | 26.4k  |         pdf->offset = obj_end - pdf->map;  | 
612  | 26.4k  |         status      = CL_EPARSE;  | 
613  | 26.4k  |         goto done;  | 
614  | 26.4k  |     }  | 
615  | 1.51M  |     genid = (unsigned long)temp_long;  | 
616  |  |  | 
617  |  |     /* Find the object id (objid) that appears before the genid */  | 
618  | 1.51M  |     objid_search_index = findNextNonWSBack(genid_search_index - 1, start);  | 
619  | 4.01M  |     while (objid_search_index > start && isdigit(*objid_search_index))  | 
620  | 2.50M  |         objid_search_index--;  | 
621  |  |  | 
622  | 1.51M  |     if (CL_SUCCESS != cli_strntol_wrap(objid_search_index, (size_t)((genid_search_index)-objid_search_index), 0, 10, &temp_long)) { | 
623  |  |         /*  | 
624  |  |          * Edge case:  | 
625  |  |          *  | 
626  |  |          * PDFs with multiple revisions will have %%EOF before the end of the file,  | 
627  |  |          * followed by the next revision of the PDF, which will probably be an immediate objid.  | 
628  |  |          *  | 
629  |  |          * Example:  | 
630  |  |          *   %%EOF1 1 obj <blah> endobj  | 
631  |  |          *  | 
632  |  |          * If this is the case, we can detect it and continue parsing after the %%EOF.  | 
633  |  |          */  | 
634  | 146k  |         if (objid_search_index - strlen("%%EO") > start) { | 
635  | 113k  |             const char *lastfile = objid_search_index - strlen("%%EO"); | 
636  | 113k  |             if (0 != strncmp(lastfile, "%%EOF", 5)) { | 
637  |  |                 /* Nope, wasn't %%EOF */  | 
638  | 112k  |                 cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); | 
639  |  |                 /* Skip past the "obj" thing, and continue. */  | 
640  | 112k  |                 pdf->offset = obj_end - pdf->map;  | 
641  | 112k  |                 status      = CL_EPARSE;  | 
642  | 112k  |                 goto done;  | 
643  | 112k  |             }  | 
644  |  |             /* Yup, Looks, like the file continues after %%EOF.  | 
645  |  |              * Probably another revision.  Keep parsing... */  | 
646  | 1.07k  |             objid_search_index++;  | 
647  | 1.07k  |             cli_dbgmsg("pdf_findobj: %%%%EOF detected before end of file, at offset: %zu\n", (size_t)(objid_search_index - pdf->map)); | 
648  | 33.1k  |         } else { | 
649  |  |             /* Failed parsing at the very beginning */  | 
650  | 33.1k  |             cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); | 
651  |  |             /* Probably not a real object.  Skip past the "obj" thing, and continue. */  | 
652  | 33.1k  |             pdf->offset = obj_end - pdf->map;  | 
653  | 33.1k  |             status      = CL_EPARSE;  | 
654  | 33.1k  |             goto done;  | 
655  | 33.1k  |         }  | 
656  |  |         /* Try again, with offset slightly adjusted */  | 
657  | 1.07k  |         if (CL_SUCCESS != cli_strntol_wrap(objid_search_index, (size_t)((genid_search_index - 1) - objid_search_index), 0, 10, &temp_long)) { | 
658  | 834  |             cli_dbgmsg("pdf_findobj: Failed to parse object objid (# objects found: %u)\n", pdf->nobjs); | 
659  |  |             /* Still failed... Probably not a real object.  Skip past the "obj" thing, and continue. */  | 
660  | 834  |             pdf->offset = obj_end - pdf->map;  | 
661  | 834  |             status      = CL_EPARSE;  | 
662  | 834  |             goto done;  | 
663  | 834  |         } else if (temp_long < 0) { | 
664  | 0  |             cli_dbgmsg("pdf_findobj: Encountered invalid negative objid (%ld).\n", temp_long); | 
665  | 0  |             pdf->offset = obj_end - pdf->map;  | 
666  | 0  |             status      = CL_EPARSE;  | 
667  | 0  |             goto done;  | 
668  | 0  |         }  | 
669  |  |  | 
670  | 236  |         cli_dbgmsg("pdf_findobj: There appears to be an additional revision. Continuing to parse...\n"); | 
671  | 1.36M  |     } else if (temp_long < 0) { | 
672  | 15.6k  |         cli_dbgmsg("pdf_findobj: Encountered invalid negative objid (%ld).\n", temp_long); | 
673  | 15.6k  |         pdf->offset = obj_end - pdf->map;  | 
674  | 15.6k  |         status      = CL_EPARSE;  | 
675  | 15.6k  |         goto done;  | 
676  | 15.6k  |     }  | 
677  | 1.35M  |     objid = (unsigned long)temp_long;  | 
678  |  |  | 
679  | 1.35M  |     obj->id    = (objid << 8) | (genid & 0xff);  | 
680  | 1.35M  |     obj->start = obj_end - pdf->map; /* obj start begins just after the "obj" string */  | 
681  | 1.35M  |     obj->flags = 0;  | 
682  |  |  | 
683  |  |     /*  | 
684  |  |      * We now have the objid, genid, and object start.  | 
685  |  |      * Find the object end ("endobj"). | 
686  |  |      */  | 
687  |  |     /* `- 1` accounts for size of white space before obj */  | 
688  | 1.35M  |     endobj_begin = cli_memstr(obj_end, pdf->map + pdf->size - obj_end, "endobj", strlen("endobj")); | 
689  | 1.35M  |     if (NULL == endobj_begin) { | 
690  |  |         /* No end to object.  | 
691  |  |          * PDF appears to be malformed or truncated.  | 
692  |  |          * Will record the object size as going to the end of the file.  | 
693  |  |          * Will record that the object is truncated.  | 
694  |  |          * Will position the pdf offset to the end of the PDF.  | 
695  |  |          * The next iteration of this function will find no more objects. */  | 
696  | 352k  |         obj->flags |= 1 << OBJ_TRUNCATED;  | 
697  | 352k  |         obj->size   = (pdf->map + pdf->size) - obj_end;  | 
698  | 352k  |         pdf->offset = pdf->size;  | 
699  |  |  | 
700  |  |         /* Truncated "object" found! */  | 
701  | 352k  |         status = CL_SUCCESS;  | 
702  | 352k  |         goto done;  | 
703  | 352k  |     }  | 
704  | 998k  |     endobj_end = endobj_begin + strlen("endobj"); | 
705  |  |  | 
706  |  |     /* Size of the object goes from "obj" <-> "endobj". */  | 
707  | 998k  |     obj->size   = endobj_begin - obj_end;  | 
708  | 998k  |     pdf->offset = endobj_end - pdf->map;  | 
709  |  |  | 
710  |  |     /*  | 
711  |  |      * Object found!  | 
712  |  |      */  | 
713  | 998k  |     status = CL_SUCCESS; /* truncated file, no end to obj. */  | 
714  |  |  | 
715  | 2.08M  | done:  | 
716  | 2.08M  |     if (status == CL_SUCCESS) { | 
717  | 1.35M  |         cli_dbgmsg("pdf_findobj: found %d %d obj @%lld, size: %zu bytes.\n", obj->id >> 8, obj->id & 0xff, (long long)(obj->start + pdf->startoff), obj->size); | 
718  | 1.35M  |     } else { | 
719  |  |         /* Remove the unused obj reference from our list of objects found */  | 
720  |  |         /* No need to realloc pdf->objs back down.  It won't leak. */  | 
721  | 732k  |         pdf->objs[pdf->nobjs - 1] = NULL;  | 
722  | 732k  |         pdf->nobjs--;  | 
723  |  |  | 
724  |  |         /* Free up the obj struct. */  | 
725  | 732k  |         if (NULL != obj)  | 
726  | 732k  |             free(obj);  | 
727  |  |  | 
728  | 732k  |         if (status == CL_BREAK) { | 
729  | 433k  |             cli_dbgmsg("pdf_findobj: No more objects (# objects found: %u)\n", pdf->nobjs); | 
730  | 433k  |         } else if (status == CL_EMEM) { | 
731  | 0  |             cli_warnmsg("pdf_findobj: Error allocating memory (# objects found: %u)\n", pdf->nobjs); | 
732  | 298k  |         } else { | 
733  | 298k  |             cli_dbgmsg("pdf_findobj: Unexpected status code %d.\n", status); | 
734  | 298k  |         }  | 
735  | 732k  |     }  | 
736  |  |  | 
737  | 2.08M  |     return status;  | 
738  | 998k  | }  | 
739  |  |  | 
740  |  | static size_t filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, size_t len, size_t *sum)  | 
741  | 330k  | { | 
742  | 330k  |     UNUSEDPARAM(obj);  | 
743  |  |  | 
744  | 330k  |     if (cli_checklimits("pdf", pdf->ctx, (uint64_t)*sum, 0, 0)) | 
745  | 7.16k  |         return len;  | 
746  |  |  | 
747  | 323k  |     *sum += len;  | 
748  |  |  | 
749  | 323k  |     return cli_writen(fout, buf, len);  | 
750  | 330k  | }  | 
751  |  |  | 
752  |  | void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag)  | 
753  | 1.19M  | { | 
754  | 1.19M  |     const char *s = "";  | 
755  | 1.19M  |     pdf->flags |= 1 << flag;  | 
756  | 1.19M  |     if (!cli_debug_flag)  | 
757  | 1.19M  |         return;  | 
758  |  |  | 
759  | 0  |     switch (flag) { | 
760  | 0  |         case UNTERMINATED_OBJ_DICT:  | 
761  | 0  |             s = "dictionary not terminated";  | 
762  | 0  |             break;  | 
763  | 0  |         case ESCAPED_COMMON_PDFNAME:  | 
764  |  |             /* like /JavaScript */  | 
765  | 0  |             s = "escaped common pdfname";  | 
766  | 0  |             break;  | 
767  | 0  |         case BAD_STREAM_FILTERS:  | 
768  | 0  |             s = "duplicate stream filters";  | 
769  | 0  |             break;  | 
770  | 0  |         case BAD_PDF_VERSION:  | 
771  | 0  |             s = "bad pdf version";  | 
772  | 0  |             break;  | 
773  | 0  |         case BAD_PDF_HEADERPOS:  | 
774  | 0  |             s = "bad pdf header position";  | 
775  | 0  |             break;  | 
776  | 0  |         case BAD_PDF_TRAILER:  | 
777  | 0  |             s = "bad pdf trailer";  | 
778  | 0  |             break;  | 
779  | 0  |         case BAD_PDF_TOOMANYOBJS:  | 
780  | 0  |             s = "too many pdf objs";  | 
781  | 0  |             break;  | 
782  | 0  |         case BAD_FLATE:  | 
783  | 0  |             s = "bad deflate stream";  | 
784  | 0  |             break;  | 
785  | 0  |         case BAD_FLATESTART:  | 
786  | 0  |             s = "bad deflate stream start";  | 
787  | 0  |             break;  | 
788  | 0  |         case BAD_STREAMSTART:  | 
789  | 0  |             s = "bad stream start";  | 
790  | 0  |             break;  | 
791  | 0  |         case UNKNOWN_FILTER:  | 
792  | 0  |             s = "unknown filter used";  | 
793  | 0  |             break;  | 
794  | 0  |         case BAD_ASCIIDECODE:  | 
795  | 0  |             s = "bad ASCII decode";  | 
796  | 0  |             break;  | 
797  | 0  |         case HEX_JAVASCRIPT:  | 
798  | 0  |             s = "hex javascript";  | 
799  | 0  |             break;  | 
800  | 0  |         case BAD_INDOBJ:  | 
801  | 0  |             s = "referencing nonexistent obj";  | 
802  | 0  |             break;  | 
803  | 0  |         case HAS_OPENACTION:  | 
804  | 0  |             s = "has /OpenAction";  | 
805  | 0  |             break;  | 
806  | 0  |         case HAS_LAUNCHACTION:  | 
807  | 0  |             s = "has /LaunchAction";  | 
808  | 0  |             break;  | 
809  | 0  |         case BAD_STREAMLEN:  | 
810  | 0  |             s = "bad /Length, too small";  | 
811  | 0  |             break;  | 
812  | 0  |         case ENCRYPTED_PDF:  | 
813  | 0  |             s = "PDF is encrypted";  | 
814  | 0  |             break;  | 
815  | 0  |         case LINEARIZED_PDF:  | 
816  | 0  |             s = "linearized PDF";  | 
817  | 0  |             break;  | 
818  | 0  |         case MANY_FILTERS:  | 
819  | 0  |             s = "more than 2 filters per obj";  | 
820  | 0  |             break;  | 
821  | 0  |         case DECRYPTABLE_PDF:  | 
822  | 0  |             s = "decryptable PDF";  | 
823  | 0  |             break;  | 
824  | 0  |     }  | 
825  |  |  | 
826  | 0  |     cli_dbgmsg("pdfobj_flag: %s flagged in object %u %u\n", s, obj->id >> 8, obj->id & 0xff); | 
827  | 0  | }  | 
828  |  |  | 
829  |  | struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)  | 
830  | 339k  | { | 
831  | 339k  |     uint32_t j;  | 
832  | 339k  |     uint32_t i;  | 
833  |  |  | 
834  |  |     /* search starting at previous obj (if exists) */  | 
835  | 67.4M  |     for (i = 0; i < pdf->nobjs; i++) { | 
836  | 67.4M  |         if (pdf->objs[i] == obj)  | 
837  | 338k  |             break;  | 
838  | 67.4M  |     }  | 
839  |  |  | 
840  | 67.3M  |     for (j = i; j < pdf->nobjs; j++) { | 
841  | 67.0M  |         obj = pdf->objs[j];  | 
842  | 67.0M  |         if (obj->id == objid)  | 
843  | 43.4k  |             return obj;  | 
844  | 67.0M  |     }  | 
845  |  |  | 
846  |  |     /* restart search from beginning if not found */  | 
847  | 66.1M  |     for (j = 0; j < i; j++) { | 
848  | 65.8M  |         obj = pdf->objs[j];  | 
849  | 65.8M  |         if (obj->id == objid)  | 
850  | 4.80k  |             return obj;  | 
851  | 65.8M  |     }  | 
852  |  |  | 
853  | 290k  |     return NULL;  | 
854  | 295k  | }  | 
855  |  |  | 
856  |  | /**  | 
857  |  |  * @brief   Find and interpret the "/Length" dictionary key value.  | 
858  |  |  *  | 
859  |  |  * The value may be:  | 
860  |  |  *  - a direct object (i.e. just a number)  | 
861  |  |  *  - an indirect object, where the value is somewhere else in the document and we have to look it up.  | 
862  |  |  *    indirect objects are referenced using an object id (objid), generation id (genid) genid, and the letter 'R'.  | 
863  |  |  *  | 
864  |  |  * Example dictionary with a single key "/Length" that relies direct object for the value.  | 
865  |  |  *  | 
866  |  |  *      1 0 obj  | 
867  |  |  *          << /Length 534  | 
868  |  |  *              /Filter [ /ASCII85Decode /LZWDecode ]  | 
869  |  |  *          >>  | 
870  |  |  *          stream  | 
871  |  |  *              J..)6T`?p&<!J9%_[umg"B7/Z7KNXbN'S+,*Q/&"OLT'FLIDK#!n`$"<Atdi`\Vn%b%)&'cA*VnK\CJY(sF>c!Jnl@  | 
872  |  |  *              RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d&/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb  | 
873  |  |  *              Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1  | 
874  |  |  *              'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sDS]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+  | 
875  |  |  *              ":aAa'S`ViJglLb8<W9k6Yl\\0McJQkDeLWdPN?9A'jX*al>iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL,  | 
876  |  |  *              JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj88l8Q:_CZ(Gm1%X\N1&u!FKHMB~>  | 
877  |  |  *          endstream  | 
878  |  |  *      endobj  | 
879  |  |  *  | 
880  |  |  * Example dictionary with a single key "/Length" that relies on an indirect object for the value.  | 
881  |  |  *  | 
882  |  |  *      7 0 obj  | 
883  |  |  *          << /Length 8 0 R >> % An indirect reference to object 8, with generation id 0.  | 
884  |  |  *          stream  | 
885  |  |  *              BT  | 
886  |  |  *                  /F1 12 Tf  | 
887  |  |  *                   72 712 Td  | 
888  |  |  *                  ( A stream with an indirect length ) Tj  | 
889  |  |  *              ET  | 
890  |  |  *          endstream  | 
891  |  |  *      endobj  | 
892  |  |  *  | 
893  |  |  *      8 0 obj  | 
894  |  |  *          77 % The length of the preceding stream  | 
895  |  |  *      endobj  | 
896  |  |  *  | 
897  |  |  * @param pdf       Pdf context structure.  | 
898  |  |  * @param obj       Pdf object context structure.  | 
899  |  |  * @param start     Pointer start of the dictionary string.  | 
900  |  |  * @param len       Remaining length of the dictioary string in bytes.  | 
901  |  |  * @return size_t   Unsigned integer value of the "/Length" key  | 
902  |  |  */  | 
903  |  | static size_t find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *dict_start, size_t dict_len)  | 
904  | 674k  | { | 
905  | 674k  |     size_t length          = 0;  | 
906  | 674k  |     const char *obj_start  = dict_start;  | 
907  | 674k  |     size_t bytes_remaining = dict_len;  | 
908  | 674k  |     long temp_long         = 0;  | 
909  | 674k  |     const char *index;  | 
910  |  |  | 
911  | 674k  |     if (bytes_remaining < 8) { | 
912  | 25.3k  |         return 0;  | 
913  | 25.3k  |     }  | 
914  |  |  | 
915  |  |     /*  | 
916  |  |      * Find the "/Length" dictionary key  | 
917  |  |      */  | 
918  | 648k  |     index = cli_memstr(obj_start, bytes_remaining, "/Length", 7);  | 
919  | 648k  |     if (!index)  | 
920  | 326k  |         return 0;  | 
921  |  |  | 
922  | 322k  |     bytes_remaining -= index - obj_start;  | 
923  |  |  | 
924  | 322k  |     if (bytes_remaining < 1) { | 
925  | 0  |         return 0;  | 
926  | 0  |     }  | 
927  |  |  | 
928  |  |     /* Step the index into the "/Length" string. */  | 
929  | 322k  |     index++;  | 
930  | 322k  |     bytes_remaining--;  | 
931  |  |  | 
932  |  |     /* Find the start of the next direct or indirect object.  | 
933  |  |      * pdf_nextobject() assumes we started searching from within a previous object */  | 
934  | 322k  |     obj_start = pdf_nextobject(index, bytes_remaining);  | 
935  | 322k  |     if (!obj_start)  | 
936  | 730  |         return 0;  | 
937  |  |  | 
938  | 321k  |     if (bytes_remaining < (size_t)(obj_start - index)) { | 
939  | 0  |         return 0;  | 
940  | 0  |     }  | 
941  | 321k  |     bytes_remaining -= obj_start - index;  | 
942  | 321k  |     index = obj_start;  | 
943  |  |  | 
944  |  |     /* Read the value.  This could either be the direct length value,  | 
945  |  |        or the object id of the indirect object that has the length */  | 
946  | 321k  |     if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
947  | 27.0k  |         cli_dbgmsg("find_length: failed to parse object length or objid\n"); | 
948  | 27.0k  |         return 0;  | 
949  | 294k  |     } else if (temp_long < 0) { | 
950  | 5.59k  |         cli_dbgmsg("find_length: Encountered invalid negative object length or objid (%ld).\n", temp_long); | 
951  | 5.59k  |         return 0;  | 
952  | 5.59k  |     }  | 
953  | 289k  |     length = (size_t)temp_long; /* length or maybe object id */  | 
954  |  |  | 
955  |  |     /*  | 
956  |  |      * Keep parsing, skipping past the first integer that might have been what we wanted.  | 
957  |  |      * If it's an indirect object, we'll find a Generation ID followed by the letter 'R'  | 
958  |  |      * I.e. something like " 0 R"  | 
959  |  |      */  | 
960  | 1.17M  |     while ((bytes_remaining > 0) && isdigit(*index)) { | 
961  | 880k  |         index++;  | 
962  | 880k  |         bytes_remaining--;  | 
963  | 880k  |     }  | 
964  |  |  | 
965  | 289k  |     if ((bytes_remaining > 0) && (*index == ' ')) { | 
966  | 33.3k  |         unsigned long genid;  | 
967  |  |  | 
968  | 33.3k  |         index++;  | 
969  | 33.3k  |         bytes_remaining--;  | 
970  |  |  | 
971  | 33.3k  |         if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
972  | 4.23k  |             cli_dbgmsg("find_length: failed to parse object genid\n"); | 
973  | 4.23k  |             return 0;  | 
974  | 29.0k  |         } else if (temp_long < 0) { | 
975  | 3.80k  |             cli_dbgmsg("find_length: Encountered invalid negative object genid (%ld).\n", temp_long); | 
976  | 3.80k  |             return 0;  | 
977  | 3.80k  |         }  | 
978  | 25.2k  |         genid = (unsigned long)temp_long;  | 
979  |  |  | 
980  | 125k  |         while ((bytes_remaining > 0) && isdigit(*index)) { | 
981  | 100k  |             index++;  | 
982  | 100k  |             bytes_remaining--;  | 
983  | 100k  |         }  | 
984  |  |  | 
985  | 25.2k  |         if (bytes_remaining < 2) { | 
986  | 0  |             return 0;  | 
987  | 0  |         }  | 
988  |  |  | 
989  | 25.2k  |         if (index[0] == ' ' && index[1] == 'R') { | 
990  |  |             /*  | 
991  |  |              * Ok so we found a genid and that 'R'.  Which means that first value  | 
992  |  |              * was actually the objid.  | 
993  |  |              * We can look up the indirect object using this information.  | 
994  |  |              */  | 
995  | 16.6k  |             unsigned long objid            = length;  | 
996  | 16.6k  |             const char *indirect_obj_start = NULL;  | 
997  |  |  | 
998  | 16.6k  |             cli_dbgmsg("find_length: length is in indirect object %lu %lu\n", objid, genid); | 
999  |  |  | 
1000  | 16.6k  |             obj = find_obj(pdf, obj, (length << 8) | (genid & 0xff));  | 
1001  | 16.6k  |             if (!obj) { | 
1002  | 7.52k  |                 cli_dbgmsg("find_length: indirect object not found\n"); | 
1003  | 7.52k  |                 return 0;  | 
1004  | 7.52k  |             }  | 
1005  |  |  | 
1006  | 9.15k  |             indirect_obj_start = pdf->map + obj->start;  | 
1007  | 9.15k  |             bytes_remaining    = pdf->size - obj->start;  | 
1008  |  |  | 
1009  |  |             /* Ok so we found the indirect object, lets read the value. */  | 
1010  | 9.15k  |             index = pdf_nextobject(indirect_obj_start, bytes_remaining);  | 
1011  | 9.15k  |             if (!index) { | 
1012  | 171  |                 cli_dbgmsg("find_length: next object not found\n"); | 
1013  | 171  |                 return 0;  | 
1014  | 171  |             }  | 
1015  |  |  | 
1016  | 8.98k  |             if (bytes_remaining < (size_t)(index - indirect_obj_start)) { | 
1017  | 0  |                 return 0;  | 
1018  | 0  |             }  | 
1019  | 8.98k  |             bytes_remaining -= index - indirect_obj_start;  | 
1020  |  |  | 
1021  |  |             /* Found the value, so lets parse it as a long, but prohibit negative lengths. */  | 
1022  | 8.98k  |             if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { | 
1023  | 4.40k  |                 cli_dbgmsg("find_length: failed to parse object length from indirect object\n"); | 
1024  | 4.40k  |                 return 0;  | 
1025  | 4.58k  |             } else if (temp_long < 0) { | 
1026  | 399  |                 cli_dbgmsg("find_length: Encountered invalid negative obj length (%ld).\n", temp_long); | 
1027  | 399  |                 return 0;  | 
1028  | 399  |             }  | 
1029  | 4.18k  |             length = (size_t)temp_long;  | 
1030  | 4.18k  |         }  | 
1031  | 25.2k  |     }  | 
1032  |  |  | 
1033  |  |     /* limit length */  | 
1034  | 268k  |     if ((size_t)(obj_start - pdf->map) + length + 5 > pdf->size)  | 
1035  | 40.3k  |         length = pdf->size - (obj_start - pdf->map) - 5;  | 
1036  |  |  | 
1037  | 268k  |     return length;  | 
1038  | 289k  | }  | 
1039  |  |  | 
/* Union of the per-object flag bits listed below.
 * NOTE(review): judging by the name, presumably tested against obj->flags to decide
 * which objects get dumped - confirm at the use sites. */
#define DUMP_MASK                   \
    ((1 << OBJ_CONTENTS) |          \
     (1 << OBJ_FILTER_FLATE) |      \
     (1 << OBJ_FILTER_DCT) |        \
     (1 << OBJ_FILTER_AH) |         \
     (1 << OBJ_FILTER_A85) |        \
     (1 << OBJ_EMBEDDED_FILE) |     \
     (1 << OBJ_JAVASCRIPT) |        \
     (1 << OBJ_OPENACTION) |        \
     (1 << OBJ_LAUNCHACTION))
1041  |  |  | 
1042  |  | static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd)  | 
1043  | 2.25M  | { | 
1044  | 2.25M  |     int ret;  | 
1045  | 2.25M  |     struct cli_bc_ctx *bc_ctx;  | 
1046  | 2.25M  |     cli_ctx *ctx = NULL;  | 
1047  | 2.25M  |     fmap_t *map;  | 
1048  |  |  | 
1049  | 2.25M  |     if (NULL == pdf)  | 
1050  | 0  |         return CL_EARG;  | 
1051  |  |  | 
1052  | 2.25M  |     ctx = pdf->ctx;  | 
1053  |  |  | 
1054  | 2.25M  |     bc_ctx = cli_bytecode_context_alloc();  | 
1055  | 2.25M  |     if (!bc_ctx) { | 
1056  | 0  |         cli_errmsg("run_pdf_hooks: can't allocate memory for bc_ctx\n"); | 
1057  | 0  |         return CL_EMEM;  | 
1058  | 0  |     }  | 
1059  |  |  | 
1060  | 2.25M  |     map = ctx->fmap;  | 
1061  | 2.25M  |     if (fd != -1) { | 
1062  | 959k  |         map = fmap(fd, 0, 0, NULL);  | 
1063  | 959k  |         if (!map) { | 
1064  | 0  |             cli_dbgmsg("run_pdf_hooks: can't mmap pdf extracted obj\n"); | 
1065  | 0  |             map = ctx->fmap;  | 
1066  | 0  |             fd  = -1;  | 
1067  | 0  |         }  | 
1068  | 959k  |     }  | 
1069  |  |  | 
1070  | 2.25M  |     cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs, &pdf->flags, pdf->size, pdf->startoff);  | 
1071  | 2.25M  |     cli_bytecode_context_setctx(bc_ctx, ctx);  | 
1072  | 2.25M  |     ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map);  | 
1073  | 2.25M  |     cli_bytecode_context_destroy(bc_ctx);  | 
1074  |  |  | 
1075  | 2.25M  |     if (fd != -1)  | 
1076  | 959k  |         funmap(map);  | 
1077  |  |  | 
1078  | 2.25M  |     return ret;  | 
1079  | 2.25M  | }  | 
1080  |  |  | 
1081  |  | static void dbg_printhex(const char *msg, const char *hex, unsigned len);  | 
1082  |  |  | 
/**
 * @brief   Decrypt a buffer with AES in CBC mode and strip the trailing padding.
 *
 * Despite the name, key lengths of 16, 24 and 32 bytes are accepted.
 *
 * When has_iv is non-zero, the first 16 bytes of `in` are used as the IV and are
 * not part of the output; the last decrypted byte is then treated as a padding
 * count (at most 0x10) and the padding is removed from the reported length.
 * When has_iv is zero, an all-zero IV is used and no padding is removed.
 *
 * On return, *length has been reduced by: the IV (if any), any trailing bytes that
 * do not fill a whole 16-byte block, and the padding (only if it validated).
 * If an argument is NULL, the key length is unsupported, the input is shorter than
 * 32 bytes, or key setup fails, the function returns without touching *length or q.
 *
 * @param in            Encrypted input.
 * @param[in,out] length    In: number of bytes in `in`. Out: number of plaintext bytes in `q`.
 * @param[out] q        Output buffer; must have room for every whole block decrypted.
 * @param key           Key bytes.
 * @param key_n         Key length in bytes (16, 24 or 32).
 * @param has_iv        Non-zero if `in` starts with a 16-byte IV.
 */
static void aes_256cbc_decrypt(const unsigned char *in, size_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
{
    uint32_t rk[RKLENGTH(256)];
    unsigned char iv[16];
    size_t len = 0;
    unsigned char pad;
    unsigned i;
    int nrounds;

    /* q and key are dereferenced below just like in and length, so validate them all. */
    if (in == NULL || length == NULL || q == NULL || key == NULL) {
        cli_dbgmsg("aes_256cbc_decrypt: invalid NULL parameters!\n");
        noisy_warnmsg("aes_256cbc_decrypt: invalid NULL parameters!\n");
        return;
    }

    len = *length;

    /* key_n is unsigned, so it is printed with %u. */
    cli_dbgmsg("aes_256cbc_decrypt: key length: %u, data length: %zu\n", key_n, *length);
    if (!(key_n == 16 || key_n == 24 || key_n == 32)) {
        cli_dbgmsg("aes_256cbc_decrypt: invalid key length: %u!\n", key_n * 8);
        noisy_warnmsg("aes_256cbc_decrypt: invalid key length: %u!\n", key_n * 8);
        return;
    }

    if (len < 32) {
        cli_dbgmsg("aes_256cbc_decrypt: len is <32: %zu\n", len);
        noisy_warnmsg("aes_256cbc_decrypt: len is <32: %zu\n", len);
        return;
    }

    if (has_iv) {
        memcpy(iv, in, 16);
        in += 16;
        len -= 16;
    } else {
        memset(iv, 0, sizeof(iv));
    }

    cli_dbgmsg("aes_256cbc_decrypt: Calling rijndaelSetupDecrypt\n");
    nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n * 8);
    if (!nrounds) {
        cli_dbgmsg("aes_256cbc_decrypt: nrounds = 0\n");
        return;
    }
    cli_dbgmsg("aes_256cbc_decrypt: Beginning rijndaelDecrypt\n");

    /* CBC: decrypt each block, XOR with the previous ciphertext block (or the IV). */
    while (len >= 16) {
        rijndaelDecrypt(rk, nrounds, in, q);
        for (i = 0; i < 16; i++)
            q[i] ^= iv[i];

        memcpy(iv, in, 16);

        q += 16;
        in += 16;
        len -= 16;
    }
    if (has_iv) {
        /* From here on len counts what must be removed from *length:
         * the 16-byte IV plus any leftover partial block. */
        len += 16;
        pad = q[-1];

        if (pad > 0x10) {
            cli_dbgmsg("aes_256cbc_decrypt: bad pad: %x (extra len: %zu)\n", pad, len - 16);
            noisy_warnmsg("aes_256cbc_decrypt: bad pad: %x (extra len: %zu)\n", pad, len - 16);
            *length -= len;
            return;
        }

        /* Rewind to the first padding byte and verify the padding bytes.
         * NOTE(review): the loop starts at 1, so q[0] (the first padding byte)
         * is not compared against pad. */
        q -= pad;
        for (i = 1; i < pad; i++) {
            if (q[i] != pad) {
                cli_dbgmsg("aes_256cbc_decrypt: bad pad: %x != %x\n", q[i], pad);
                noisy_warnmsg("aes_256cbc_decrypt: bad pad: %x != %x\n", q[i], pad);
                *length -= len;

                return;
            }
        }

        len += pad;
    }

    *length -= len;

    cli_dbgmsg("aes_256cbc_decrypt: length is %zu\n", *length);
}
1170  |  |  | 
/**
 * @brief   Encrypt a buffer with AES in CBC mode, without padding.
 *
 * Only whole 16-byte blocks of `in` are processed; any trailing partial block is ignored.
 *
 * *out_length is set to 0 before any other validation, so on every failure path
 * (NULL argument, key_n > 16, in_length < 16, or key setup reporting 0 rounds)
 * the caller reads back 0 bytes written rather than an unset value.
 *
 * @param in            Plaintext input.
 * @param in_length     Number of bytes in `in`.
 * @param[out] out      Output buffer; must have room for every whole block of `in`.
 * @param[out] out_length   Number of bytes written to `out`.
 * @param key           Key bytes.
 * @param key_n         Key length in bytes; values above 16 are rejected.
 * @param iv            16-byte IV, or NULL to use an all-zero IV.
 */
static void aes_128cbc_encrypt(const unsigned char *in, size_t in_length, unsigned char *out, size_t *out_length, const unsigned char *key, size_t key_n, const unsigned char *iv)
{
    uint32_t rk[RKLENGTH(128)];
    unsigned char real_iv[16] = {0};
    int nrounds;
    uint8_t i = 0;

    if (NULL == out_length) {
        cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: invalid NULL parameters!\n");
        return;
    }

    /* Report "nothing written" unless and until blocks are actually produced. */
    *out_length = 0;

    if (NULL == in || NULL == out || NULL == key) {
        cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: invalid NULL parameters!\n");
        return;
    }

    cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: key length: %zu, data length: %zu\n", key_n, in_length);
    if (key_n > 16) {
        cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: key length is %zu!\n", key_n * 8);
        return;
    }

    if (in_length < 16) {
        cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: in_length is <16: %zu\n", in_length);
        noisy_warnmsg("cli_pdf: aes_128cbc_encrypt: in_length is <16: %zu\n", in_length);
        return;
    }

    cli_dbgmsg("aes_128cbc_encrypt: Calling rijndaelSetupEncrypt\n");
    nrounds = rijndaelSetupEncrypt(rk, key, key_n * 8);
    if (!nrounds) {
        cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: nrounds = 0\n");
        return;
    }
    cli_dbgmsg("aes_128cbc_encrypt: Beginning rijndaelEncrypt\n");

    if (iv)
        memcpy(real_iv, iv, sizeof(real_iv));

    /* CBC: XOR the plaintext block into the chaining value, encrypt it in place,
     * and emit the result, which also serves as the chaining value for the next block. */
    while (in_length >= 16) {
        for (i = 0; i < 16; i++)
            real_iv[i] ^= in[i];

        rijndaelEncrypt(rk, nrounds, real_iv, real_iv);

        for (i = 0; i < 16; i++)
            out[i] = real_iv[i];

        out += 16;
        *out_length += 16;
        in += 16;
        in_length -= 16;
    }

    cli_dbgmsg("cli_pdf: aes_128cbc_encrypt: length is %zu\n", *out_length);
}
1219  |  |  | 
1220  |  | char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, size_t *length, enum enc_method enc_method)  | 
1221  | 50.1k  | { | 
1222  | 50.1k  |     unsigned char *key, *q, result[16];  | 
1223  | 50.1k  |     unsigned n;  | 
1224  | 50.1k  |     struct arc4_state arc4;  | 
1225  |  |  | 
1226  | 50.1k  |     if (!length || !*length || !in) { | 
1227  | 77  |         noisy_warnmsg("decrypt_any: decrypt failed for obj %u %u:  Invalid arguments.\n", id >> 8, id & 0xff); | 
1228  | 77  |         return NULL;  | 
1229  | 77  |     }  | 
1230  |  |  | 
1231  | 50.0k  |     if (NULL == pdf->key || 0 == pdf->keylen) { | 
1232  | 7.01k  |         noisy_warnmsg("decrypt_any: decrypt failed for obj %u %u:  PDF key never identified.\n", id >> 8, id & 0xff); | 
1233  | 7.01k  |         return NULL;  | 
1234  | 7.01k  |     }  | 
1235  |  |  | 
1236  | 43.0k  |     n = pdf->keylen + 5;  | 
1237  | 43.0k  |     if (enc_method == ENC_AESV2)  | 
1238  | 1.61k  |         n += 4;  | 
1239  |  |  | 
1240  | 43.0k  |     key = cli_max_malloc(n);  | 
1241  | 43.0k  |     if (!key) { | 
1242  | 0  |         noisy_warnmsg("decrypt_any: malloc failed\n"); | 
1243  | 0  |         return NULL;  | 
1244  | 0  |     }  | 
1245  |  |  | 
1246  | 43.0k  |     memcpy(key, pdf->key, pdf->keylen);  | 
1247  | 43.0k  |     q    = key + pdf->keylen;  | 
1248  | 43.0k  |     *q++ = id >> 8;  | 
1249  | 43.0k  |     *q++ = id >> 16;  | 
1250  | 43.0k  |     *q++ = id >> 24;  | 
1251  | 43.0k  |     *q++ = id;  | 
1252  | 43.0k  |     *q++ = 0;  | 
1253  | 43.0k  |     if (enc_method == ENC_AESV2)  | 
1254  | 1.61k  |         memcpy(q, "sAlT", 4);  | 
1255  |  |  | 
1256  | 43.0k  |     cl_hash_data("md5", key, n, result, NULL); | 
1257  | 43.0k  |     free(key);  | 
1258  |  |  | 
1259  | 43.0k  |     n = pdf->keylen + 5;  | 
1260  | 43.0k  |     if (n > 16)  | 
1261  | 41.8k  |         n = 16;  | 
1262  |  |  | 
1263  | 43.0k  |     q = cli_max_calloc(*length, sizeof(char));  | 
1264  | 43.0k  |     if (!q) { | 
1265  | 0  |         noisy_warnmsg("decrypt_any: malloc failed\n"); | 
1266  | 0  |         return NULL;  | 
1267  | 0  |     }  | 
1268  |  |  | 
1269  | 43.0k  |     switch (enc_method) { | 
1270  | 1.08k  |         case ENC_V2:  | 
1271  | 1.08k  |             cli_dbgmsg("cli_pdf: enc is v2\n"); | 
1272  | 1.08k  |             memcpy(q, in, *length);  | 
1273  | 1.08k  |             if (false == arc4_init(&arc4, result, n)) { | 
1274  | 0  |                 noisy_warnmsg("decrypt_any: failed to init arc4\n"); | 
1275  | 0  |                 free(q);  | 
1276  | 0  |                 return NULL;  | 
1277  | 0  |             }  | 
1278  | 1.08k  |             arc4_apply(&arc4, q, (unsigned)*length); /* TODO: may truncate for very large lengths */  | 
1279  |  |  | 
1280  | 1.08k  |             noisy_msg(pdf, "decrypt_any: decrypted ARC4 data\n");  | 
1281  |  |  | 
1282  | 1.08k  |             break;  | 
1283  | 1.61k  |         case ENC_AESV2:  | 
1284  | 1.61k  |             cli_dbgmsg("cli_pdf: enc is aesv2\n"); | 
1285  | 1.61k  |             aes_256cbc_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1);  | 
1286  |  |  | 
1287  | 1.61k  |             noisy_msg(pdf, "decrypt_any: decrypted AES(v2) data\n");  | 
1288  |  |  | 
1289  | 1.61k  |             break;  | 
1290  | 26.4k  |         case ENC_AESV3:  | 
1291  | 26.4k  |             cli_dbgmsg("decrypt_any: enc is aesv3\n"); | 
1292  |  |  | 
1293  | 26.4k  |             aes_256cbc_decrypt((const unsigned char *)in, length, q, pdf->key, pdf->keylen, 1);  | 
1294  |  |  | 
1295  | 26.4k  |             noisy_msg(pdf, "decrypted AES(v3) data\n");  | 
1296  |  |  | 
1297  | 26.4k  |             break;  | 
1298  | 3.22k  |         case ENC_IDENTITY:  | 
1299  | 3.22k  |             cli_dbgmsg("decrypt_any: enc is identity\n"); | 
1300  | 3.22k  |             memcpy(q, in, *length);  | 
1301  |  |  | 
1302  | 3.22k  |             noisy_msg(pdf, "decrypt_any: identity encryption\n");  | 
1303  |  |  | 
1304  | 3.22k  |             break;  | 
1305  | 135  |         case ENC_NONE:  | 
1306  | 135  |             cli_dbgmsg("decrypt_any: enc is none\n"); | 
1307  |  |  | 
1308  | 135  |             noisy_msg(pdf, "encryption is none\n");  | 
1309  |  |  | 
1310  | 135  |             free(q);  | 
1311  | 135  |             return NULL;  | 
1312  | 10.5k  |         case ENC_UNKNOWN:  | 
1313  | 10.5k  |             cli_dbgmsg("decrypt_any: enc is unknown\n"); | 
1314  | 10.5k  |             free(q);  | 
1315  |  |  | 
1316  | 10.5k  |             noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n", | 
1317  | 10.5k  |                           id >> 8, id & 0xff);  | 
1318  |  |  | 
1319  | 10.5k  |             return NULL;  | 
1320  | 43.0k  |     }  | 
1321  |  |  | 
1322  | 32.3k  |     return (char *)q;  | 
1323  | 43.0k  | }  | 
1324  |  |  | 
1325  |  | enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)  | 
1326  | 41.9k  | { | 
1327  | 41.9k  |     if (obj->flags & (1 << OBJ_EMBEDDED_FILE))  | 
1328  | 337  |         return pdf->enc_method_embeddedfile;  | 
1329  |  |  | 
1330  | 41.5k  |     if (obj->flags & (1 << OBJ_STREAM))  | 
1331  | 39.6k  |         return pdf->enc_method_stream;  | 
1332  |  |  | 
1333  | 1.97k  |     return pdf->enc_method_string;  | 
1334  | 41.5k  | }  | 
1335  |  |  | 
/* Scanner states used by process() while walking a contents stream. */
enum cstate {
    CSTATE_NONE       = 0, /* outside an array: waiting for '[' */
    CSTATE_TJ         = 1, /* after '[': waiting for '(' */
    CSTATE_TJ_PAROPEN = 2  /* after '(': bytes are text until ')' */
};
1341  |  |  | 
1342  |  | static void process(struct text_norm_state *s, enum cstate *st, const char *buf, size_t length, int fout)  | 
1343  | 546k  | { | 
1344  | 1.62G  |     do { | 
1345  | 1.62G  |         switch (*st) { | 
1346  | 1.56M  |             case CSTATE_NONE:  | 
1347  | 1.56M  |                 if (*buf == '[') { | 
1348  | 22.7k  |                     *st = CSTATE_TJ;  | 
1349  | 1.54M  |                 } else { | 
1350  | 1.54M  |                     const char *nl = memchr(buf, '\n', length);  | 
1351  | 1.54M  |                     if (!nl)  | 
1352  | 317k  |                         return;  | 
1353  |  |  | 
1354  | 1.22M  |                     if ((size_t)(nl - buf) > length) { | 
1355  | 0  |                         length = 0;  | 
1356  | 1.22M  |                     } else { | 
1357  | 1.22M  |                         length -= nl - buf;  | 
1358  | 1.22M  |                     }  | 
1359  | 1.22M  |                     buf = nl;  | 
1360  | 1.22M  |                 }  | 
1361  |  |  | 
1362  | 1.25M  |                 break;  | 
1363  | 587M  |             case CSTATE_TJ:  | 
1364  | 587M  |                 if (*buf == '(') | 
1365  | 200k  |                     *st = CSTATE_TJ_PAROPEN;  | 
1366  |  |  | 
1367  | 587M  |                 break;  | 
1368  | 1.03G  |             case CSTATE_TJ_PAROPEN:  | 
1369  | 1.03G  |                 if (*buf == ')') { | 
1370  | 184k  |                     *st = CSTATE_TJ;  | 
1371  | 1.03G  |                 } else { | 
1372  | 1.03G  |                     if (text_normalize_buffer(s, (const unsigned char *)buf, 1) != 1) { | 
1373  | 638  |                         cli_writen(fout, s->out, s->out_pos);  | 
1374  | 638  |                         text_normalize_reset(s);  | 
1375  | 638  |                     }  | 
1376  | 1.03G  |                 }  | 
1377  |  |  | 
1378  | 1.03G  |                 break;  | 
1379  | 1.62G  |         }  | 
1380  |  |  | 
1381  | 1.62G  |         buf++;  | 
1382  | 1.62G  |         if (length > 0)  | 
1383  | 1.62G  |             length--;  | 
1384  | 1.62G  |     } while (length > 0);  | 
1385  | 546k  | }  | 
1386  |  |  | 
1387  |  | static int pdf_scan_contents(int fd, struct pdf_struct *pdf, struct pdf_obj *obj)  | 
1388  | 69.5k  | { | 
1389  | 69.5k  |     struct text_norm_state s;  | 
1390  | 69.5k  |     char fullname[1024];  | 
1391  | 69.5k  |     char outbuff[BUFSIZ];  | 
1392  | 69.5k  |     char inbuf[BUFSIZ];  | 
1393  | 69.5k  |     int fout;  | 
1394  | 69.5k  |     size_t n;  | 
1395  | 69.5k  |     cl_error_t rc;  | 
1396  | 69.5k  |     enum cstate st = CSTATE_NONE;  | 
1397  |  |  | 
1398  | 69.5k  |     snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf obj %d %d contents", pdf->dir, obj->id >> 8, obj->id & 0xff);  | 
1399  | 69.5k  |     fout = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);  | 
1400  | 69.5k  |     if (fout < 0) { | 
1401  | 0  |         char err[128];  | 
1402  |  | 
  | 
1403  | 0  |         cli_errmsg("pdf_scan_contents: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); | 
1404  | 0  |         return CL_ETMPFILE;  | 
1405  | 0  |     }  | 
1406  |  |  | 
1407  | 69.5k  |     text_normalize_init(&s, (unsigned char *)outbuff, sizeof(outbuff));  | 
1408  | 616k  |     while (1) { | 
1409  | 616k  |         n = cli_readn(fd, inbuf, sizeof(inbuf));  | 
1410  | 616k  |         if ((n == 0) || (n == (size_t)-1))  | 
1411  | 69.5k  |             break;  | 
1412  |  |  | 
1413  | 546k  |         process(&s, &st, inbuf, n, fout);  | 
1414  | 546k  |     }  | 
1415  |  |  | 
1416  | 69.5k  |     cli_writen(fout, s.out, s.out_pos);  | 
1417  |  |  | 
1418  | 69.5k  |     lseek(fout, 0, SEEK_SET);  | 
1419  | 69.5k  |     rc = cli_magic_scan_desc(fout, fullname, pdf->ctx, NULL, LAYER_ATTRIBUTES_NONE);  | 
1420  | 69.5k  |     close(fout);  | 
1421  |  |  | 
1422  | 69.5k  |     if (!pdf->ctx->engine->keeptmp || (s.out_pos == 0))  | 
1423  | 69.5k  |         if (cli_unlink(fullname) && rc != CL_VIRUS)  | 
1424  | 0  |             rc = CL_EUNLINK;  | 
1425  |  |  | 
1426  | 69.5k  |     return rc;  | 
1427  | 69.5k  | }  | 
1428  |  |  | 
1429  |  | cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)  | 
1430  | 1.58M  | { | 
1431  | 1.58M  |     char fullname[PATH_MAX + 1];  | 
1432  | 1.58M  |     int fout      = -1;  | 
1433  | 1.58M  |     size_t sum    = 0;  | 
1434  | 1.58M  |     cl_error_t rc = CL_SUCCESS;  | 
1435  | 1.58M  |     int dump      = 1;  | 
1436  |  |  | 
1437  | 1.58M  |     cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id >> 8, obj->id & 0xff); | 
1438  |  |  | 
1439  | 1.58M  |     if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) { | 
1440  | 0  |         cli_dbgmsg("pdf_extract_obj: Recursion limit reached.\n"); | 
1441  | 0  |         return CL_SUCCESS;  | 
1442  | 0  |     }  | 
1443  |  |  | 
1444  | 1.58M  |     if (obj->extracted) { | 
1445  |  |         // Should not attempt to extract the same object more than once.  | 
1446  | 1.47k  |         return CL_SUCCESS;  | 
1447  | 1.47k  |     }  | 
1448  |  |     // We're not done yet, but this is enough to say we've tried.  | 
1449  |  |     // Trying again won't help any.  | 
1450  | 1.58M  |     obj->extracted = true;  | 
1451  |  |  | 
1452  | 1.58M  |     if (obj->objstm) { | 
1453  | 239k  |         cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n"); | 
1454  | 239k  |         if (obj->objstm->streambuf == NULL) { | 
1455  | 0  |             cli_warnmsg("pdf_extract_obj: object in object stream has null stream buffer!\n"); | 
1456  | 0  |             return CL_EFORMAT;  | 
1457  | 0  |         }  | 
1458  | 239k  |     }  | 
1459  |  |  | 
1460  |  |     /* TODO: call bytecode hook here, allow override dumpability */  | 
1461  | 1.58M  |     if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) { | 
1462  |  |         /* don't dump all streams */  | 
1463  | 580k  |         dump = 0;  | 
1464  | 580k  |     }  | 
1465  |  |  | 
1466  | 1.58M  |     if ((obj->flags & (1 << OBJ_IMAGE)) && !(obj->flags & (1 << OBJ_FILTER_DCT))) { | 
1467  |  |         /* don't dump / scan non-JPG images */  | 
1468  | 4.48k  |         dump = 0;  | 
1469  | 4.48k  |     }  | 
1470  |  |  | 
1471  | 1.58M  |     if (obj->flags & (1 << OBJ_FORCEDUMP)) { | 
1472  |  |         /* bytecode can force dump by setting this flag */  | 
1473  | 1.38k  |         dump = 1;  | 
1474  | 1.38k  |     }  | 
1475  |  |  | 
1476  | 1.58M  |     if (!dump)  | 
1477  | 583k  |         return CL_CLEAN;  | 
1478  |  |  | 
1479  | 1.00M  |     cli_dbgmsg("pdf_extract_obj: dumping obj %u %u\n", obj->id >> 8, obj->id & 0xff); | 
1480  |  |  | 
1481  | 1.00M  |     snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf obj %d %d", pdf->dir, obj->id >> 8, obj->id & 0xff);  | 
1482  | 1.00M  |     fout = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);  | 
1483  | 1.00M  |     if (fout < 0) { | 
1484  | 25  |         char err[128];  | 
1485  | 25  |         cli_errmsg("pdf_extract_obj: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err))); | 
1486  |  |  | 
1487  | 25  |         return CL_ETMPFILE;  | 
1488  | 25  |     }  | 
1489  |  |  | 
1490  | 1.00M  |     if (!(flags & PDF_EXTRACT_OBJ_SCAN)) { | 
1491  | 1.36k  |         if (NULL != obj->path) { | 
1492  | 0  |             obj->path = strdup(fullname);  | 
1493  | 0  |         }  | 
1494  | 1.36k  |     }  | 
1495  |  |  | 
1496  | 1.00M  |     if ((NULL == obj->objstm) &&  | 
1497  | 1.00M  |         (obj->flags & (1 << OBJ_STREAM))) { | 
1498  |  |         /*  | 
1499  |  |          * Object contains a stream. Parse this now.  | 
1500  |  |          */  | 
1501  | 674k  |         cli_dbgmsg("pdf_extract_obj: parsing a stream in obj %u %u\n", obj->id >> 8, obj->id & 0xff); | 
1502  |  |  | 
1503  | 674k  |         const char *start = pdf->map + obj->start;  | 
1504  |  |  | 
1505  | 674k  |         size_t length;  | 
1506  | 674k  |         size_t orig_length;  | 
1507  | 674k  |         int dict_len = obj->stream - start; /* Dictionary should end where the stream begins */  | 
1508  |  |  | 
1509  | 674k  |         const char *pstr;  | 
1510  | 674k  |         struct pdf_dict *dparams     = NULL;  | 
1511  | 674k  |         struct objstm_struct *objstm = NULL;  | 
1512  | 674k  |         int xref                     = 0;  | 
1513  |  |  | 
1514  |  |         /* Find and interpret the length dictionary value */  | 
1515  | 674k  |         length = find_length(pdf, obj, start, dict_len);  | 
1516  |  |  | 
1517  | 674k  |         orig_length = length;  | 
1518  |  |  | 
1519  | 674k  |         if (length > obj->stream_size) { | 
1520  | 79.5k  |             cli_dbgmsg("cli_pdf: Stream length exceeds object length by %zu bytes. Length truncated to %zu bytes\n", length - obj->stream_size, obj->stream_size); | 
1521  | 79.5k  |             noisy_warnmsg("Stream length exceeds object length by %zu bytes. Length truncated to %zu bytes\n", length - obj->stream_size, obj->stream_size); | 
1522  |  |  | 
1523  | 79.5k  |             length = obj->stream_size;  | 
1524  | 79.5k  |         }  | 
1525  |  |  | 
1526  | 674k  |         if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && (length == 0)) { | 
1527  |  |             /*  | 
1528  |  |              * If the length is unknown and this doesn't contain a FLATE encoded filter...  | 
1529  |  |              * Calculate the length using the stream size, and trimming  | 
1530  |  |              * off any newline/carriage returns from the end of the stream.  | 
1531  |  |              */  | 
1532  | 374k  |             const char *q = start + obj->stream_size;  | 
1533  | 374k  |             length        = obj->stream_size;  | 
1534  | 374k  |             q--;  | 
1535  |  |  | 
1536  | 374k  |             if (length > 0) { | 
1537  | 360k  |                 if (*q == '\n') { | 
1538  | 5.93k  |                     q--;  | 
1539  | 5.93k  |                     length--;  | 
1540  |  |  | 
1541  | 5.93k  |                     if (length > 0 && *q == '\r')  | 
1542  | 1.18k  |                         length--;  | 
1543  | 354k  |                 } else if (*q == '\r') { | 
1544  | 27.7k  |                     length--;  | 
1545  | 27.7k  |                 }  | 
1546  | 360k  |             }  | 
1547  |  |  | 
1548  | 374k  |             cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length); | 
1549  | 374k  |         } else { | 
1550  | 299k  |             if (obj->stream_size > (size_t)length + 2) { | 
1551  | 168k  |                 cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n", | 
1552  | 168k  |                            (size_t)length, obj->stream_size);  | 
1553  | 168k  |                 length = obj->stream_size;  | 
1554  | 168k  |             }  | 
1555  | 299k  |         }  | 
1556  |  |  | 
1557  | 674k  |         if ((0 != orig_length) && (obj->stream_size > (size_t)orig_length + 20)) { | 
1558  | 114k  |             cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n", | 
1559  | 114k  |                        (long long)orig_length, (long long)length, obj->stream_size);  | 
1560  | 114k  |             pdfobj_flag(pdf, obj, BAD_STREAMLEN);  | 
1561  | 114k  |         }  | 
1562  |  |  | 
1563  | 674k  |         if (0 == length) { | 
1564  | 53.1k  |             length = obj->stream_size;  | 
1565  | 53.1k  |             if (0 == length) { | 
1566  | 23.1k  |                 cli_dbgmsg("pdf_extract_obj: Alleged or calculated stream length and stream buffer size both 0\n"); | 
1567  | 23.1k  |                 goto done; /* Empty stream, nothing to scan */  | 
1568  | 23.1k  |             }  | 
1569  | 53.1k  |         }  | 
1570  |  |  | 
1571  |  |         /* Check if XRef is enabled */  | 
1572  | 651k  |         if (cli_memstr(start, dict_len, "/XRef", strlen("/XRef"))) { | 
1573  | 25.2k  |             xref = 1;  | 
1574  | 25.2k  |         }  | 
1575  |  |  | 
1576  |  |         /*  | 
1577  |  |          * Identify the DecodeParms, if available.  | 
1578  |  |          */  | 
1579  | 651k  |         if (NULL != (pstr = pdf_getdict(start, &dict_len, "/DecodeParms"))) { | 
1580  | 66.6k  |             cli_dbgmsg("pdf_extract_obj: Found /DecodeParms\n"); | 
1581  | 584k  |         } else if (NULL != (pstr = pdf_getdict(start, &dict_len, "/DP"))) { | 
1582  | 41.2k  |             cli_dbgmsg("pdf_extract_obj: Found /DP\n"); | 
1583  | 41.2k  |         }  | 
1584  |  |  | 
1585  | 651k  |         if (pstr) { | 
1586  |  |             /* shift pstr left to "<<" for pdf_parse_dict */  | 
1587  | 199k  |             while ((*pstr == '<') && (pstr > start)) { | 
1588  | 91.8k  |                 pstr--;  | 
1589  | 91.8k  |                 dict_len++;  | 
1590  | 91.8k  |             }  | 
1591  |  |  | 
1592  |  |             /* shift pstr right to "<<" for pdf_parse_dict */  | 
1593  | 1.48M  |             while ((*pstr != '<') && (dict_len > 0)) { | 
1594  | 1.38M  |                 pstr++;  | 
1595  | 1.38M  |                 dict_len--;  | 
1596  | 1.38M  |             }  | 
1597  |  |  | 
1598  | 107k  |             if (dict_len > 4) { | 
1599  | 105k  |                 pdf->parse_recursion_depth++;  | 
1600  | 105k  |                 dparams = pdf_parse_dict(pdf, obj, obj->size, (char *)pstr, NULL);  | 
1601  | 105k  |                 pdf->parse_recursion_depth--;  | 
1602  | 105k  |             } else { | 
1603  | 2.29k  |                 cli_dbgmsg("pdf_extract_obj: failed to locate DecodeParms dictionary start\n"); | 
1604  | 2.29k  |             }  | 
1605  | 107k  |         }  | 
1606  |  |  | 
1607  |  |         /*  | 
1608  |  |          * Go back to the start of the dictionary and check to see if the stream  | 
1609  |  |          * is an object stream. If so, collect the relevant info.  | 
1610  |  |          */  | 
1611  | 651k  |         dict_len = obj->stream - start;  | 
1612  | 651k  |         if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) { | 
1613  | 85.5k  |             int32_t objstm_first  = -1;  | 
1614  | 85.5k  |             int32_t objstm_length = -1;  | 
1615  | 85.5k  |             int32_t objstm_n      = -1;  | 
1616  |  |  | 
1617  | 85.5k  |             cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n"); | 
1618  |  |  | 
1619  | 85.5k  |             dict_len = obj->stream - start;  | 
1620  | 85.5k  |             if ((-1 == (objstm_first = pdf_readint(start, dict_len, "/First")))) { | 
1621  | 10.3k  |                 cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n"); | 
1622  | 75.2k  |             } else if ((-1 == (objstm_length = pdf_readint(start, dict_len, "/Length")))) { | 
1623  | 4.55k  |                 cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n"); | 
1624  | 70.6k  |             } else if ((-1 == (objstm_n = pdf_readint(start, dict_len, "/N")))) { | 
1625  | 7.21k  |                 cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n"); | 
1626  | 63.4k  |             } else { | 
1627  |  |                 /* Add objstm to pdf struct, so it can be freed eventually */  | 
1628  | 63.4k  |                 pdf->nobjstms++;  | 
1629  | 63.4k  |                 pdf->objstms = cli_max_realloc_or_free(pdf->objstms, sizeof(struct objstm_struct *) * pdf->nobjstms);  | 
1630  | 63.4k  |                 if (!pdf->objstms) { | 
1631  | 0  |                     cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms); | 
1632  | 0  |                     pdf_free_dict(dparams);  | 
1633  | 0  |                     return CL_EMEM;  | 
1634  | 0  |                 }  | 
1635  |  |  | 
1636  | 63.4k  |                 objstm = malloc(sizeof(struct objstm_struct));  | 
1637  | 63.4k  |                 if (!objstm) { | 
1638  | 0  |                     cli_warnmsg("pdf_extract_obj: out of memory parsing object stream (%u)\n", pdf->nobjstms); | 
1639  | 0  |                     pdf_free_dict(dparams);  | 
1640  | 0  |                     return CL_EMEM;  | 
1641  | 0  |                 }  | 
1642  | 63.4k  |                 pdf->objstms[pdf->nobjstms - 1] = objstm;  | 
1643  |  |  | 
1644  | 63.4k  |                 memset(objstm, 0, sizeof(*objstm));  | 
1645  |  |  | 
1646  | 63.4k  |                 objstm->first        = (uint32_t)objstm_first;  | 
1647  | 63.4k  |                 objstm->current      = (uint32_t)objstm_first;  | 
1648  | 63.4k  |                 objstm->current_pair = 0;  | 
1649  | 63.4k  |                 objstm->length       = (uint32_t)objstm_length;  | 
1650  | 63.4k  |                 objstm->n            = (uint32_t)objstm_n;  | 
1651  |  |  | 
1652  | 63.4k  |                 cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first); | 
1653  | 63.4k  |                 cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length); | 
1654  | 63.4k  |                 cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n); | 
1655  | 63.4k  |             }  | 
1656  | 85.5k  |         }  | 
1657  |  |  | 
1658  | 651k  |         sum = pdf_decodestream(pdf, obj, dparams, obj->stream, (uint32_t)length, xref, fout, &rc, objstm);  | 
1659  | 651k  |         if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) { | 
1660  | 130k  |             cli_dbgmsg("Error decoding stream! Error code: %d\n", rc); | 
1661  |  |  | 
1662  |  |             /* It's ok if we couldn't decode the stream,  | 
1663  |  |              *   make a best effort to keep parsing...  | 
1664  |  |              *   Unless we were unable to allocate memory.*/  | 
1665  | 130k  |             if (CL_EMEM == rc) { | 
1666  | 0  |                 goto really_done;  | 
1667  | 0  |             }  | 
1668  | 130k  |             if (CL_EPARSE == rc) { | 
1669  | 130k  |                 rc = CL_SUCCESS;  | 
1670  | 130k  |             }  | 
1671  |  |  | 
1672  | 130k  |             if (NULL != objstm) { | 
1673  |  |                 /*  | 
1674  |  |                  * If we were expecting an objstm and there was a failure...  | 
1675  |  |                  *   discard the memory for last object stream.  | 
1676  |  |                  */  | 
1677  | 8.92k  |                 if (NULL != pdf->objstms) { | 
1678  | 8.92k  |                     if (NULL != pdf->objstms[pdf->nobjstms - 1]) { | 
1679  | 8.92k  |                         if (NULL != pdf->objstms[pdf->nobjstms - 1]->streambuf) { | 
1680  | 0  |                             free(pdf->objstms[pdf->nobjstms - 1]->streambuf);  | 
1681  | 0  |                             pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;  | 
1682  | 0  |                         }  | 
1683  | 8.92k  |                         free(pdf->objstms[pdf->nobjstms - 1]);  | 
1684  | 8.92k  |                         pdf->objstms[pdf->nobjstms - 1] = NULL;  | 
1685  | 8.92k  |                     }  | 
1686  |  |  | 
1687  |  |                     /* Pop the objstm off the end of the pdf->objstms array. */  | 
1688  | 8.92k  |                     if (pdf->nobjstms > 0) { | 
1689  | 8.92k  |                         pdf->nobjstms--;  | 
1690  | 8.92k  |                         if (0 == pdf->nobjstms) { | 
1691  | 2.05k  |                             free(pdf->objstms);  | 
1692  | 2.05k  |                             pdf->objstms = NULL;  | 
1693  | 6.86k  |                         } else { | 
1694  | 6.86k  |                             pdf->objstms = cli_max_realloc_or_free(pdf->objstms, sizeof(struct objstm_struct *) * pdf->nobjstms);  | 
1695  |  |  | 
1696  | 6.86k  |                             if (!pdf->objstms) { | 
1697  | 0  |                                 cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n"); | 
1698  | 0  |                                 return CL_EMEM;  | 
1699  | 0  |                             }  | 
1700  | 6.86k  |                         }  | 
1701  | 8.92k  |                     } else { | 
1702  |  |                         /* hm.. this shouldn't happen */  | 
1703  | 0  |                         cli_warnmsg("pdf_extract_obj: Failure counting objstms.\n"); | 
1704  | 0  |                     }  | 
1705  | 8.92k  |                 }  | 
1706  | 8.92k  |             }  | 
1707  | 130k  |         }  | 
1708  |  |  | 
1709  | 651k  |         if (dparams)  | 
1710  | 75.2k  |             pdf_free_dict(dparams);  | 
1711  |  |  | 
1712  | 651k  |         if (rc == CL_VIRUS) { | 
1713  | 0  |             sum = 0; /* prevents post-filter scan */  | 
1714  | 0  |             goto done;  | 
1715  | 0  |         }  | 
1716  |  |  | 
1717  | 651k  |     } else if (obj->flags & (1 << OBJ_JAVASCRIPT)) { | 
1718  | 15.0k  |         const char *q2;  | 
1719  | 15.0k  |         const char *q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
1720  | 15.0k  |                                       : (const char *)(obj->start + pdf->map);  | 
1721  |  |  | 
1722  |  |         /* TODO: get obj-endobj size */  | 
1723  | 15.0k  |         off_t bytesleft = obj->size;  | 
1724  |  |  | 
1725  | 15.0k  |         if (bytesleft < 0) { | 
1726  | 0  |             goto done;  | 
1727  | 0  |         }  | 
1728  |  |  | 
1729  | 32.8k  |         do { | 
1730  | 32.8k  |             char *js      = NULL;  | 
1731  | 32.8k  |             size_t js_len = 0;  | 
1732  | 32.8k  |             const char *q3;  | 
1733  |  |  | 
1734  | 32.8k  |             q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);  | 
1735  | 32.8k  |             if (!q2)  | 
1736  | 14.2k  |                 break;  | 
1737  |  |  | 
1738  | 18.6k  |             bytesleft -= q2 - q + 11;  | 
1739  | 18.6k  |             q = q2 + 11;  | 
1740  |  |  | 
1741  | 18.6k  |             js = pdf_readstring(q, bytesleft, "/JS", NULL, &q2, !(pdf->flags & (1 << DECRYPTABLE_PDF)));  | 
1742  | 18.6k  |             bytesleft -= q2 - q;  | 
1743  | 18.6k  |             q = q2;  | 
1744  |  |  | 
1745  | 18.6k  |             if (js) { | 
1746  | 8.89k  |                 char *decrypted = NULL;  | 
1747  | 8.89k  |                 const char *out = js;  | 
1748  | 8.89k  |                 js_len          = strlen(js);  | 
1749  | 8.89k  |                 if (pdf->flags & (1 << DECRYPTABLE_PDF)) { | 
1750  | 2.94k  |                     cli_dbgmsg("pdf_extract_obj: encrypted string\n"); | 
1751  | 2.94k  |                     decrypted = decrypt_any(pdf, obj->id, js, &js_len, pdf->enc_method_string);  | 
1752  |  |  | 
1753  | 2.94k  |                     if (decrypted) { | 
1754  | 2.06k  |                         noisy_msg(pdf, "pdf_extract_obj: decrypted Javascript string from obj %u %u\n", obj->id >> 8, obj->id & 0xff);  | 
1755  | 2.06k  |                         out = decrypted;  | 
1756  | 2.06k  |                     }  | 
1757  | 2.94k  |                 }  | 
1758  |  |  | 
1759  | 8.89k  |                 if ((pdf->ctx->options->general & CL_SCAN_GENERAL_COLLECT_METADATA) && pdf->ctx->wrkproperty != NULL) { | 
1760  | 8.89k  |                     struct json_object *pdfobj, *jbig2arr;  | 
1761  |  |  | 
1762  | 8.89k  |                     if (NULL == (pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats"))) { | 
1763  | 0  |                         cli_errmsg("pdf_extract_obj: failed to get PDFStats JSON object\n"); | 
1764  | 8.89k  |                     } else if (NULL == (jbig2arr = cli_jsonarray(pdfobj, "JavascriptObjects"))) { | 
1765  | 0  |                         cli_errmsg("pdf_extract_obj: failed to get JavascriptObjects JSON object\n"); | 
1766  | 8.89k  |                     } else { | 
1767  | 8.89k  |                         cli_jsonint_array(jbig2arr, obj->id >> 8);  | 
1768  | 8.89k  |                     }  | 
1769  | 8.89k  |                 }  | 
1770  |  |  | 
1771  | 8.89k  |                 pdf->stats.njs++;  | 
1772  |  |  | 
1773  | 8.89k  |                 if (filter_writen(pdf, obj, fout, out, js_len, (size_t *)&sum) != js_len) { | 
1774  | 0  |                     rc = CL_EWRITE;  | 
1775  | 0  |                     free(js);  | 
1776  | 0  |                     break;  | 
1777  | 0  |                 }  | 
1778  |  |  | 
1779  | 8.89k  |                 free(decrypted);  | 
1780  | 8.89k  |                 free(js);  | 
1781  | 8.89k  |                 cli_dbgmsg("pdf_extract_obj: bytesleft: %d\n", (int)bytesleft); | 
1782  |  |  | 
1783  | 8.89k  |                 if (bytesleft > 0) { | 
1784  | 8.89k  |                     q2 = pdf_nextobject(q, bytesleft);  | 
1785  | 8.89k  |                     if (!q2)  | 
1786  | 4.06k  |                         q2 = q + bytesleft - 1;  | 
1787  |  |  | 
1788  |  |                     /* non-conforming PDFs that don't escape ) properly */  | 
1789  | 8.89k  |                     q3 = memchr(q, ')', bytesleft);  | 
1790  | 8.89k  |                     if (q3 && q3 < q2)  | 
1791  | 440  |                         q2 = q3;  | 
1792  |  |  | 
1793  | 11.8k  |                     while (q2 > q && q2[-1] == ' ')  | 
1794  | 2.98k  |                         q2--;  | 
1795  |  |  | 
1796  | 8.89k  |                     if (q2 > q) { | 
1797  | 6.22k  |                         q--;  | 
1798  | 6.22k  |                         filter_writen(pdf, obj, fout, q, q2 - q, (size_t *)&sum);  | 
1799  | 6.22k  |                         q++;  | 
1800  | 6.22k  |                     }  | 
1801  | 8.89k  |                 }  | 
1802  | 8.89k  |             }  | 
1803  |  |  | 
1804  | 18.6k  |         } while (bytesleft > 0);  | 
1805  | 315k  |     } else { | 
1806  | 315k  |         off_t bytesleft = obj->size;  | 
1807  |  |  | 
1808  | 315k  |         if (bytesleft < 0)  | 
1809  | 0  |             rc = CL_EFORMAT;  | 
1810  | 315k  |         else { | 
1811  | 315k  |             if (obj->objstm) { | 
1812  | 8.95k  |                 if (filter_writen(pdf, obj, fout, obj->objstm->streambuf + obj->start, bytesleft, (size_t *)&sum) != (size_t)bytesleft)  | 
1813  | 0  |                     rc = CL_EWRITE;  | 
1814  | 306k  |             } else { | 
1815  | 306k  |                 if (filter_writen(pdf, obj, fout, pdf->map + obj->start, bytesleft, (size_t *)&sum) != (size_t)bytesleft)  | 
1816  | 0  |                     rc = CL_EWRITE;  | 
1817  | 306k  |             }  | 
1818  | 315k  |         }  | 
1819  | 315k  |     }  | 
1820  |  |  | 
1821  | 1.00M  | done:  | 
1822  |  |  | 
1823  | 1.00M  |     cli_dbgmsg("pdf_extract_obj: extracted %td bytes %u %u obj\n", sum, obj->id >> 8, obj->id & 0xff); | 
1824  | 1.00M  |     cli_dbgmsg("pdf_extract_obj:         ... to %s\n", fullname); | 
1825  |  |  | 
1826  | 1.00M  |     if (flags & PDF_EXTRACT_OBJ_SCAN && sum) { | 
1827  | 962k  |         int rc2;  | 
1828  |  |  | 
1829  |  |         /* TODO: invoke bytecode on this pdf obj with metainformation associated */  | 
1830  | 962k  |         lseek(fout, 0, SEEK_SET);  | 
1831  | 962k  |         rc2 = cli_magic_scan_desc(fout, fullname, pdf->ctx, NULL, LAYER_ATTRIBUTES_NONE);  | 
1832  | 962k  |         if (rc2 != CL_SUCCESS) { | 
1833  | 3.16k  |             rc = rc2;  | 
1834  | 3.16k  |             goto really_done;  | 
1835  | 3.16k  |         }  | 
1836  |  |  | 
1837  | 959k  |         if ((rc == CL_CLEAN) || (rc == CL_VIRUS)) { | 
1838  | 959k  |             rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout);  | 
1839  | 959k  |             if (rc2 == CL_VIRUS) { | 
1840  | 0  |                 rc = rc2;  | 
1841  | 0  |                 goto really_done;  | 
1842  | 0  |             }  | 
1843  | 959k  |         }  | 
1844  |  |  | 
1845  | 959k  |         if (((rc == CL_CLEAN) || (rc == CL_VIRUS)) && (obj->flags & (1 << OBJ_CONTENTS))) { | 
1846  | 69.5k  |             lseek(fout, 0, SEEK_SET);  | 
1847  | 69.5k  |             cli_dbgmsg("pdf_extract_obj: dumping contents from obj %u %u\n", obj->id >> 8, obj->id & 0xff); | 
1848  |  |  | 
1849  | 69.5k  |             rc2 = pdf_scan_contents(fout, pdf, obj);  | 
1850  | 69.5k  |             if (rc2 != CL_SUCCESS) { | 
1851  | 5  |                 rc = rc2;  | 
1852  | 5  |                 goto really_done;  | 
1853  | 5  |             }  | 
1854  | 69.5k  |         }  | 
1855  | 959k  |     }  | 
1856  |  |  | 
1857  | 1.00M  | really_done:  | 
1858  | 1.00M  |     close(fout);  | 
1859  |  |  | 
1860  | 1.00M  |     if (CL_EMEM != rc) { | 
1861  | 1.00M  |         if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp)  | 
1862  | 1.00M  |             if (cli_unlink(fullname) && rc != CL_VIRUS)  | 
1863  | 0  |                 rc = CL_EUNLINK;  | 
1864  | 1.00M  |     }  | 
1865  |  |  | 
1866  | 1.00M  |     return rc;  | 
1867  | 1.00M  | }  | 
1868  |  |  | 
1869  |  | enum objstate { /* parser state carried between consecutive names of one object dictionary; updated by handle_pdfname() */ | 
1870  |  |     STATE_NONE, /* no pending context; initial state in pdf_parseobj() */  | 
1871  |  |     STATE_S, /* entered after the "S" name; handle_pdfname() resets it to STATE_NONE when a known name does not transition */  | 
1872  |  |     STATE_FILTER, /* entered after the "Filter" name; filter names are only matched while in this state */  | 
1873  |  |     STATE_JAVASCRIPT, /* entered after the "JavaScript" name */  | 
1874  |  |     STATE_OPENACTION, /* entered after the "OpenAction" name */  | 
1875  |  |     STATE_LINEARIZED, /* entered after the "Linearized" name */  | 
1876  |  |     STATE_LAUNCHACTION, /* entered after the "Launch" name */  | 
1877  |  |     STATE_CONTENTS, /* entered after the "Contents" name */  | 
1878  |  |     STATE_ANY /* for actions table below: as a from_state it matches whatever the current state is */  | 
1879  |  | };  | 
1880  |  |  | 
1881  |  | #define NAMEFLAG_NONE 0x0 /* nameflags value: no special handling for this name */  | 
1882  | 2.44M  | #define NAMEFLAG_HEURISTIC 0x1 /* nameflags bit: handle_pdfname() flags ESCAPED_COMMON_PDFNAME if this name was seen escaped */  | 
1883  |  |  | 
1884  |  | struct pdfname_action { /* one row of pdfname_actions: what handle_pdfname() does when it meets a given name */ | 
1885  |  |     const char *pdfname; /* name to match; compared with strcmp() */  | 
1886  |  |     enum pdf_objflags set_objflag; /* bit index OR'ed into obj->flags when the transition applies; OBJ_DICT is noop */  | 
1887  |  |     enum objstate from_state;      /* state required for the transition; STATE_ANY matches every state; STATE_NONE is noop */  | 
1888  |  |     enum objstate to_state; /* state set once the transition applies */  | 
1889  |  |     uint32_t nameflags; /* NAMEFLAG_* bits */  | 
1890  |  |     void (*pdf_stats_cb)(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act); /* optional (may be NULL); called on every match, before the state check */  | 
1891  |  | };  | 
1892  |  |  | 
1893  |  | static struct pdfname_action pdfname_actions[] = { /* searched linearly by handle_pdfname(); columns: pdfname, set_objflag, from_state, to_state, nameflags, pdf_stats_cb */ | 
1894  |  |     {"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb}, | 
1895  |  |     {"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb}, | 
1896  |  |     {"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb}, | 
1897  |  |     {"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb}, | 
1898  |  |     {"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, EmbeddedFile_cb}, | 
1899  |  |     {"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb}, | 
1900  |  |     {"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb}, | 
1901  |  |     {"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Image_cb}, | 
1902  |  |     {"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb}, | 
1903  |  |     {"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb}, | 
1904  |  |     {"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb}, | 
1905  |  |     {"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb}, | 
1906  |  |     {"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb}, | 
1907  |  |     {"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb}, | 
1908  |  |     {"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JBIG2Decode_cb}, /* NOTE(review): shares OBJ_FILTER_DCT with DCTDecode; confirm whether a JBIG2-specific flag was intended */ | 
1909  |  |     {"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb}, | 
1910  |  |     {"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb}, | 
1911  |  |     {"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JPXDecode_cb}, | 
1912  |  |     {"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, Crypt_cb}, /* unlike the other filter rows, this one leaves STATE_FILTER */ | 
1913  |  |     {"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, Standard_cb}, | 
1914  |  |     {"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, Sig_cb}, | 
1915  |  |     {"V", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL}, | 
1916  |  |     {"R", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL}, | 
1917  |  |     {"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC, NULL}, | 
1918  |  |     {"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC, NULL}, | 
1919  |  |     {"JavaScript", OBJ_JAVASCRIPT, STATE_ANY, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC, JavaScript_cb}, | 
1920  |  |     {"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, NULL}, | 
1921  |  |     {"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC, NULL}, | 
1922  |  |     {"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, NULL}, | 
1923  |  |     {"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC, OpenAction_cb}, | 
1924  |  |     {"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC, Launch_cb}, | 
1925  |  |     {"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Page_cb}, | 
1926  |  |     {"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC, NULL}, | 
1927  |  |     {"Author", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Author_cb}, /* rows from here on use NAMEFLAG_NONE, so an escaped spelling is not flagged */ | 
1928  |  |     {"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Producer_cb}, | 
1929  |  |     {"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, CreationDate_cb}, | 
1930  |  |     {"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, ModificationDate_cb}, | 
1931  |  |     {"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Creator_cb}, | 
1932  |  |     {"Title", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Title_cb}, | 
1933  |  |     {"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Keywords_cb}, | 
1934  |  |     {"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Subject_cb}, | 
1935  |  |     {"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Pages_cb}, | 
1936  |  |     {"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Colors_cb}, | 
1937  |  |     {"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, RichMedia_cb}, | 
1938  |  |     {"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, AcroForm_cb}, | 
1939  |  |     {"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, XFA_cb}}; | 
1940  |  |  | 
1941  | 2.32M  | #define KNOWN_FILTERS ((1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_RL) | (1 << OBJ_FILTER_A85) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_LZW) | (1 << OBJ_FILTER_FAX) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_JPX) | (1 << OBJ_FILTER_CRYPT)) /* obj->flags mask of filter bits; handle_pdfname() uses it to record filter order and to decide whether an unmatched name counts as an unknown filter */  | 
1942  |  |  | 
1943  |  | static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const char *pdfname, int escapes, enum objstate *state)  | 
1944  | 9.07M  | { | 
1945  | 9.07M  |     struct pdfname_action *act = NULL;  | 
1946  | 9.07M  |     unsigned j;  | 
1947  |  |  | 
1948  | 9.07M  |     obj->statsflags |= OBJ_FLAG_PDFNAME_DONE;  | 
1949  |  |  | 
1950  | 374M  |     for (j = 0; j < sizeof(pdfname_actions) / sizeof(pdfname_actions[0]); j++) { | 
1951  | 368M  |         if (!strcmp(pdfname, pdfname_actions[j].pdfname)) { | 
1952  | 2.44M  |             act = &pdfname_actions[j];  | 
1953  | 2.44M  |             break;  | 
1954  | 2.44M  |         }  | 
1955  | 368M  |     }  | 
1956  |  |  | 
1957  | 9.07M  |     if (!act) { | 
1958  |  |         /* these are digital signature objects, filter doesn't matter,  | 
1959  |  |          * we don't need them anyway */  | 
1960  | 6.63M  |         if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) { | 
1961  | 274k  |             cli_dbgmsg("handle_pdfname: unknown filter %s\n", pdfname); | 
1962  | 274k  |             obj->flags |= 1 << OBJ_FILTER_UNKNOWN;  | 
1963  | 274k  |         }  | 
1964  |  |  | 
1965  | 6.63M  |         return;  | 
1966  | 6.63M  |     }  | 
1967  |  |  | 
1968  |  |     /* record filter order */  | 
1969  | 2.44M  |     if (obj->numfilters < PDF_FILTERLIST_MAX && (*state == STATE_FILTER) && ((1 << act->set_objflag) & KNOWN_FILTERS))  | 
1970  | 298k  |         obj->filterlist[obj->numfilters++] = act->set_objflag;  | 
1971  |  |  | 
1972  | 2.44M  |     if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) { | 
1973  |  |         /* if a commonly used PDF name is escaped that is certainly  | 
1974  |  |            suspicious. */  | 
1975  | 1.16k  |         cli_dbgmsg("handle_pdfname: pdfname %s is escaped\n", pdfname); | 
1976  | 1.16k  |         pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);  | 
1977  | 1.16k  |     }  | 
1978  |  |  | 
1979  | 2.44M  |     if ((act->pdf_stats_cb))  | 
1980  | 1.12M  |         act->pdf_stats_cb(pdf, obj, act);  | 
1981  |  |  | 
1982  | 2.44M  |     if (act->from_state == *state || act->from_state == STATE_ANY) { | 
1983  | 2.15M  |         *state = act->to_state;  | 
1984  |  |  | 
1985  | 2.15M  |         if (*state == STATE_FILTER && act->set_objflag != OBJ_DICT && (obj->flags & (1 << act->set_objflag))) { | 
1986  | 70.6k  |             cli_dbgmsg("handle_pdfname: duplicate stream filter %s\n", pdfname); | 
1987  | 70.6k  |             pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);  | 
1988  | 70.6k  |         }  | 
1989  |  |  | 
1990  | 2.15M  |         obj->flags |= 1 << act->set_objflag;  | 
1991  | 2.15M  |     } else { | 
1992  |  |         /* auto-reset states */  | 
1993  | 282k  |         switch (*state) { | 
1994  | 8.02k  |             case STATE_S:  | 
1995  | 8.02k  |                 *state = STATE_NONE;  | 
1996  | 8.02k  |                 break;  | 
1997  | 274k  |             default:  | 
1998  | 274k  |                 break;  | 
1999  | 282k  |         }  | 
2000  | 282k  |     }  | 
2001  | 2.44M  | }  | 
2002  |  |  | 
2003  |  | static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)  | 
2004  | 54.5k  | { | 
2005  | 54.5k  |     const char *q, *q2;  | 
2006  | 54.5k  |     unsigned long objid;  | 
2007  | 54.5k  |     unsigned long genid;  | 
2008  | 54.5k  |     long temp_long;  | 
2009  |  |  | 
2010  | 54.5k  |     if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) { | 
2011  | 3.43k  |         q = cli_memstr(enc + 16, len - 16, "/Encrypt", 8);  | 
2012  | 3.43k  |         if (!q)  | 
2013  | 925  |             return;  | 
2014  |  |  | 
2015  | 2.51k  |         len -= q - enc;  | 
2016  | 2.51k  |         enc = q;  | 
2017  | 2.51k  |     }  | 
2018  |  |  | 
2019  | 53.6k  |     q = enc + 8;  | 
2020  | 53.6k  |     len -= 8;  | 
2021  | 53.6k  |     q2 = pdf_nextobject(q, len);  | 
2022  | 53.6k  |     if (!q2 || !isdigit(*q2))  | 
2023  | 6.13k  |         return;  | 
2024  | 47.5k  |     len -= q2 - q;  | 
2025  | 47.5k  |     q = q2;  | 
2026  |  |  | 
2027  | 47.5k  |     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, &temp_long)) { | 
2028  | 206  |         cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse objid\n"); | 
2029  | 206  |         return;  | 
2030  | 47.3k  |     } else if (temp_long < 0) { | 
2031  | 0  |         cli_dbgmsg("pdf_parse_encrypt: Encountered invalid negative objid (%ld).\n", temp_long); | 
2032  | 0  |         return;  | 
2033  | 0  |     }  | 
2034  | 47.3k  |     objid = (unsigned long)temp_long;  | 
2035  |  |  | 
2036  | 47.3k  |     objid = objid << 8;  | 
2037  | 47.3k  |     q2    = pdf_nextobject(q, len);  | 
2038  | 47.3k  |     if (!q2 || !isdigit(*q2))  | 
2039  | 3.54k  |         return;  | 
2040  | 43.7k  |     len -= q2 - q;  | 
2041  | 43.7k  |     q = q2;  | 
2042  |  |  | 
2043  | 43.7k  |     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)len, 0, 10, &temp_long)) { | 
2044  | 333  |         cli_dbgmsg("pdf_parse_encrypt: Found Encrypt dictionary but failed to parse genid\n"); | 
2045  | 333  |         return;  | 
2046  | 43.4k  |     } else if (temp_long < 0) { | 
2047  | 0  |         cli_dbgmsg("pdf_parse_encrypt: Encountered invalid negative genid (%ld).\n", temp_long); | 
2048  | 0  |         return;  | 
2049  | 0  |     }  | 
2050  | 43.4k  |     genid = (unsigned long)temp_long;  | 
2051  |  |  | 
2052  | 43.4k  |     objid |= genid & 0xff;  | 
2053  | 43.4k  |     q2 = pdf_nextobject(q, len);  | 
2054  | 43.4k  |     if (!q2 || *q2 != 'R')  | 
2055  | 3.79k  |         return;  | 
2056  |  |  | 
2057  | 39.6k  |     cli_dbgmsg("pdf_parse_encrypt: Encrypt dictionary in obj %lu %lu\n", objid >> 8, objid & 0xff); | 
2058  |  |  | 
2059  | 39.6k  |     pdf->enc_objid = objid;  | 
2060  | 39.6k  | }  | 
2061  |  |  | 
2062  |  | static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)  | 
2063  | 86.1k  | { | 
2064  | 86.1k  |     const char *enc;  | 
2065  |  |  | 
2066  | 86.1k  |     enc = cli_memstr(s, length, "/Encrypt", 8);  | 
2067  | 86.1k  |     if (enc) { | 
2068  | 54.5k  |         char *newID;  | 
2069  | 54.5k  |         unsigned int newIDlen = 0;  | 
2070  |  |  | 
2071  | 54.5k  |         pdf->flags |= 1 << ENCRYPTED_PDF;  | 
2072  | 54.5k  |         pdf_parse_encrypt(pdf, enc, s + length - enc);  | 
2073  | 54.5k  |         newID = pdf_readstring(s, length, "/ID", &newIDlen, NULL, false);  | 
2074  |  |  | 
2075  | 54.5k  |         if (newID) { | 
2076  | 40.5k  |             free(pdf->fileID);  | 
2077  | 40.5k  |             pdf->fileID    = newID;  | 
2078  | 40.5k  |             pdf->fileIDlen = newIDlen;  | 
2079  | 40.5k  |         }  | 
2080  | 54.5k  |     }  | 
2081  | 86.1k  | }  | 
2082  |  |  | 
2083  |  | void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)  | 
2084  | 1.59M  | { | 
2085  |  |     /* enough to hold common pdf names, we don't need all the names */  | 
2086  | 1.59M  |     char pdfname[64];  | 
2087  | 1.59M  |     const char *q2, *q3;  | 
2088  | 1.59M  |     const char *nextobj = NULL, *nextopen = NULL, *nextclose = NULL;  | 
2089  | 1.59M  |     const char *q    = NULL;  | 
2090  | 1.59M  |     const char *dict = NULL, *enddict = NULL, *start = NULL;  | 
2091  | 1.59M  |     off_t dict_length = 0, full_dict_length = 0, bytesleft = 0;  | 
2092  | 1.59M  |     size_t i         = 0;  | 
2093  | 1.59M  |     unsigned filters = 0, blockopens = 0;  | 
2094  | 1.59M  |     enum objstate objstate = STATE_NONE;  | 
2095  |  |  | 
2096  | 1.59M  |     json_object *pdfobj = NULL, *jsonobj = NULL;  | 
2097  |  |  | 
2098  | 1.59M  |     if (NULL == pdf || NULL == obj) { | 
2099  | 0  |         cli_warnmsg("pdf_parseobj: invalid arguments\n"); | 
2100  | 0  |         return;  | 
2101  | 0  |     }  | 
2102  |  |  | 
2103  | 1.59M  |     cli_dbgmsg("pdf_parseobj: Parsing object %u %u\n", obj->id >> 8, obj->id & 0xff); | 
2104  |  |  | 
2105  | 1.59M  |     if (obj->objstm) { | 
2106  | 239k  |         if ((size_t)obj->start > obj->objstm->streambuf_len) { | 
2107  | 0  |             cli_dbgmsg("pdf_parseobj: %u %u obj: obj start (%u) is greater than size of object stream (%zu).\n", | 
2108  | 0  |                        obj->id >> 8, obj->id & 0xff, obj->start, obj->objstm->streambuf_len);  | 
2109  | 0  |             return;  | 
2110  | 0  |         }  | 
2111  | 239k  |         q = (const char *)(obj->start + obj->objstm->streambuf);  | 
2112  | 1.35M  |     } else { | 
2113  | 1.35M  |         if ((size_t)obj->start > pdf->size) { | 
2114  | 0  |             cli_dbgmsg("pdf_parseobj: %u %u obj: obj start (%u) is greater than size of PDF (%lld).\n", | 
2115  | 0  |                        obj->id >> 8, obj->id & 0xff, obj->start, (long long)pdf->size);  | 
2116  | 0  |             return;  | 
2117  | 0  |         }  | 
2118  | 1.35M  |         q = (const char *)(obj->start + pdf->map);  | 
2119  | 1.35M  |     }  | 
2120  | 1.59M  |     start = q;  | 
2121  |  |  | 
2122  | 1.59M  |     if (obj->size <= 0)  | 
2123  | 2.04k  |         return;  | 
2124  |  |  | 
2125  | 1.58M  |     if (obj->objstm) { | 
2126  | 239k  |         bytesleft = MIN(obj->size, obj->objstm->streambuf_len - obj->start);  | 
2127  | 1.35M  |     } else { | 
2128  | 1.35M  |         bytesleft = MIN(obj->size, pdf->size - obj->start);  | 
2129  | 1.35M  |     }  | 
2130  |  |  | 
2131  |  |     /* For objects that aren't already in an object stream^, check if they contain a stream.  | 
2132  |  |      * ^Objects in object streams aren't supposed to contain streams, so we don't check them. */  | 
2133  | 1.58M  |     if (NULL == obj->objstm) { | 
2134  |  |         /* Check if object contains stream */  | 
2135  | 1.35M  |         cl_error_t has_stream;  | 
2136  | 1.35M  |         const char *stream = NULL;  | 
2137  | 1.35M  |         size_t stream_size = 0;  | 
2138  |  |  | 
2139  | 1.35M  |         has_stream = find_stream_bounds(  | 
2140  | 1.35M  |             start,  | 
2141  | 1.35M  |             obj->size,  | 
2142  | 1.35M  |             &stream,  | 
2143  | 1.35M  |             &stream_size,  | 
2144  | 1.35M  |             (pdf->enc_method_stream <= ENC_IDENTITY) && (pdf->enc_method_embeddedfile <= ENC_IDENTITY));  | 
2145  |  |  | 
2146  | 1.35M  |         if ((CL_SUCCESS == has_stream) ||  | 
2147  | 1.35M  |             (CL_EFORMAT == has_stream)) { | 
2148  |  |             /* Stream found. Store this fact and the stream bounds. */  | 
2149  | 712k  |             cli_dbgmsg("pdf_parseobj: %u %u contains stream, size: %zu\n", obj->id >> 8, obj->id & 0xff, stream_size); | 
2150  | 712k  |             obj->flags |= (1 << OBJ_STREAM);  | 
2151  | 712k  |             obj->stream      = stream;  | 
2152  | 712k  |             obj->stream_size = stream_size;  | 
2153  | 712k  |         }  | 
2154  | 1.35M  |     }  | 
2155  |  |  | 
2156  |  |     /* find start of dictionary */  | 
2157  | 12.5M  |     do { | 
2158  | 12.5M  |         nextobj = pdf_nextobject(q, bytesleft);  | 
2159  | 12.5M  |         bytesleft -= nextobj - q;  | 
2160  |  |  | 
2161  | 12.5M  |         if (!nextobj || bytesleft < 0) { | 
2162  | 308k  |             cli_dbgmsg("pdf_parseobj: %u %u obj: no dictionary\n", obj->id >> 8, obj->id & 0xff); | 
2163  |  |  | 
2164  | 308k  |             if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { | 
2165  | 308k  |                 pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
2166  | 308k  |                 if (!(pdfobj))  | 
2167  | 0  |                     return;  | 
2168  | 308k  |             }  | 
2169  |  |  | 
2170  | 308k  |             if (pdfobj) { | 
2171  | 308k  |                 if (!(jsonobj))  | 
2172  | 308k  |                     jsonobj = cli_jsonarray(pdfobj, "ObjectsWithoutDictionaries");  | 
2173  | 308k  |                 if (jsonobj)  | 
2174  | 308k  |                     cli_jsonint_array(jsonobj, obj->id >> 8);  | 
2175  | 308k  |             }  | 
2176  |  |  | 
2177  | 308k  |             return;  | 
2178  | 308k  |         }  | 
2179  |  |  | 
2180  |  |         /*  | 
2181  |  |          * Opening `<` for object's dictionary may be back 1 character,  | 
2182  |  |          * provided q is not at the start of the buffer (it shouldn't be).  | 
2183  |  |          */  | 
2184  | 12.2M  |         if (obj->objstm) { | 
2185  | 2.73M  |             if (obj->objstm->streambuf == q) { | 
2186  | 0  |                 q3 = memchr(q, '<', nextobj - q);  | 
2187  | 2.73M  |             } else { | 
2188  | 2.73M  |                 q3 = memchr(q - 1, '<', nextobj - q + 1);  | 
2189  | 2.73M  |             }  | 
2190  | 9.48M  |         } else { | 
2191  | 9.48M  |             if (pdf->map == q) { | 
2192  | 0  |                 q3 = memchr(q, '<', nextobj - q);  | 
2193  | 9.48M  |             } else { | 
2194  | 9.48M  |                 q3 = memchr(q - 1, '<', nextobj - q + 1);  | 
2195  | 9.48M  |             }  | 
2196  | 9.48M  |         }  | 
2197  | 12.2M  |         nextobj++;  | 
2198  | 12.2M  |         bytesleft--;  | 
2199  | 12.2M  |         q = nextobj;  | 
2200  | 12.2M  |     } while (!q3 || q3[1] != '<');  | 
2201  | 1.28M  |     dict = q3 + 2;  | 
2202  | 1.28M  |     q    = dict;  | 
2203  | 1.28M  |     blockopens++;  | 
2204  | 1.28M  |     bytesleft = obj->size - (q - start);  | 
2205  | 1.28M  |     enddict   = q + bytesleft - 1;  | 
2206  |  |  | 
2207  |  |     /* find end of dictionary block */  | 
2208  | 1.28M  |     if (bytesleft < 0) { | 
2209  | 0  |         cli_dbgmsg("pdf_parseobj: %u %u obj: broken dictionary\n", obj->id >> 8, obj->id & 0xff); | 
2210  |  | 
  | 
2211  | 0  |         if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { | 
2212  | 0  |             pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
2213  | 0  |             if (!(pdfobj))  | 
2214  | 0  |                 return;  | 
2215  | 0  |         }  | 
2216  |  |  | 
2217  | 0  |         if (pdfobj) { | 
2218  | 0  |             if (!(jsonobj))  | 
2219  | 0  |                 jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");  | 
2220  | 0  |             if (jsonobj)  | 
2221  | 0  |                 cli_jsonint_array(jsonobj, obj->id >> 8);  | 
2222  | 0  |         }  | 
2223  |  | 
  | 
2224  | 0  |         return;  | 
2225  | 0  |     }  | 
2226  |  |  | 
2227  |  |     /* while still looking ... */  | 
2228  | 4.49M  |     while ((q < enddict - 1) && (blockopens > 0)) { | 
2229  |  |         /* find next close */  | 
2230  | 3.42M  |         nextclose = memchr(q, '>', enddict - q);  | 
2231  | 3.42M  |         if (nextclose && (nextclose[1] == '>')) { | 
2232  |  |             /* check for nested open */  | 
2233  | 4.11M  |             while ((nextopen = memchr(q - 1, '<', nextclose - q + 1)) != NULL) { | 
2234  | 2.65M  |                 if (nextopen[1] == '<') { | 
2235  |  |                     /* nested open */  | 
2236  | 617k  |                     blockopens++;  | 
2237  | 617k  |                     q = nextopen + 2;  | 
2238  | 2.03M  |                 } else { | 
2239  |  |                     /* unmatched < before next close */  | 
2240  | 2.03M  |                     q = nextopen + 2;  | 
2241  | 2.03M  |                 }  | 
2242  | 2.65M  |             }  | 
2243  |  |             /* close block */  | 
2244  | 1.46M  |             blockopens--;  | 
2245  | 1.46M  |             q = nextclose + 2;  | 
2246  | 1.95M  |         } else if (nextclose) { | 
2247  |  |             /* found one > but not two */  | 
2248  | 1.74M  |             q = nextclose + 2;  | 
2249  | 1.74M  |         } else { | 
2250  |  |             /* next closing not found */  | 
2251  | 211k  |             break;  | 
2252  | 211k  |         }  | 
2253  | 3.42M  |     }  | 
2254  |  |  | 
2255  |  |     /* Was end of dictionary found? */  | 
2256  | 1.28M  |     if (blockopens) { | 
2257  |  |         /* probably truncated */  | 
2258  | 273k  |         cli_dbgmsg("pdf_parseobj: %u %u obj broken dictionary\n", obj->id >> 8, obj->id & 0xff); | 
2259  |  |  | 
2260  | 273k  |         if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) { | 
2261  | 273k  |             pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
2262  | 273k  |             if (!(pdfobj))  | 
2263  | 0  |                 return;  | 
2264  | 273k  |         }  | 
2265  |  |  | 
2266  | 273k  |         if (pdfobj) { | 
2267  | 273k  |             if (!(jsonobj))  | 
2268  | 273k  |                 jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");  | 
2269  | 273k  |             if (jsonobj)  | 
2270  | 273k  |                 cli_jsonint_array(jsonobj, obj->id >> 8);  | 
2271  | 273k  |         }  | 
2272  |  |  | 
2273  | 273k  |         return;  | 
2274  | 273k  |     }  | 
2275  |  |  | 
2276  | 1.00M  |     enddict = nextclose;  | 
2277  | 1.00M  |     obj->flags |= 1 << OBJ_DICT;  | 
2278  | 1.00M  |     full_dict_length = dict_length = enddict - dict;  | 
2279  |  |  | 
2280  |  |     /* This code prints the dictionary content.  | 
2281  |  |     { | 
2282  |  |         char * dictionary = malloc(dict_length + 1);  | 
2283  |  |         if (dictionary) { | 
2284  |  |             for (i = 0; i < dict_length; i++) { | 
2285  |  |                 if (dict[i] == '\r')  | 
2286  |  |                     dictionary[i] = '\n';  | 
2287  |  |                 else if (isprint(dict[i]) || isspace(dict[i]))  | 
2288  |  |                     dictionary[i] = dict[i];  | 
2289  |  |                 else  | 
2290  |  |                     dictionary[i] = '*';  | 
2291  |  |             }  | 
2292  |  |             dictionary[dict_length] = '\0';  | 
2293  |  |             cli_dbgmsg("pdf_parseobj: dictionary is <<%s>>\n", dictionary); | 
2294  |  |             free(dictionary);  | 
2295  |  |         }  | 
2296  |  |     }  | 
2297  |  |     */  | 
2298  |  |  | 
2299  |  |     /*  process pdf names */  | 
2300  | 10.0M  |     for (q = dict; dict_length > 0;) { | 
2301  | 9.91M  |         int escapes = 0, breakout = 0;  | 
2302  | 9.91M  |         q2 = memchr(q, '/', dict_length);  | 
2303  | 9.91M  |         if (!q2)  | 
2304  | 840k  |             break;  | 
2305  |  |  | 
2306  | 9.07M  |         dict_length -= q2 - q;  | 
2307  | 9.07M  |         q = q2;  | 
2308  |  |         /* normalize PDF names */  | 
2309  | 94.8M  |         for (i = 0; dict_length > 0 && (i < sizeof(pdfname) - 1); i++) { | 
2310  | 94.6M  |             q++;  | 
2311  | 94.6M  |             dict_length--;  | 
2312  |  |  | 
2313  | 94.6M  |             if (*q == '#') { | 
2314  | 280k  |                 if (cli_hex2str_to(q + 1, pdfname + i, 2) == -1)  | 
2315  | 227k  |                     break;  | 
2316  |  |  | 
2317  | 52.2k  |                 q += 2;  | 
2318  | 52.2k  |                 dict_length -= 2;  | 
2319  | 52.2k  |                 escapes = 1;  | 
2320  | 52.2k  |                 continue;  | 
2321  | 280k  |             }  | 
2322  |  |  | 
2323  | 94.3M  |             switch (*q) { | 
2324  | 2.81M  |                 case ' ':  | 
2325  | 2.90M  |                 case '\t':  | 
2326  | 3.14M  |                 case '\r':  | 
2327  | 3.52M  |                 case '\n':  | 
2328  | 6.63M  |                 case '/':  | 
2329  | 7.39M  |                 case '>':  | 
2330  | 7.71M  |                 case '[':  | 
2331  | 7.88M  |                 case ']':  | 
2332  | 8.38M  |                 case '<':  | 
2333  | 8.66M  |                 case '(': | 
2334  | 8.66M  |                     breakout = 1;  | 
2335  | 94.3M  |             }  | 
2336  |  |  | 
2337  | 94.3M  |             if (breakout)  | 
2338  | 8.66M  |                 break;  | 
2339  |  |  | 
2340  | 85.7M  |             pdfname[i] = *q;  | 
2341  | 85.7M  |         }  | 
2342  |  |  | 
2343  | 9.07M  |         pdfname[i] = '\0';  | 
2344  |  |  | 
2345  | 9.07M  |         handle_pdfname(pdf, obj, pdfname, escapes, &objstate);  | 
2346  | 9.07M  |         if (objstate == STATE_LINEARIZED) { | 
2347  | 55.6k  |             long trailer_end, trailer;  | 
2348  |  |  | 
2349  | 55.6k  |             pdfobj_flag(pdf, obj, LINEARIZED_PDF);  | 
2350  | 55.6k  |             objstate    = STATE_NONE;  | 
2351  | 55.6k  |             trailer_end = pdf_readint(dict, full_dict_length, "/H");  | 
2352  | 55.6k  |             if ((trailer_end > 0) && ((size_t)trailer_end < pdf->size)) { | 
2353  | 26.6k  |                 trailer = trailer_end - 1024;  | 
2354  | 26.6k  |                 if (trailer < 0)  | 
2355  | 25.7k  |                     trailer = 0;  | 
2356  |  |  | 
2357  | 26.6k  |                 q2 = pdf->map + trailer;  | 
2358  | 26.6k  |                 cli_dbgmsg("pdf_parseobj: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end); | 
2359  | 26.6k  |                 pdf_parse_trailer(pdf, q2, trailer_end - trailer);  | 
2360  | 26.6k  |                 if (pdf->fileID)  | 
2361  | 13.5k  |                     cli_dbgmsg("pdf_parseobj: found fileID\n"); | 
2362  | 26.6k  |             }  | 
2363  | 55.6k  |         }  | 
2364  |  |  | 
2365  | 9.07M  |         if (objstate == STATE_LAUNCHACTION)  | 
2366  | 97.8k  |             pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);  | 
2367  | 9.07M  |         if (dict_length > 0 && (objstate == STATE_JAVASCRIPT || objstate == STATE_OPENACTION || objstate == STATE_CONTENTS)) { | 
2368  | 425k  |             off_t dict_remaining = dict_length;  | 
2369  |  |  | 
2370  | 425k  |             if (objstate == STATE_OPENACTION)  | 
2371  | 304k  |                 pdfobj_flag(pdf, obj, HAS_OPENACTION);  | 
2372  |  |  | 
2373  | 425k  |             q2 = pdf_nextobject(q, dict_remaining);  | 
2374  | 425k  |             if (q2 && isdigit(*q2)) { | 
2375  | 360k  |                 const char *q2_old = NULL;  | 
2376  | 360k  |                 unsigned long objid;  | 
2377  | 360k  |                 unsigned long genid;  | 
2378  | 360k  |                 long temp_long;  | 
2379  |  |  | 
2380  | 360k  |                 dict_remaining -= (off_t)(q2 - q);  | 
2381  |  |  | 
2382  | 360k  |                 if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, &temp_long)) { | 
2383  | 3.04k  |                     cli_dbgmsg("pdf_parseobj: failed to parse object objid\n"); | 
2384  | 3.04k  |                     return;  | 
2385  | 357k  |                 } else if (temp_long < 0) { | 
2386  | 0  |                     cli_dbgmsg("pdf_parseobj: Encountered invalid negative genid (%ld).\n", temp_long); | 
2387  | 0  |                     return;  | 
2388  | 0  |                 }  | 
2389  | 357k  |                 objid = (unsigned long)temp_long;  | 
2390  |  |  | 
2391  | 357k  |                 objid = objid << 8;  | 
2392  |  |  | 
2393  | 799k  |                 while ((dict_remaining > 0) && isdigit(*q2)) { | 
2394  | 442k  |                     q2++;  | 
2395  | 442k  |                     dict_remaining--;  | 
2396  | 442k  |                 }  | 
2397  |  |  | 
2398  | 357k  |                 q2_old = q2;  | 
2399  | 357k  |                 q2     = pdf_nextobject(q2, dict_remaining);  | 
2400  | 357k  |                 if (q2 && isdigit(*q2)) { | 
2401  | 324k  |                     dict_remaining -= (off_t)(q2 - q2_old);  | 
2402  | 324k  |                     if (CL_SUCCESS != cli_strntol_wrap(q2, (size_t)dict_remaining, 0, 10, &temp_long)) { | 
2403  | 1.06k  |                         cli_dbgmsg("pdf_parseobj: failed to parse object genid\n"); | 
2404  | 1.06k  |                         return;  | 
2405  | 323k  |                     } else if (temp_long < 0) { | 
2406  | 0  |                         cli_dbgmsg("pdf_parseobj: Encountered invalid negative genid (%ld).\n", temp_long); | 
2407  | 0  |                         return;  | 
2408  | 0  |                     }  | 
2409  | 323k  |                     genid = (unsigned long)temp_long;  | 
2410  |  |  | 
2411  | 323k  |                     objid |= genid & 0xff;  | 
2412  |  |  | 
2413  | 323k  |                     q2 = pdf_nextobject(q2, dict_remaining);  | 
2414  | 323k  |                     if (q2 && *q2 == 'R') { | 
2415  | 285k  |                         struct pdf_obj *obj2;  | 
2416  |  |  | 
2417  | 285k  |                         cli_dbgmsg("pdf_parseobj: found %s stored in indirect object %lu %lu\n", pdfname, objid >> 8, objid & 0xff); | 
2418  | 285k  |                         obj2 = find_obj(pdf, obj, objid);  | 
2419  | 285k  |                         if (obj2) { | 
2420  | 10.6k  |                             enum pdf_objflags flag = OBJ_STREAM;  | 
2421  |  |  | 
2422  | 10.6k  |                             switch (objstate) { | 
2423  | 985  |                                 case STATE_JAVASCRIPT:  | 
2424  | 985  |                                     flag = OBJ_JAVASCRIPT;  | 
2425  | 985  |                                     break;  | 
2426  | 1.41k  |                                 case STATE_OPENACTION:  | 
2427  | 1.41k  |                                     flag = OBJ_OPENACTION;  | 
2428  | 1.41k  |                                     break;  | 
2429  | 8.23k  |                                 case STATE_CONTENTS:  | 
2430  | 8.23k  |                                     flag = OBJ_CONTENTS;  | 
2431  | 8.23k  |                                     break;  | 
2432  | 0  |                                 default:  | 
2433  | 0  |                                     cli_dbgmsg("pdf_parseobj: Unexpected object type\n"); | 
2434  | 0  |                                     return;  | 
2435  | 10.6k  |                             }  | 
2436  |  |  | 
2437  | 10.6k  |                             obj->flags &= ~(1 << flag); /* Disable flag for current object ...                   */  | 
2438  | 10.6k  |                             obj2->flags |= 1 << flag;   /* ... and set the flag for the indirect object instead! */  | 
2439  | 275k  |                         } else { | 
2440  | 275k  |                             pdfobj_flag(pdf, obj, BAD_INDOBJ);  | 
2441  | 275k  |                         }  | 
2442  | 285k  |                     }  | 
2443  | 323k  |                 }  | 
2444  | 357k  |             }  | 
2445  |  |  | 
2446  | 421k  |             objstate = STATE_NONE;  | 
2447  | 421k  |         }  | 
2448  | 9.07M  |     }  | 
2449  |  |  | 
2450  | 47.1M  |     for (i = 0; i < sizeof(pdfname_actions) / sizeof(pdfname_actions[0]); i++) { | 
2451  | 46.1M  |         const struct pdfname_action *act = &pdfname_actions[i];  | 
2452  |  |  | 
2453  | 46.1M  |         if ((obj->flags & (1 << act->set_objflag)) &&  | 
2454  | 46.1M  |             act->from_state == STATE_FILTER &&  | 
2455  | 46.1M  |             act->to_state == STATE_FILTER &&  | 
2456  | 46.1M  |             act->set_objflag != OBJ_FILTER_CRYPT &&  | 
2457  | 46.1M  |             act->set_objflag != OBJ_FILTER_STANDARD) { | 
2458  | 534k  |             filters++;  | 
2459  | 534k  |         }  | 
2460  | 46.1M  |     }  | 
2461  |  |  | 
2462  | 1.00M  |     if (filters > 2) { | 
2463  |  |         /* more than 2 non-crypt filters */  | 
2464  | 24.9k  |         pdfobj_flag(pdf, obj, MANY_FILTERS);  | 
2465  | 24.9k  |     }  | 
2466  |  |  | 
2467  | 1.00M  |     if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS))  | 
2468  | 305k  |         obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);  | 
2469  |  |  | 
2470  | 1.00M  |     if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))  | 
2471  | 21.4k  |         pdfobj_flag(pdf, obj, UNKNOWN_FILTER);  | 
2472  |  |  | 
2473  | 1.00M  |     cli_dbgmsg("pdf_parseobj: %u %u obj flags: %02x\n", obj->id >> 8, obj->id & 0xff, obj->flags); | 
2474  | 1.00M  | }  | 
2475  |  |  | 
/**
 * @brief   Locate a key inside a dictionary buffer and return a pointer to its value.
 *
 * The key is located with a plain byte search (cli_memstr), then
 * pdf_nextobject() is used to step from the key to the object that follows it.
 * If the bytes immediately before that object are '<' or '\n', the returned
 * pointer is moved back over them so that a dictionary or hex-string value
 * keeps its opening bracket(s).
 *
 * NOTE(review): because the key lookup is a byte search, a key that is a
 * prefix of another key may match the longer key first -- confirm that
 * callers tolerate this.
 *
 * @param q0            Start of the dictionary bytes to search. May be NULL.
 * @param[in,out] len   In: number of bytes available at q0.
 *                      Out: number of bytes from the returned value pointer to
 *                           the end of the input. It is also reduced (to the
 *                           bytes remaining from the key) when the key is
 *                           found but no value object follows it.
 * @param key           NUL-terminated key to search for.
 * @return const char*  Pointer to the key's value, or NULL if the length is
 *                      not positive, q0 is NULL, the key is absent, or no
 *                      value object follows the key.
 */
static const char *pdf_getdict(const char *q0, int *len, const char *key)
{
    const char *key_pos;
    const char *value;

    if (*len <= 0) {
        cli_dbgmsg("pdf_getdict: bad length %d\n", *len);
        return NULL;
    }

    if (NULL == q0) {
        return NULL;
    }

    /* Locate the key itself. */
    key_pos = cli_memstr(q0, *len, key, strlen(key));
    if (NULL == key_pos) {
        cli_dbgmsg("pdf_getdict: %s not found in dict\n", key);
        return NULL;
    }

    /* From here on, *len counts from the key rather than from q0. */
    *len -= key_pos - q0;

    /* Step over the key to the object that follows it. */
    value = pdf_nextobject(key_pos + 1, *len - 1);
    if (NULL == value) {
        cli_dbgmsg("pdf_getdict: %s is invalid in dict\n", key);
        return NULL;
    }

    /* Back up over any '<' or newline so bracketed values keep their brackets. */
    while (value > key_pos) {
        const char prev = value[-1];

        if (prev != '<' && prev != '\n') {
            break;
        }
        value--;
    }

    *len -= value - key_pos;
    return value;
}
2522  |  |  | 
/**
 * @brief Read the value string from a PDF dictionary key/value pair.
 *
 * Two encodings are handled:
 *  - literal strings, "(...)", with balanced nested parentheses and, unless
 *    `noescape` is set, backslash escape processing;
 *  - hex strings, "<...>", which are decoded with cli_hex2str_to().
 *
 * @param q0            A pointer into the PDF dictionary.
 * @param len           The number of readable bytes starting at q0.
 * @param key           The key we're looking for.
 * @param [out] slen    Optional. Receives the length of the returned string
 *                      (not counting the terminating NUL). Set to 0 on failure.
 * @param [out] qend    Optional. Initialised to q0. On success it is set to just
 *                      past the closing ')' of a literal string, or to the
 *                      closing '>' of a hex string. (For a hex string it is also
 *                      set when the hex decoding subsequently fails.)
 * @param noescape      Select 'true' to copy a literal string verbatim,
 *                      'false' to process backslash escapes.
 * @return char*        Newly allocated, NUL-terminated string that the caller
 *                      must free(), or NULL on any failure.
 */
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, bool noescape)
{
    char *s, *s0;
    const char *start, *q, *end;

    if (slen)
        *slen = 0;

    if (qend)
        *qend = q0;

    q = pdf_getdict(q0, &len, key);
    if (!q || len <= 0)
        return NULL;

    if (*q == '(') {
        /* Literal string: find the matching ')' while honouring nesting. */
        int paren = 1;
        start     = ++q;
        len--;
        for (; paren > 0 && len > 0; q++, len--) {
            switch (*q) {
                case '(':
                    paren++;
                    break;
                case ')':
                    paren--;
                    break;
                case '\\':
                    /* Skip the escaped character so "\(" and "\)" do not affect nesting. */
                    q++;
                    len--;
                    break;
                default:
                    break;
            }
        }

        /*
         * Running out of input means the string was never closed. Note that a
         * string whose closing ')' is the very last available byte also lands
         * here, because len reaches 0 on the same step.
         */
        if (len <= 0) {
            cli_errmsg("pdf_readstring: Invalid, truncated dictionary.\n");
            return NULL;
        }

        if (qend)
            *qend = q;

        /* q is one past the closing ')'; step back so [start, q) is the string body. */
        q--;
        len = q - start;
        s0 = s = cli_max_malloc(len + 1);
        if (!s) {
            cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
            return NULL;
        }

        end = start + len;
        if (noescape) {
            memcpy(s0, start, len);
            s = s0 + len;
        } else {
            for (q = start; q < end; q++) {
                if (*q != '\\') {
                    *s++ = *q;
                } else {
                    q++;
                    switch (*q) {
                        case 'n':
                            *s++ = '\n';
                            break;
                        case 'r':
                            *s++ = '\r';
                            break;
                        case 't':
                            *s++ = '\t';
                            break;
                        case 'b':
                            *s++ = '\b';
                            break;
                        case 'f':
                            *s++ = '\f';
                            break;
                        case '(': /* fall-through */
                        case ')': /* fall-through */
                        case '\\':
                            *s++ = *q;
                            break;
                        case '\n':
                            /* escaped line break: ignore */
                            break;
                        case '\r':
                            /* escaped line break: ignore, including a following '\n' */
                            if (q + 1 < end && q[1] == '\n')
                                q++;
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            /*
                             * octal escape
                             *
                             * NOTE(review): this always consumes exactly three
                             * characters, accepts '8'/'9' as a leading digit, and
                             * does not check that q[1]/q[2] are digits. If fewer
                             * than three characters remain, the escape is dropped.
                             * Left as-is to preserve existing decoding results.
                             */
                            if (q + 2 < end) {
                                *s++ = 64 * (q[0] - '0') + 8 * (q[1] - '0') + (q[2] - '0');
                                q += 2;
                            }
                            break;
                        default:
                            /* Unknown escape: keep the backslash and re-process the next character as a literal. */
                            *s++ = '\\';
                            q--;
                            break;
                    }
                }
            }
        }

        *s++ = '\0';
        if (slen)
            *slen = s - s0 - 1;

        return s0;
    }

    if ((*q == '<') && (len >= 3)) {
        /* Hex string. */
        start = ++q;
        len -= 1;
        // skip newlines after <
        while (len > 0 && *start == '\n') {
            start = ++q;
            len -= 1;
        }

        /*
         * If everything after '<' was newlines, len is now 0. Passing
         * (len - 1) to memchr() would convert -1 to a huge size_t and scan
         * far beyond the buffer, so bail out instead.
         */
        if (len < 1)
            return NULL;

        q = memchr(q + 1, '>', len - 1);
        if (!q)
            return NULL;

        if (qend)
            *qend = q;

        s = cli_max_malloc((q - start) / 2 + 1);
        if (s == NULL) { /* oops, couldn't allocate memory */
            cli_dbgmsg("pdf_readstring: unable to allocate memory...\n");
            return NULL;
        }

        if (cli_hex2str_to(start, s, q - start)) {
            cli_dbgmsg("pdf_readstring: %s has bad hex value\n", key);
            free(s);
            return NULL;
        }

        s[(q - start) / 2] = '\0';
        if (slen)
            *slen = (q - start) / 2;

        return s;
    }

    cli_dbgmsg("pdf_readstring: %s is invalid string in dict\n", key);
    return NULL;
}
2694  |  |  | 
/**
 * @brief Read a name value ("/Name") from a PDF dictionary key/value pair.
 *
 * The value must start with '/' (after optional leading spaces). The returned
 * string is the text after that '/', up to but not including the next '/',
 * the next ">>", a NUL byte, or the end of the input, with trailing
 * whitespace removed.
 *
 * @param q     A pointer into the PDF dictionary.
 * @param len   The number of readable bytes starting at q.
 * @param key   The key we're looking for.
 * @return char* Newly allocated, NUL-terminated copy of the name (without the
 *               leading '/'), or NULL if the key is missing, the value is not
 *               a name, or allocation fails.
 */
static char *pdf_readval(const char *q, int len, const char *key)
{
    const char *end;
    char *s;
    int origlen = len;

    q = pdf_getdict(q, &len, key);
    if (!q || len <= 0)
        return NULL;

    while (len > 0 && *q && *q == ' ') {
        q++;
        len--;
    }

    /* The skip loop may have consumed all input; do not read *q in that case. */
    if (len <= 0 || *q != '/')
        return NULL;

    q++;
    len--;
    end = q;

    while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
        end++;
        len--;
    }

    /*
     * end-of-buffer whitespace trimming
     *
     * The cast keeps isspace() well-defined for bytes >= 0x80 on platforms
     * where plain char is signed.
     */
    while (end > q && len < origlen && isspace((unsigned char)*(end - 1))) {
        end--;
        len++;
    }

    s = cli_max_malloc(end - q + 1);
    if (!s)
        return NULL;

    memcpy(s, q, end - q);
    s[end - q] = '\0';

    return s;
}
2737  |  |  | 
2738  |  | static int pdf_readint(const char *q0, int len, const char *key)  | 
2739  | 361k  | { | 
2740  | 361k  |     long value    = 0;  | 
2741  | 361k  |     const char *q = pdf_getdict(q0, &len, key);  | 
2742  |  |  | 
2743  | 361k  |     if (q == NULL) { | 
2744  | 49.7k  |         value = -1;  | 
2745  | 311k  |     } else if (CL_SUCCESS != cli_strntol_wrap(q, (size_t)len, 0, 10, &value)) { | 
2746  | 23.4k  |         value = -1;  | 
2747  | 23.4k  |     }  | 
2748  | 361k  |     return value;  | 
2749  | 361k  | }  | 
2750  |  |  | 
/**
 * @brief Read a boolean ("true"/"false") value from a PDF dictionary key/value pair.
 *
 * @param q0        A pointer into the PDF dictionary.
 * @param len       The number of readable bytes starting at q0.
 * @param key       The key we're looking for.
 * @param Default   Value to return when the key is missing, fewer than 5 bytes
 *                  remain at the value, or the value is neither "true" nor "false".
 * @return int      1 for "true", 0 for "false", otherwise Default.
 */
static int pdf_readbool(const char *q0, int len, const char *key, int Default)
{
    const char *val = pdf_getdict(q0, &len, key);

    if (NULL != val && len >= 5) {
        if (0 == strncmp(val, "true", 4)) {
            return 1;
        }

        if (0 == strncmp(val, "false", 5)) {
            return 0;
        }

        cli_dbgmsg("pdf_readbool: invalid value for %s bool\n", key);
    }

    return Default;
}
2768  |  |  | 
/*
 * 32-byte padding string. check_user_password() copies it as the padded form
 * of an empty password ("password == padding") and as the plaintext that is
 * encrypted/hashed to reproduce the /U value.
 */
static const char *key_padding =
    "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41"
    "\x64\x00\x4E\x56\xFF\xFA\x01\x08"
    "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80"
    "\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
2772  |  |  | 
2773  |  | static void dbg_printhex(const char *msg, const char *hex, unsigned len)  | 
2774  | 75.6k  | { | 
2775  | 75.6k  |     if (cli_debug_flag) { | 
2776  | 0  |         char *kh = cli_str2hex(hex, len);  | 
2777  |  | 
  | 
2778  | 0  |         cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh); | 
2779  |  | 
  | 
2780  | 0  |         free(kh);  | 
2781  | 0  |     }  | 
2782  | 75.6k  | }  | 
2783  |  |  | 
/**
 * @brief Compute the hash of the password concatenated with the validation salt and (for owner-password checks) the U string.
 *
 * Some details and comments for how to compute this hash comes from the PyPDF project:
 * https://github.com/py-pdf/pypdf/blob/3.17.4/pypdf/_encryption.py#L568
 *
 * The working buffer is sized for a password of at most 128 bytes, so the
 * password length is clamped to 127 bytes before use.
 *
 * @param password  The password to hash.
 * @param pwlen     The length of the password. Values above 127 are treated as 127.
 * @param salt      The salt. Only the first 8 bytes are used.
 * @param hash      The resulting hash (32 bytes are written).
 * @param U         [Optional] The U string (for owner-password checks). When
 *                  non-NULL, 48 bytes are read from it.
 */
static void compute_hash_r6(const char *password, size_t pwlen, const unsigned char salt[16], unsigned char hash[32], const char *U)
{
    /* Room for 64 repetitions of (password <= 128) + (block <= 64) + (U == 48). */
    unsigned char data[(128 + 64 + 48) * 64];
    unsigned char block[64];
    int32_t block_size = 32;
    size_t in_data_len = 0, out_data_len;
    int32_t i, j, sum;
    uint8_t sha256[32], sha384[48], sha512[64];

    /* Never copy more password bytes than the working buffer was sized for. */
    if (pwlen > 127)
        pwlen = 127;

    /*
     * Compute a SHA-256 hash of the UTF-8 password concatenated with the 8 bytes of the owner or user validation salt.
     */
    memcpy(data, password, pwlen);
    memcpy(data + pwlen, salt, 8);

    if (NULL != U) {
        // If it's for the owner password check, we also concatenate the 48-byte U string.
        memcpy(data + pwlen + 8, U, 48);

        cl_sha256(data, pwlen + 8 + 48, block, NULL);
    } else {
        cl_sha256(data, pwlen + 8, block, NULL);
    }

    /*
     * Run at least 64 rounds, then keep going while the round index is below
     * (last byte of the previous round's encrypted data + 32). The second
     * operand is only evaluated once i >= 64, by which time in_data_len has
     * been set by an earlier round.
     */
    for (i = 0; i < 64 || i < (data[(in_data_len * 64) - 1] + 32); i++) {
        /* Build one unit: password || current block || (optional) U ... */
        memcpy(data, password, pwlen);
        memcpy(data + pwlen, block, block_size);

        in_data_len = pwlen + block_size;

        if (NULL != U) {
            // If it's for the owner password check, we also concatenate the 48-byte U string.
            memcpy(data + pwlen + block_size, U, 48);
            in_data_len += 48;
        }

        /* ... and repeat it so that data holds 64 copies. */
        for (j = 1; j < 64; j++)
            memcpy(data + j * in_data_len, data, in_data_len);

        /* Encrypt in place; first 16 bytes of block are the key, next 16 the IV. */
        aes_128cbc_encrypt(data, in_data_len * 64, data, &out_data_len, block, 16, block + 16);

        /* The sum of the first 16 encrypted bytes, modulo 3, picks the next hash. */
        for (j = 0, sum = 0; j < 16; j++)
            sum += data[j];

        block_size = 32 + (sum % 3) * 16;
        switch (block_size) {
            case 32:
                cl_sha256(data, in_data_len * 64, sha256, NULL);
                memcpy(block, sha256, 32);
                break;

            case 48:
                cl_sha384(data, in_data_len * 64, sha384, NULL);
                memcpy(block, sha384, 48);
                break;

            case 64:
                cl_sha512(data, in_data_len * 64, sha512, NULL);
                memcpy(block, sha512, 64);
                break;
        }
    }

    /* Only the first 32 bytes of the final block form the result. */
    memcpy(hash, block, 32);
}
2861  |  |  | 
2862  |  | /**  | 
2863  |  |  * @brief Check if the owner password matches an empty password.  | 
2864  |  |  *  | 
2865  |  |  * Will set the DECRYPTABLE_PDF flag if the owner password is empty.  | 
2866  |  |  * Will also set the key and keylen fields in the pdf_struct.  | 
2867  |  |  *  | 
2868  |  |  * Some details and comments for how to check the owner password comes from the PyPDF project:  | 
2869  |  |  * https://github.com/py-pdf/pypdf/blob/3.17.4/pypdf/_encryption.py#L397  | 
2870  |  |  *  | 
2871  |  |  * @param pdf       The PDF context.  | 
2872  |  |  * @param R         The encryption version.  | 
2873  |  |  * @param O         The /O string.  | 
2874  |  |  * @param U         The /U string.  | 
2875  |  |  * @param OE        The /OE string.  | 
2876  |  |  * @param OE_len    The length of the /OE string.  | 
2877  |  |  */  | 
2878  |  | static void check_owner_password(struct pdf_struct *pdf, int R,  | 
2879  |  |                                  const char *O, const char *U,  | 
2880  |  |                                  const char *OE, size_t OE_len)  | 
2881  | 9.95k  | { | 
2882  | 9.95k  |     bool password_empty = false;  | 
2883  |  |  | 
2884  | 9.95k  |     dbg_printhex("U: ", U, 32); | 
2885  | 9.95k  |     dbg_printhex("O: ", O, 32); | 
2886  |  |  | 
2887  | 9.95k  |     switch (R) { | 
2888  | 3.93k  |         case 6: { | 
2889  | 3.93k  |             unsigned char hash[32], validationkey[32];  | 
2890  |  |  | 
2891  | 3.93k  |             size_t pwlen    = 0;  | 
2892  | 3.93k  |             char password[] = "";  | 
2893  |  |  | 
2894  | 3.93k  |             if (NULL == OE) { | 
2895  | 466  |                 cli_dbgmsg("check_owner_password: Missing OE value!\n"); | 
2896  | 466  |                 noisy_warnmsg("check_owner_password: Missing OE value!\n"); | 
2897  | 466  |                 goto done;  | 
2898  | 466  |             }  | 
2899  |  |  | 
2900  | 3.46k  |             dbg_printhex("OE: ", OE, OE_len); | 
2901  |  |  | 
2902  |  |             /*  | 
2903  |  |              * Test the password against the owner key by computing the SHA-256 hash of the UTF-8 password concatenated  | 
2904  |  |              * with the 8 bytes of owner validation salt, concatenated with the 48-byte U string.  | 
2905  |  |              */  | 
2906  | 3.46k  |             compute_hash_r6(  | 
2907  | 3.46k  |                 password,  | 
2908  | 3.46k  |                 pwlen,  | 
2909  | 3.46k  |                 (const unsigned char *)(O + 32), // owner validation salt  | 
2910  | 3.46k  |                 validationkey,  | 
2911  | 3.46k  |                 U);  | 
2912  |  |  | 
2913  |  |             /* If the 32-byte result matches the first 32 bytes of the O string, this is the owner password. */  | 
2914  | 3.46k  |             if (0 != memcmp(O, validationkey, sizeof(validationkey))) { | 
2915  | 3.46k  |                 cli_dbgmsg("check_owner_password: Owner password check did not match!\n"); | 
2916  | 3.46k  |                 break;  | 
2917  | 3.46k  |             }  | 
2918  |  |  | 
2919  |  |             /*  | 
2920  |  |              * Compute an intermediate owner key by computing the SHA-256 hash of the UTF-8 password concatenated with  | 
2921  |  |              * the 8 bytes of owner key salt, concatenated with the 48-byte U string.  | 
2922  |  |              */  | 
2923  | 4  |             compute_hash_r6(  | 
2924  | 4  |                 password,  | 
2925  | 4  |                 pwlen,  | 
2926  | 4  |                 (const unsigned char *)(O + 40), // owner key salt  | 
2927  | 4  |                 hash,  | 
2928  | 4  |                 U);  | 
2929  |  |  | 
2930  | 4  |             if (OE_len != 32) { | 
2931  | 0  |                 cli_dbgmsg("check_owner_password: OE length is not 32: %zu\n", OE_len); | 
2932  | 0  |                 noisy_warnmsg("check_owner_password: OE length is not 32: %zu\n", OE_len); | 
2933  | 4  |             } else { | 
2934  | 4  |                 pdf->keylen = 32;  | 
2935  | 4  |                 pdf->key    = cli_max_malloc(pdf->keylen);  | 
2936  | 4  |                 if (!pdf->key) { | 
2937  | 0  |                     cli_errmsg("check_owner_password: Cannot allocate memory for pdf->key\n"); | 
2938  | 0  |                     goto done;  | 
2939  | 0  |                 }  | 
2940  |  |  | 
2941  | 4  |                 aes_256cbc_decrypt((const unsigned char *)OE, &OE_len, (unsigned char *)(pdf->key), (char *)hash, 32, 0);  | 
2942  | 4  |                 dbg_printhex("check_owner_password: Candidate encryption key", pdf->key, pdf->keylen); | 
2943  |  |  | 
2944  | 4  |                 password_empty = true;  | 
2945  | 4  |             }  | 
2946  |  |  | 
2947  | 4  |             break;  | 
2948  | 4  |         }  | 
2949  | 6.01k  |         default: { | 
2950  | 6.01k  |             cli_dbgmsg("check_owner_password: Unknown or unsupported encryption version. R: %d\n", R); | 
2951  | 6.01k  |             noisy_warnmsg("check_owner_password: Unknown or unsupported encryption version. R: %d\n", R); | 
2952  | 6.01k  |         }  | 
2953  | 9.95k  |     }  | 
2954  |  |  | 
2955  | 9.48k  |     if (password_empty) { | 
2956  |  |         /* The key we computed above is the key used to encrypt the streams. We could decrypt it now if we wanted to */  | 
2957  | 4  |         pdf->flags |= 1 << DECRYPTABLE_PDF;  | 
2958  |  |  | 
2959  | 4  |         cli_dbgmsg("check_owner_password: encrypted PDF found, owner password is empty, will attempt to decrypt\n"); | 
2960  | 4  |         noisy_msg(pdf, "check_owner_password: encrypted PDF found, owner password is empty, will attempt to decrypt\n");  | 
2961  | 9.48k  |     } else { | 
2962  |  |         /* The key is not valid, we would need the user or the owner password to decrypt */  | 
2963  | 9.48k  |         cli_dbgmsg("check_owner_password: encrypted PDF found but cannot decrypt with empty owner password\n"); | 
2964  | 9.48k  |         noisy_warnmsg("check_owner_password: encrypted PDF found but cannot decrypt with empty owner password\n"); | 
2965  | 9.48k  |     }  | 
2966  |  |  | 
2967  | 9.95k  | done:  | 
2968  |  |  | 
2969  | 9.95k  |     return;  | 
2970  | 9.48k  | }  | 
2971  |  |  | 
2972  |  | static void check_user_password(struct pdf_struct *pdf, int R, const char *O,  | 
2973  |  |                                 const char *U, int32_t P, int EM,  | 
2974  |  |                                 const char *UE, size_t UE_len,  | 
2975  |  |                                 unsigned length)  | 
2976  | 9.95k  | { | 
2977  | 9.95k  |     unsigned i;  | 
2978  | 9.95k  |     uint8_t result[16];  | 
2979  | 9.95k  |     char data[32];  | 
2980  | 9.95k  |     struct arc4_state arc4;  | 
2981  | 9.95k  |     bool password_empty = false;  | 
2982  |  |  | 
2983  | 9.95k  |     dbg_printhex("U: ", U, 32); | 
2984  | 9.95k  |     dbg_printhex("O: ", O, 32); | 
2985  |  |  | 
2986  | 9.95k  |     switch (R) { | 
2987  | 2.00k  |         case 2:  | 
2988  | 4.64k  |         case 3:  | 
2989  | 5.98k  |         case 4: { | 
2990  | 5.98k  |             unsigned char *d;  | 
2991  | 5.98k  |             size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0);  | 
2992  | 5.98k  |             d         = calloc(1, sz);  | 
2993  |  |  | 
2994  | 5.98k  |             if (!(d))  | 
2995  | 0  |                 goto done;  | 
2996  |  |  | 
2997  | 5.98k  |             memcpy(d, key_padding, 32);  | 
2998  | 5.98k  |             memcpy(d + 32, O, 32);  | 
2999  | 5.98k  |             P = le32_to_host(P);  | 
3000  | 5.98k  |             memcpy(d + 64, &P, 4);  | 
3001  | 5.98k  |             memcpy(d + 68, pdf->fileID, pdf->fileIDlen);  | 
3002  |  |  | 
3003  |  |             /* 7.6.3.3 Algorithm 2 */  | 
3004  |  |             /* empty password, password == padding */  | 
3005  | 5.98k  |             if (R >= 4 && !EM) { | 
3006  | 0  |                 uint32_t v = 0xFFFFFFFF;  | 
3007  | 0  |                 memcpy(d + 68 + pdf->fileIDlen, &v, 4);  | 
3008  | 0  |             }  | 
3009  |  |  | 
3010  | 5.98k  |             cl_hash_data("md5", d, sz, result, NULL); | 
3011  | 5.98k  |             free(d);  | 
3012  | 5.98k  |             if (length > 128)  | 
3013  | 89  |                 length = 128;  | 
3014  | 5.98k  |             if (R >= 3) { | 
3015  |  |                 /* Yes, this really is on purpose */  | 
3016  | 203k  |                 for (i = 0; i < 50; i++)  | 
3017  | 199k  |                     cl_hash_data("md5", result, length / 8, result, NULL); | 
3018  | 3.98k  |             }  | 
3019  | 5.98k  |             if (R == 2)  | 
3020  | 2.00k  |                 length = 40;  | 
3021  |  |  | 
3022  | 5.98k  |             pdf->keylen = length / 8;  | 
3023  | 5.98k  |             pdf->key    = cli_max_malloc(pdf->keylen);  | 
3024  | 5.98k  |             if (!pdf->key)  | 
3025  | 0  |                 goto done;  | 
3026  |  |  | 
3027  | 5.98k  |             memcpy(pdf->key, result, pdf->keylen);  | 
3028  | 5.98k  |             dbg_printhex("md5", (const char *)result, 16); | 
3029  | 5.98k  |             dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen); | 
3030  |  |  | 
3031  |  |             /* 7.6.3.3 Algorithm 6 */  | 
3032  | 5.98k  |             if (R == 2) { | 
3033  |  |                 /* 7.6.3.3 Algorithm 4 */  | 
3034  | 2.00k  |                 memcpy(data, key_padding, 32);  | 
3035  | 2.00k  |                 if (false == arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen)) { | 
3036  | 0  |                     noisy_warnmsg("check_user_password: failed to init arc4\n"); | 
3037  | 0  |                     goto done;  | 
3038  | 0  |                 }  | 
3039  | 2.00k  |                 arc4_apply(&arc4, (uint8_t *)data, 32);  | 
3040  | 2.00k  |                 dbg_printhex("computed U (R2)", data, 32); | 
3041  | 2.00k  |                 if (!memcmp(data, U, 32))  | 
3042  | 470  |                     password_empty = true;  | 
3043  | 3.98k  |             } else { | 
3044  |  |                 // R is 3 or 4  | 
3045  | 3.98k  |                 unsigned len = pdf->keylen;  | 
3046  | 3.98k  |                 unsigned char *d;  | 
3047  |  |  | 
3048  | 3.98k  |                 d = calloc(1, 32 + pdf->fileIDlen);  | 
3049  | 3.98k  |                 if (!(d))  | 
3050  | 0  |                     goto done;  | 
3051  |  |  | 
3052  |  |                 /* 7.6.3.3 Algorithm 5 */  | 
3053  | 3.98k  |                 memcpy(d, key_padding, 32);  | 
3054  | 3.98k  |                 memcpy(d + 32, pdf->fileID, pdf->fileIDlen);  | 
3055  | 3.98k  |                 cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL); | 
3056  | 3.98k  |                 memcpy(data, pdf->key, len);  | 
3057  |  |  | 
3058  | 3.98k  |                 if (false == arc4_init(&arc4, (const uint8_t *)data, len)) { | 
3059  | 0  |                     noisy_warnmsg("check_user_password: failed to init arc4\n"); | 
3060  | 0  |                     goto done;  | 
3061  | 0  |                 }  | 
3062  | 3.98k  |                 arc4_apply(&arc4, result, 16);  | 
3063  | 79.7k  |                 for (i = 1; i <= 19; i++) { | 
3064  | 75.7k  |                     unsigned j;  | 
3065  |  |  | 
3066  | 775k  |                     for (j = 0; j < len; j++)  | 
3067  | 700k  |                         data[j] = pdf->key[j] ^ i;  | 
3068  |  |  | 
3069  | 75.7k  |                     if (false == arc4_init(&arc4, (const uint8_t *)data, len)) { | 
3070  | 0  |                         noisy_warnmsg("check_user_password: failed to init arc4\n"); | 
3071  | 0  |                         goto done;  | 
3072  | 0  |                     }  | 
3073  | 75.7k  |                     arc4_apply(&arc4, result, 16);  | 
3074  | 75.7k  |                 }  | 
3075  |  |  | 
3076  | 3.98k  |                 dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen); | 
3077  | 3.98k  |                 dbg_printhex("computed U (R>=3)", (const char *)result, 16); | 
3078  | 3.98k  |                 if (!memcmp(result, U, 16))  | 
3079  | 942  |                     password_empty = true;  | 
3080  | 3.98k  |                 free(d);  | 
3081  | 3.98k  |             }  | 
3082  |  |  | 
3083  | 5.98k  |             break;  | 
3084  | 5.98k  |         }  | 
3085  | 5.98k  |         case 5: { | 
3086  | 31  |             uint8_t result2[32];  | 
3087  |  |  | 
3088  |  |             /* supplement to ISO3200, 3.5.2 Algorithm 3.11 */  | 
3089  |  |             /* user validation salt */  | 
3090  | 31  |             cl_sha256(U + 32, 8, result2, NULL);  | 
3091  | 31  |             dbg_printhex("Computed U", (const char *)result2, 32); | 
3092  | 31  |             if (!memcmp(result2, U, 32)) { | 
3093  |  |                 /* Algorithm 3.2a could be used to recover encryption key */  | 
3094  | 0  |                 cl_sha256(U + 40, 8, result2, NULL);  | 
3095  |  | 
  | 
3096  | 0  |                 if (UE_len != 32) { | 
3097  | 0  |                     cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); | 
3098  | 0  |                     noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); | 
3099  | 0  |                 } else { | 
3100  | 0  |                     pdf->keylen = 32;  | 
3101  | 0  |                     pdf->key    = cli_max_malloc(pdf->keylen);  | 
3102  | 0  |                     if (!pdf->key) { | 
3103  | 0  |                         cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); | 
3104  | 0  |                         goto done;  | 
3105  | 0  |                     }  | 
3106  |  |  | 
3107  | 0  |                     aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0);  | 
3108  | 0  |                     dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); | 
3109  |  | 
  | 
3110  | 0  |                     password_empty = true;  | 
3111  | 0  |                 }  | 
3112  | 0  |             }  | 
3113  |  |  | 
3114  | 31  |             break;  | 
3115  | 31  |         }  | 
3116  | 3.93k  |         case 6: { | 
3117  | 3.93k  |             unsigned char hash[32], validationkey[32];  | 
3118  |  |  | 
3119  | 3.93k  |             size_t pwlen    = 0;  | 
3120  | 3.93k  |             char password[] = "";  | 
3121  |  |  | 
3122  | 3.93k  |             if (NULL == UE) { | 
3123  | 371  |                 cli_dbgmsg("check_user_password: Missing UE value!\n"); | 
3124  | 371  |                 noisy_warnmsg("check_user_password: Missing UE value!\n"); | 
3125  | 371  |                 goto done;  | 
3126  | 371  |             }  | 
3127  |  |  | 
3128  | 3.56k  |             dbg_printhex("UE: ", UE, UE_len); | 
3129  |  |  | 
3130  |  |             /*  | 
3131  |  |              * Test the password against the user key by computing the SHA-256 hash of the UTF-8 password concatenated  | 
3132  |  |              * with the 8 bytes of user validation salt.  | 
3133  |  |              */  | 
3134  | 3.56k  |             compute_hash_r6(  | 
3135  | 3.56k  |                 password,  | 
3136  | 3.56k  |                 pwlen,  | 
3137  | 3.56k  |                 (const unsigned char *)(U + 32), // user validation salt  | 
3138  | 3.56k  |                 validationkey,  | 
3139  | 3.56k  |                 NULL); // no U string for user password check  | 
3140  |  |  | 
3141  |  |             /* If the 32-byte result matches the first 32 bytes of the U string, this is the user password. */  | 
3142  | 3.56k  |             if (0 != memcmp(U, validationkey, sizeof(validationkey))) { | 
3143  | 776  |                 cli_dbgmsg("check_user_password: User password check did not match!\n"); | 
3144  | 776  |                 break;  | 
3145  | 776  |             }  | 
3146  |  |  | 
3147  |  |             /*  | 
3148  |  |              * Compute an intermediate user key by computing the SHA-256 hash of the UTF-8 password concatenated with  | 
3149  |  |              * the 8 bytes of user key salt.  | 
3150  |  |              */  | 
3151  | 2.78k  |             compute_hash_r6(  | 
3152  | 2.78k  |                 password,  | 
3153  | 2.78k  |                 pwlen,  | 
3154  | 2.78k  |                 (const unsigned char *)(U + 40), // user key salt  | 
3155  | 2.78k  |                 hash,  | 
3156  | 2.78k  |                 NULL); // no U string for user password check  | 
3157  |  |  | 
3158  | 2.78k  |             if (UE_len != 32) { | 
3159  | 281  |                 cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); | 
3160  | 281  |                 noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); | 
3161  | 2.50k  |             } else { | 
3162  | 2.50k  |                 pdf->keylen = 32;  | 
3163  | 2.50k  |                 pdf->key    = cli_max_malloc(pdf->keylen);  | 
3164  | 2.50k  |                 if (!pdf->key) { | 
3165  | 0  |                     cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); | 
3166  | 0  |                     goto done;  | 
3167  | 0  |                 }  | 
3168  |  |  | 
3169  | 2.50k  |                 aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)hash, 32, 0);  | 
3170  | 2.50k  |                 dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); | 
3171  |  |  | 
3172  | 2.50k  |                 password_empty = true;  | 
3173  | 2.50k  |             }  | 
3174  |  |  | 
3175  | 2.78k  |             break;  | 
3176  | 2.78k  |         }  | 
3177  | 2.78k  |         default: { | 
3178  |  |             /* Supported R is in {2,3,4,5} */ | 
3179  | 0  |             cli_dbgmsg("check_user_password: R value out of range\n"); | 
3180  | 0  |             noisy_warnmsg("check_user_password: R value out of range\n"); | 
3181  | 0  |         }  | 
3182  | 9.95k  |     }  | 
3183  |  |  | 
3184  | 9.57k  |     if (password_empty) { | 
3185  | 3.91k  |         cli_dbgmsg("check_user_password: user password is empty\n"); | 
3186  | 3.91k  |         noisy_msg(pdf, "check_user_password: encrypted PDF found, user password is empty, will attempt to decrypt\n");  | 
3187  |  |         /* The key we computed above is the key used to encrypt the streams.  | 
3188  |  |          * We could decrypt it now if we wanted to */  | 
3189  | 3.91k  |         pdf->flags |= 1 << DECRYPTABLE_PDF;  | 
3190  | 5.66k  |     } else { | 
3191  |  |         /* the key is not valid, we would need the user or the owner password to decrypt */  | 
3192  | 5.66k  |         cli_dbgmsg("check_user_password: user/owner password would be required for decryption\n"); | 
3193  | 5.66k  |         noisy_warnmsg("check_user_password: encrypted PDF found, user password is NOT empty, cannot decrypt!\n"); | 
3194  | 5.66k  |     }  | 
3195  |  |  | 
3196  | 9.95k  | done:  | 
3197  | 9.95k  |     return;  | 
3198  | 9.57k  | }  | 
3199  |  |  | 
3200  |  | enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def)  | 
3201  | 32.0k  | { | 
3202  | 32.0k  |     const char *q;  | 
3203  | 32.0k  |     char *CFM           = NULL;  | 
3204  | 32.0k  |     enum enc_method ret = ENC_UNKNOWN;  | 
3205  |  |  | 
3206  | 32.0k  |     if (!key)  | 
3207  | 15.6k  |         return def;  | 
3208  |  |  | 
3209  | 16.4k  |     if (!strcmp(key, "Identity"))  | 
3210  | 69  |         return ENC_IDENTITY;  | 
3211  |  |  | 
3212  | 16.3k  |     q = pdf_getdict(dict, (int *)(&len), key);  | 
3213  | 16.3k  |     if (!q)  | 
3214  | 3.22k  |         return def;  | 
3215  |  |  | 
3216  | 13.1k  |     CFM = pdf_readval(q, len, "/CFM");  | 
3217  | 13.1k  |     if (CFM) { | 
3218  | 8.74k  |         cli_dbgmsg("parse_enc_method: %s CFM: %s\n", key, CFM); | 
3219  | 8.74k  |         if (!strncmp(CFM, "V2", 2))  | 
3220  | 41  |             ret = ENC_V2;  | 
3221  | 8.70k  |         else if (!strncmp(CFM, "AESV2", 5))  | 
3222  | 857  |             ret = ENC_AESV2;  | 
3223  | 7.85k  |         else if (!strncmp(CFM, "AESV3", 5))  | 
3224  | 5.34k  |             ret = ENC_AESV3;  | 
3225  | 2.50k  |         else if (!strncmp(CFM, "None", 4))  | 
3226  | 403  |             ret = ENC_NONE;  | 
3227  |  |  | 
3228  | 8.74k  |         free(CFM);  | 
3229  | 8.74k  |     }  | 
3230  |  |  | 
3231  | 13.1k  |     return ret;  | 
3232  | 16.3k  | }  | 
3233  |  |  | 
3234  |  | void pdf_handle_enc(struct pdf_struct *pdf)  | 
3235  | 433k  | { | 
3236  | 433k  |     struct pdf_obj *obj;  | 
3237  | 433k  |     uint32_t len, n, R, P, length, EM = 1, i, oulen;  | 
3238  |  |  | 
3239  | 433k  |     char *O       = NULL;  | 
3240  | 433k  |     char *OE      = NULL;  | 
3241  | 433k  |     size_t OE_len = 0;  | 
3242  |  |  | 
3243  | 433k  |     char *U       = NULL;  | 
3244  | 433k  |     char *UE      = NULL;  | 
3245  | 433k  |     size_t UE_len = 0;  | 
3246  |  |  | 
3247  | 433k  |     char *StmF = NULL;  | 
3248  | 433k  |     char *StrF = NULL;  | 
3249  | 433k  |     char *EFF  = NULL;  | 
3250  |  |  | 
3251  | 433k  |     const char *q, *q2;  | 
3252  |  |  | 
3253  | 433k  |     if (pdf->enc_objid == ~0u)  | 
3254  | 395k  |         return;  | 
3255  | 38.5k  |     if (!pdf->fileID) { | 
3256  | 6.49k  |         cli_dbgmsg("pdf_handle_enc: no file ID\n"); | 
3257  | 6.49k  |         noisy_warnmsg("pdf_handle_enc: no file ID\n"); | 
3258  | 6.49k  |         return;  | 
3259  | 6.49k  |     }  | 
3260  |  |  | 
3261  | 32.0k  |     obj = find_obj(pdf, pdf->objs[0], pdf->enc_objid);  | 
3262  | 32.0k  |     if (!obj) { | 
3263  | 6.14k  |         cli_dbgmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid >> 8, pdf->enc_objid & 0xff); | 
3264  | 6.14k  |         noisy_warnmsg("pdf_handle_enc: can't find encrypted object %d %d\n", pdf->enc_objid >> 8, pdf->enc_objid & 0xff); | 
3265  | 6.14k  |         return;  | 
3266  | 6.14k  |     }  | 
3267  |  |  | 
3268  | 25.9k  |     len = obj->size;  | 
3269  |  |  | 
3270  | 25.9k  |     q = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
3271  | 25.9k  |                       : (const char *)(obj->start + pdf->map);  | 
3272  |  |  | 
3273  | 25.9k  |     O = U = UE = StmF = StrF = EFF = NULL;  | 
3274  |  |  | 
3275  | 25.9k  |     pdf->enc_method_string       = ENC_UNKNOWN;  | 
3276  | 25.9k  |     pdf->enc_method_stream       = ENC_UNKNOWN;  | 
3277  | 25.9k  |     pdf->enc_method_embeddedfile = ENC_UNKNOWN;  | 
3278  |  |  | 
3279  | 25.9k  |     q2 = cli_memstr(q, len, "/Standard", 9);  | 
3280  | 25.9k  |     if (!q2) { | 
3281  | 4.60k  |         cli_dbgmsg("pdf_handle_enc: /Standard not found\n"); | 
3282  | 4.60k  |         noisy_warnmsg("pdf_handle_enc: /Standard not found\n"); | 
3283  | 4.60k  |         goto done;  | 
3284  | 4.60k  |     }  | 
3285  |  |  | 
3286  |  |     /* we can have both of these:  | 
3287  |  |      * /AESV2/Length /Standard/Length  | 
3288  |  |      * /Length /Standard  | 
3289  |  |      * make sure we don't mistake AES's length for Standard's */  | 
3290  | 21.3k  |     length = pdf_readint(q2, len - (q2 - q), "/Length");  | 
3291  | 21.3k  |     if (length == ~0u)  | 
3292  | 11.9k  |         length = pdf_readint(q, len, "/Length");  | 
3293  |  |  | 
3294  | 21.3k  |     if (length < 40) { | 
3295  | 2.51k  |         cli_dbgmsg("pdf_handle_enc: invalid length: %d\n", length); | 
3296  | 2.51k  |         length = 40;  | 
3297  | 2.51k  |     }  | 
3298  |  |  | 
3299  | 21.3k  |     R = pdf_readint(q, len, "/R");  | 
3300  | 21.3k  |     if (R == ~0u) { | 
3301  | 1.08k  |         cli_dbgmsg("pdf_handle_enc: invalid R\n"); | 
3302  | 1.08k  |         noisy_warnmsg("pdf_handle_enc: invalid R\n"); | 
3303  | 1.08k  |         goto done;  | 
3304  | 1.08k  |     }  | 
3305  |  |  | 
3306  | 20.2k  |     if ((R > 6) || (R < 2)) { | 
3307  | 589  |         cli_dbgmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); | 
3308  | 589  |         noisy_warnmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); | 
3309  | 589  |         goto done;  | 
3310  | 589  |     }  | 
3311  |  |  | 
3312  | 19.6k  |     P = pdf_readint(q, len, "/P");  | 
3313  | 19.6k  |     if (R < 6) { // P field doesn't seem to be required for R6. | 
3314  | 11.6k  |         if (P == ~0u) { | 
3315  | 1.94k  |             cli_dbgmsg("pdf_handle_enc: invalid P\n"); | 
3316  | 1.94k  |             noisy_warnmsg("pdf_handle_enc: invalid P\n"); | 
3317  | 1.94k  |             goto done;  | 
3318  | 1.94k  |         }  | 
3319  | 11.6k  |     }  | 
3320  |  |  | 
3321  | 17.6k  |     if (R < 5) { | 
3322  | 9.45k  |         oulen = 32;  | 
3323  | 9.45k  |     } else { | 
3324  | 8.23k  |         oulen = 48;  | 
3325  | 8.23k  |     }  | 
3326  |  |  | 
3327  | 17.6k  |     if (R == 2 || R == 3) { | 
3328  | 7.11k  |         pdf->enc_method_stream       = ENC_V2;  | 
3329  | 7.11k  |         pdf->enc_method_string       = ENC_V2;  | 
3330  | 7.11k  |         pdf->enc_method_embeddedfile = ENC_V2;  | 
3331  | 10.5k  |     } else if (R == 4 || R == 5 || R == 6) { | 
3332  | 10.5k  |         EM        = pdf_readbool(q, len, "/EncryptMetadata", 1);  | 
3333  | 10.5k  |         StmF      = pdf_readval(q, len, "/StmF");  | 
3334  | 10.5k  |         StrF      = pdf_readval(q, len, "/StrF");  | 
3335  | 10.5k  |         EFF       = pdf_readval(q, len, "/EFF");  | 
3336  | 10.5k  |         n         = len;  | 
3337  | 10.5k  |         pdf->CF   = pdf_getdict(q, (int *)(&n), "/CF");  | 
3338  | 10.5k  |         pdf->CF_n = n;  | 
3339  |  |  | 
3340  | 10.5k  |         if (StmF) { | 
3341  | 8.00k  |             cli_dbgmsg("pdf_handle_enc: StmF: %s\n", StmF); | 
3342  | 8.00k  |         }  | 
3343  | 10.5k  |         if (StrF) { | 
3344  | 7.76k  |             cli_dbgmsg("pdf_handle_enc: StrF: %s\n", StrF); | 
3345  | 7.76k  |         }  | 
3346  | 10.5k  |         if (EFF) { | 
3347  | 342  |             cli_dbgmsg("pdf_handle_enc: EFF: %s\n", EFF); | 
3348  | 342  |         }  | 
3349  |  |  | 
3350  | 10.5k  |         pdf->enc_method_stream       = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);  | 
3351  | 10.5k  |         pdf->enc_method_string       = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);  | 
3352  | 10.5k  |         pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream);  | 
3353  |  |  | 
3354  | 10.5k  |         cli_dbgmsg("pdf_handle_enc: EncryptMetadata: %s\n", EM ? "true" : "false"); | 
3355  |  |  | 
3356  | 10.5k  |         if (R == 4) { | 
3357  | 2.33k  |             length = 128;  | 
3358  | 8.23k  |         } else { | 
3359  | 8.23k  |             length = 256;  | 
3360  |  |  | 
3361  |  |             /*  | 
3362  |  |              * Read the UE value (for checking user-password)  | 
3363  |  |              */  | 
3364  | 8.23k  |             n      = 0;  | 
3365  | 8.23k  |             UE     = pdf_readstring(q, len, "/UE", &n, NULL, false);  | 
3366  | 8.23k  |             UE_len = n;  | 
3367  |  |  | 
3368  |  |             /*  | 
3369  |  |              * Read the OE value (for checking owner-password)  | 
3370  |  |              */  | 
3371  | 8.23k  |             n      = 0;  | 
3372  | 8.23k  |             OE     = pdf_readstring(q, len, "/OE", &n, NULL, false);  | 
3373  | 8.23k  |             OE_len = n;  | 
3374  | 8.23k  |         }  | 
3375  | 10.5k  |     }  | 
3376  |  |  | 
3377  | 17.6k  |     if (length == ~0u)  | 
3378  | 5.41k  |         length = 40;  | 
3379  |  |  | 
3380  |  |     /*  | 
3381  |  |      * Read the O value  | 
3382  |  |      */  | 
3383  | 17.6k  |     n = 0;  | 
3384  | 17.6k  |     O = pdf_readstring(q, len, "/O", &n, NULL, false);  | 
3385  | 17.6k  |     if (!O || n < oulen) { | 
3386  | 3.74k  |         cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); | 
3387  | 3.74k  |         noisy_warnmsg("pdf_handle_enc: invalid O: %d\n", n); | 
3388  | 3.74k  |         if (O) { | 
3389  | 1.81k  |             dbg_printhex("invalid O", O, n); | 
3390  | 1.81k  |         }  | 
3391  |  |  | 
3392  | 3.74k  |         goto done;  | 
3393  | 3.74k  |     }  | 
3394  | 13.9k  |     if (n > oulen) { | 
3395  | 364k  |         for (i = oulen; i < n; i++) { | 
3396  | 359k  |             if (O[i]) { | 
3397  | 1.56k  |                 dbg_printhex("pdf_handle_enc: too long O", O, n); | 
3398  | 1.56k  |                 noisy_warnmsg("pdf_handle_enc: too long O: %u", n); | 
3399  | 1.56k  |                 goto done;  | 
3400  | 1.56k  |             }  | 
3401  | 359k  |         }  | 
3402  | 6.67k  |     }  | 
3403  |  |  | 
3404  |  |     /*  | 
3405  |  |      * Read the U value  | 
3406  |  |      */  | 
3407  | 12.3k  |     n = 0;  | 
3408  | 12.3k  |     U = pdf_readstring(q, len, "/U", &n, NULL, false);  | 
3409  | 12.3k  |     if (!U || n < oulen) { | 
3410  | 1.12k  |         cli_dbgmsg("pdf_handle_enc: invalid U: %u\n", n); | 
3411  | 1.12k  |         noisy_warnmsg("pdf_handle_enc: invalid U: %u\n", n); | 
3412  | 1.12k  |         if (U) { | 
3413  | 433  |             dbg_printhex("invalid U", U, n); | 
3414  | 433  |         }  | 
3415  |  |  | 
3416  | 1.12k  |         goto done;  | 
3417  | 1.12k  |     }  | 
3418  |  |  | 
3419  | 11.2k  |     if (n > oulen) { | 
3420  | 274k  |         for (i = oulen; i < n; i++) { | 
3421  | 270k  |             if (U[i]) { | 
3422  | 486  |                 dbg_printhex("too long U", U, n); | 
3423  | 486  |                 goto done;  | 
3424  | 486  |             }  | 
3425  | 270k  |         }  | 
3426  | 4.22k  |     }  | 
3427  |  |  | 
3428  | 10.7k  |     cli_dbgmsg("pdf_handle_enc: Encrypt R: %d, P %x, length: %u\n", R, P, length); | 
3429  | 10.7k  |     if (length % 8) { | 
3430  | 808  |         cli_dbgmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); | 
3431  | 808  |         noisy_warnmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); | 
3432  | 808  |         goto done;  | 
3433  | 808  |     }  | 
3434  |  |  | 
3435  |  |     // Check the owner password.  | 
3436  | 9.95k  |     check_owner_password(pdf, R, O, U, OE, OE_len);  | 
3437  |  |  | 
3438  | 9.95k  |     if (NULL == pdf->key) { | 
3439  |  |         // Wasn't the owner password, let's try the user password.  | 
3440  | 9.95k  |         check_user_password(pdf, R, O, U, P, EM, UE, UE_len, length);  | 
3441  | 9.95k  |     }  | 
3442  |  |  | 
3443  | 25.9k  | done:  | 
3444  | 25.9k  |     free(O);  | 
3445  | 25.9k  |     free(OE);  | 
3446  |  |  | 
3447  | 25.9k  |     free(U);  | 
3448  | 25.9k  |     free(UE);  | 
3449  |  |  | 
3450  | 25.9k  |     free(StmF);  | 
3451  | 25.9k  |     free(StrF);  | 
3452  | 25.9k  |     free(EFF);  | 
3453  | 25.9k  | }  | 
3454  |  |  | 
3455  |  | /**  | 
3456  |  |  * @brief Search pdf buffer for objects.  Parse each.  | 
3457  |  |  *  | 
3458  |  |  * Newly found objects will be extracted after completion when the extraction for loop continues.  | 
3459  |  |  *  | 
3460  |  |  * @param pdf           Pdf struct that keeps track of all information found in the PDF.  | 
3461  |  |  * @param objstm        Pointer to an object stream to parse.  | 
3462  |  |  *  | 
3463  |  |  * @return cl_error_t   Error code.  | 
3464  |  |  */  | 
3465  |  | cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm)  | 
3466  | 54.5k  | { | 
3467  | 54.5k  |     cl_error_t status   = CL_EFORMAT;  | 
3468  | 54.5k  |     cl_error_t retval   = CL_EPARSE;  | 
3469  | 54.5k  |     uint32_t badobjects = 0;  | 
3470  | 54.5k  |     size_t i            = 0;  | 
3471  |  |  | 
3472  | 54.5k  |     struct pdf_obj *obj = NULL;  | 
3473  |  |  | 
3474  | 54.5k  |     if ((NULL == objstm) || (NULL == objstm->streambuf)) { | 
3475  | 0  |         status = CL_EARG;  | 
3476  | 0  |         goto done;  | 
3477  | 0  |     }  | 
3478  |  |  | 
3479  | 54.5k  |     if ((0 == objstm->first) ||  | 
3480  | 54.5k  |         (0 == objstm->streambuf_len) ||  | 
3481  | 54.5k  |         (0 == objstm->n)) { | 
3482  | 3.31k  |         cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Empty object stream.\n"); | 
3483  | 3.31k  |         goto done;  | 
3484  | 3.31k  |     }  | 
3485  |  |  | 
3486  | 51.2k  |     if (objstm->first >= objstm->streambuf_len) { | 
3487  | 10.0k  |         cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Invalid objstm values. Offset of first obj greater than stream length.\n"); | 
3488  | 10.0k  |         goto done;  | 
3489  | 10.0k  |     }  | 
3490  |  |  | 
3491  |  |     /* Process each object */  | 
3492  | 280k  |     for (i = 0; i < objstm->n; i++) { | 
3493  | 254k  |         obj = NULL;  | 
3494  |  |  | 
3495  | 254k  |         if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { | 
3496  | 0  |             cli_dbgmsg("Timeout reached in the PDF parser while parsing object stream.\n"); | 
3497  | 0  |             status = CL_ETIMEOUT;  | 
3498  | 0  |             goto done;  | 
3499  | 0  |         }  | 
3500  |  |  | 
3501  |  |         /* Find object */  | 
3502  | 254k  |         retval = pdf_findobj_in_objstm(pdf, objstm, &obj);  | 
3503  | 254k  |         if (retval != CL_SUCCESS) { | 
3504  | 14.7k  |             if (retval != CL_BREAK) { | 
3505  | 14.7k  |                 cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n", | 
3506  | 14.7k  |                            objstm->nobjs_found, objstm->n);  | 
3507  | 14.7k  |                 badobjects++;  | 
3508  | 14.7k  |                 pdf->stats.ninvalidobjs++;  | 
3509  | 14.7k  |             }  | 
3510  | 14.7k  |             break;  | 
3511  | 14.7k  |         }  | 
3512  |  |  | 
3513  | 239k  |         cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Found object %u %u in object stream at offset: %u\n", obj->id >> 8, obj->id & 0xff, obj->start); | 
3514  |  |  | 
3515  | 239k  |         if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { | 
3516  | 0  |             cli_dbgmsg("Timeout reached in the PDF parser while parsing object stream.\n"); | 
3517  | 0  |             status = CL_ETIMEOUT;  | 
3518  | 0  |             goto done;  | 
3519  | 0  |         }  | 
3520  |  |  | 
3521  |  |         /* Parse object */  | 
3522  | 239k  |         pdf_parseobj(pdf, obj);  | 
3523  | 239k  |     }  | 
3524  |  |  | 
3525  | 41.2k  |     if (badobjects) { | 
3526  | 14.7k  |         status = CL_EFORMAT;  | 
3527  | 14.7k  |         goto done;  | 
3528  | 14.7k  |     }  | 
3529  |  |  | 
3530  | 26.4k  |     status = CL_SUCCESS;  | 
3531  |  |  | 
3532  | 54.5k  | done:  | 
3533  | 54.5k  |     return status;  | 
3534  | 26.4k  | }  | 
3535  |  |  | 
3536  |  | /**  | 
3537  |  |  * @brief Search pdf buffer for objects.  Parse each and then extract each.  | 
3538  |  |  *  | 
3539  |  |  * @param pdf               Pdf struct that keeps track of all information found in the PDF.  | 
3540  |  |  *  | 
3541  |  |  * @return cl_error_t       Error code.  | 
3542  |  |  */  | 
3543  |  | static cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf)  | 
3544  | 433k  | { | 
3545  | 433k  |     cl_error_t status   = CL_SUCCESS;  | 
3546  | 433k  |     int32_t rv          = 0;  | 
3547  | 433k  |     unsigned int i      = 0;  | 
3548  | 433k  |     uint32_t badobjects = 0;  | 
3549  | 433k  |     cli_ctx *ctx        = NULL;  | 
3550  |  |  | 
3551  | 433k  |     if (NULL == pdf) { | 
3552  | 0  |         cli_errmsg("pdf_find_and_extract_objs: Invalid arguments.\n"); | 
3553  | 0  |         status = CL_EARG;  | 
3554  | 0  |         goto done;  | 
3555  | 0  |     }  | 
3556  |  |  | 
3557  | 433k  |     ctx = pdf->ctx;  | 
3558  |  |  | 
3559  |  |     /* parse PDF and find obj offsets */  | 
3560  | 2.08M  |     while (CL_BREAK != (rv = pdf_findobj(pdf))) { | 
3561  | 1.64M  |         if (rv == CL_EMEM) { | 
3562  | 0  |             cli_errmsg("pdf_find_and_extract_objs: Memory allocation error.\n"); | 
3563  | 0  |             status = CL_EMEM;  | 
3564  | 0  |             goto done;  | 
3565  | 0  |         }  | 
3566  | 1.64M  |     }  | 
3567  |  |  | 
3568  |  |     /* must parse after finding all objs, so we can flag indirect objects */  | 
3569  | 1.78M  |     for (i = 0; i < pdf->nobjs; i++) { | 
3570  | 1.35M  |         struct pdf_obj *obj = pdf->objs[i];  | 
3571  |  |  | 
3572  | 1.35M  |         if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { | 
3573  | 0  |             cli_dbgmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while parsing objects.\n"); | 
3574  |  | 
  | 
3575  | 0  |             status = CL_ETIMEOUT;  | 
3576  | 0  |             goto done;  | 
3577  | 0  |         }  | 
3578  |  |  | 
3579  | 1.35M  |         pdf_parseobj(pdf, obj);  | 
3580  | 1.35M  |     }  | 
3581  |  |  | 
3582  | 433k  |     pdf_handle_enc(pdf);  | 
3583  | 433k  |     if (pdf->flags & (1 << ENCRYPTED_PDF))  | 
3584  | 53.3k  |         cli_dbgmsg("pdf_find_and_extract_objs: encrypted pdf found, %s!\n", | 
3585  | 53.3k  |                    (pdf->flags & (1 << DECRYPTABLE_PDF)) ? "decryptable" : "not decryptable, stream will probably fail to decompress");  | 
3586  |  |  | 
3587  | 433k  |     if (SCAN_HEURISTIC_ENCRYPTED_DOC &&  | 
3588  | 433k  |         (pdf->flags & (1 << ENCRYPTED_PDF)) &&  | 
3589  | 433k  |         !(pdf->flags & (1 << DECRYPTABLE_PDF))) { | 
3590  |  |         /* It is encrypted, and a password/key needs to be supplied to decrypt.  | 
3591  |  |          * This doesn't trigger for PDFs that are encrypted but don't need  | 
3592  |  |          * a password to decrypt */  | 
3593  | 49.3k  |         status = cli_append_potentially_unwanted(pdf->ctx, "Heuristics.Encrypted.PDF");  | 
3594  | 49.3k  |     }  | 
3595  |  |  | 
3596  | 433k  |     if (CL_SUCCESS == status) { | 
3597  | 433k  |         status = run_pdf_hooks(pdf, PDF_PHASE_PARSED, -1);  | 
3598  | 433k  |         cli_dbgmsg("pdf_find_and_extract_objs: (parsed hooks) returned %d\n", status); | 
3599  | 433k  |     }  | 
3600  |  |  | 
3601  | 433k  |     if (CL_SUCCESS == status) { | 
3602  |  |         /* extract PDF objs */  | 
3603  | 2.02M  |         for (i = 0; !status && i < pdf->nobjs; i++) { | 
3604  | 1.58M  |             struct pdf_obj *obj = pdf->objs[i];  | 
3605  |  |  | 
3606  | 1.58M  |             if (cli_checktimelimit(pdf->ctx) != CL_SUCCESS) { | 
3607  | 0  |                 cli_dbgmsg("pdf_find_and_extract_objs: Timeout reached in the PDF parser while extracting objects.\n"); | 
3608  |  | 
  | 
3609  | 0  |                 status = CL_ETIMEOUT;  | 
3610  | 0  |                 goto done;  | 
3611  | 0  |             }  | 
3612  |  |  | 
3613  | 1.58M  |             pdf->parse_recursion_depth++;  | 
3614  | 1.58M  |             status = pdf_extract_obj(pdf, obj, PDF_EXTRACT_OBJ_SCAN);  | 
3615  | 1.58M  |             pdf->parse_recursion_depth--;  | 
3616  | 1.58M  |             switch (status) { | 
3617  | 0  |                 case CL_EFORMAT:  | 
3618  |  |                     /* Don't halt on one bad object */  | 
3619  | 0  |                     cli_dbgmsg("pdf_find_and_extract_objs: Format error when extracting object, skipping to the next object.\n"); | 
3620  | 0  |                     badobjects++;  | 
3621  | 0  |                     pdf->stats.ninvalidobjs++;  | 
3622  | 0  |                     status = CL_CLEAN;  | 
3623  | 0  |                     break;  | 
3624  | 0  |                 case CL_VIRUS:  | 
3625  | 0  |                     break;  | 
3626  | 1.58M  |                 default:  | 
3627  | 1.58M  |                     break;  | 
3628  | 1.58M  |             }  | 
3629  | 1.58M  |         }  | 
3630  | 433k  |     }  | 
3631  |  |  | 
3632  | 433k  | done:  | 
3633  | 433k  |     if ((CL_SUCCESS == status) && badobjects) { | 
3634  | 0  |         status = CL_EFORMAT;  | 
3635  | 0  |     }  | 
3636  |  |  | 
3637  | 433k  |     return status;  | 
3638  | 433k  | }  | 
3639  |  |  | 
3640  |  | /**  | 
3641  |  |  * @brief Primary function for parsing and scanning a PDF.  | 
3642  |  |  *  | 
3643  |  |  * @param dir       Filepath for temp file.  | 
3644  |  |  * @param ctx       clam scan context structure.  | 
3645  |  |  * @param offset    offset of pdf in ctx->fmap  | 
3646  |  |  *  | 
3647  |  |  * @return int      Returns cl_error_t status value.  | 
3648  |  |  */  | 
3649  |  | cl_error_t cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)  | 
3650  | 441k  | { | 
3651  | 441k  |     cl_error_t rc = CL_SUCCESS;  | 
3652  | 441k  |     struct pdf_struct pdf;  | 
3653  | 441k  |     fmap_t *map   = ctx->fmap;  | 
3654  | 441k  |     size_t size   = map->len - offset;  | 
3655  | 441k  |     off_t versize = size > 1032 ? 1032 : size;  | 
3656  | 441k  |     off_t map_off, bytesleft;  | 
3657  | 441k  |     unsigned long xref;  | 
3658  | 441k  |     long temp_long;  | 
3659  | 441k  |     const char *pdfver, *tmp, *start, *eofmap, *q, *eof;  | 
3660  | 441k  |     unsigned i;  | 
3661  | 441k  |     unsigned int objs_found = 0;  | 
3662  |  |  | 
3663  | 441k  |     json_object *pdfobj = NULL;  | 
3664  | 441k  |     char *begin, *end, *p1;  | 
3665  |  |  | 
3666  | 441k  |     cli_dbgmsg("in cli_pdf(%s)\n", dir); | 
3667  | 441k  |     memset(&pdf, 0, sizeof(pdf));  | 
3668  | 441k  |     pdf.ctx       = ctx;  | 
3669  | 441k  |     pdf.dir       = dir;  | 
3670  | 441k  |     pdf.enc_objid = ~0u;  | 
3671  |  |  | 
3672  | 441k  |     pdfver = start = fmap_need_off_once(map, offset, versize);  | 
3673  |  |  | 
3674  |  |     /* Check PDF version */  | 
3675  | 441k  |     if (!pdfver) { | 
3676  | 0  |         cli_errmsg("cli_pdf: mmap() failed (1)\n"); | 
3677  | 0  |         rc = CL_EMAP;  | 
3678  | 0  |         goto done;  | 
3679  | 0  |     }  | 
3680  |  |  | 
3681  | 441k  |     if (ctx->wrkproperty)  | 
3682  | 441k  |         pdfobj = cli_jsonobj(ctx->wrkproperty, "PDFStats");  | 
3683  |  |  | 
3684  |  |     /* offset is 0 when coming from filetype2 */  | 
3685  | 441k  |     tmp = cli_memstr(pdfver, versize, "%PDF-", 5);  | 
3686  | 441k  |     if (!tmp) { | 
3687  | 7.32k  |         cli_dbgmsg("cli_pdf: no PDF- header found\n"); | 
3688  | 7.32k  |         noisy_warnmsg("cli_pdf: no PDF- header found\n"); | 
3689  |  |  | 
3690  | 7.32k  |         rc = CL_SUCCESS;  | 
3691  | 7.32k  |         goto done;  | 
3692  | 7.32k  |     }  | 
3693  |  |  | 
3694  | 434k  |     versize -= tmp - pdfver;  | 
3695  | 434k  |     pdfver = tmp;  | 
3696  |  |  | 
3697  | 434k  |     if (versize < 8) { | 
3698  | 352  |         rc = CL_EFORMAT;  | 
3699  | 352  |         goto done;  | 
3700  | 352  |     }  | 
3701  |  |  | 
3702  |  |     /* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */  | 
3703  | 433k  |     if (pdfver[5] != '1' || pdfver[6] != '.' ||  | 
3704  | 433k  |         pdfver[7] < '1' || pdfver[7] > '9') { | 
3705  | 304k  |         pdf.flags |= 1 << BAD_PDF_VERSION;  | 
3706  | 304k  |         cli_dbgmsg("cli_pdf: bad pdf version: %.8s\n", pdfver); | 
3707  |  |  | 
3708  | 304k  |         if (pdfobj)  | 
3709  | 304k  |             cli_jsonbool(pdfobj, "BadVersion", 1);  | 
3710  | 304k  |     } else { | 
3711  | 129k  |         if (pdfobj) { | 
3712  | 129k  |             begin = (char *)(pdfver + 5);  | 
3713  | 129k  |             end   = begin + 2;  | 
3714  | 129k  |             strtoul(end, &end, 10);  | 
3715  | 129k  |             p1 = cli_max_calloc((end - begin) + 2, 1);  | 
3716  | 129k  |             if (p1) { | 
3717  | 129k  |                 strncpy(p1, begin, end - begin);  | 
3718  | 129k  |                 p1[end - begin] = '\0';  | 
3719  | 129k  |                 cli_jsonstr(pdfobj, "PDFVersion", p1);  | 
3720  | 129k  |                 free(p1);  | 
3721  | 129k  |             }  | 
3722  | 129k  |         }  | 
3723  | 129k  |     }  | 
3724  |  |  | 
3725  | 433k  |     if (pdfver != start || offset) { | 
3726  | 414k  |         pdf.flags |= 1 << BAD_PDF_HEADERPOS;  | 
3727  | 414k  |         cli_dbgmsg("cli_pdf: PDF header is not at position 0: %lld\n", (long long)(pdfver - start + offset)); | 
3728  |  |  | 
3729  | 414k  |         if (pdfobj)  | 
3730  | 414k  |             cli_jsonbool(pdfobj, "BadVersionLocation", 1);  | 
3731  | 414k  |     }  | 
3732  |  |  | 
3733  | 433k  |     offset += pdfver - start;  | 
3734  |  |  | 
3735  |  |     /* find trailer and xref, don't fail if not found */  | 
3736  | 433k  |     map_off = (off_t)map->len - 2048;  | 
3737  | 433k  |     if (map_off < 0)  | 
3738  | 284k  |         map_off = 0;  | 
3739  |  |  | 
3740  | 433k  |     bytesleft = map->len - map_off;  | 
3741  |  |  | 
3742  | 433k  |     eofmap = fmap_need_off_once(map, map_off, bytesleft);  | 
3743  | 433k  |     if (!eofmap) { | 
3744  | 0  |         cli_errmsg("cli_pdf: mmap() failed (2)\n"); | 
3745  |  | 
  | 
3746  | 0  |         rc = CL_EMAP;  | 
3747  | 0  |         goto done;  | 
3748  | 0  |     }  | 
3749  |  |  | 
3750  | 433k  |     eof = eofmap + bytesleft;  | 
3751  | 469M  |     for (q = &eofmap[bytesleft - 5]; q > eofmap; q--) { | 
3752  | 468M  |         if (memcmp(q, "%%EOF", 5) == 0)  | 
3753  | 90.7k  |             break;  | 
3754  | 468M  |     }  | 
3755  |  |  | 
3756  | 433k  |     if (q <= eofmap) { | 
3757  | 342k  |         pdf.flags |= 1 << BAD_PDF_TRAILER;  | 
3758  | 342k  |         cli_dbgmsg("cli_pdf: %%%%EOF not found\n"); | 
3759  |  |  | 
3760  | 342k  |         if (pdfobj)  | 
3761  | 342k  |             cli_jsonbool(pdfobj, "NoEOF", 1);  | 
3762  | 342k  |     } else { | 
3763  | 90.7k  |         const char *t;  | 
3764  |  |  | 
3765  |  |         /*size = q - eofmap + map_off;*/  | 
3766  | 90.7k  |         q -= 9;  | 
3767  | 27.2M  |         for (; q > eofmap; q--) { | 
3768  | 27.1M  |             if (memcmp(q, "startxref", 9) == 0)  | 
3769  | 59.4k  |                 break;  | 
3770  | 27.1M  |         }  | 
3771  |  |  | 
3772  | 90.7k  |         if (q <= eofmap) { | 
3773  | 31.2k  |             pdf.flags |= 1 << BAD_PDF_TRAILER;  | 
3774  | 31.2k  |             cli_dbgmsg("cli_pdf: startxref not found\n"); | 
3775  |  |  | 
3776  | 31.2k  |             if (pdfobj)  | 
3777  | 31.2k  |                 cli_jsonbool(pdfobj, "NoXREF", 1);  | 
3778  | 59.4k  |         } else { | 
3779  | 50.6M  |             for (t = q; t > eofmap; t--) { | 
3780  | 50.5M  |                 if (memcmp(t, "trailer", 7) == 0)  | 
3781  | 8.38k  |                     break;  | 
3782  | 50.5M  |             }  | 
3783  |  |  | 
3784  | 59.4k  |             pdf_parse_trailer(&pdf, eofmap, eof - eofmap);  | 
3785  | 59.4k  |             q += 9;  | 
3786  |  |  | 
3787  | 137k  |             while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { | 
3788  | 78.3k  |                 q++;  | 
3789  | 78.3k  |             }  | 
3790  |  |  | 
3791  | 59.4k  |             if (CL_SUCCESS != cli_strntol_wrap(q, q - eofmap + map_off, 0, 10, &temp_long)) { | 
3792  | 9.02k  |                 cli_dbgmsg("cli_pdf: failed to parse PDF trailer xref\n"); | 
3793  | 9.02k  |                 pdf.flags |= 1 << BAD_PDF_TRAILER;  | 
3794  | 50.4k  |             } else if (temp_long < 0) { | 
3795  | 4.26k  |                 cli_dbgmsg("cli_pdf: Encountered invalid negative PDF trailer xref (%ld).\n", temp_long); | 
3796  | 4.26k  |                 pdf.flags |= 1 << BAD_PDF_TRAILER;  | 
3797  | 46.1k  |             } else { | 
3798  | 46.1k  |                 xref      = (unsigned long)temp_long;  | 
3799  | 46.1k  |                 bytesleft = map->len - offset - xref;  | 
3800  | 46.1k  |                 if (bytesleft > 4096)  | 
3801  | 5.24k  |                     bytesleft = 4096;  | 
3802  |  |  | 
3803  | 46.1k  |                 q = fmap_need_off_once(map, offset + xref, bytesleft);  | 
3804  | 46.1k  |                 if (!q || xrefCheck(q, q + bytesleft) == -1) { | 
3805  | 41.4k  |                     cli_dbgmsg("cli_pdf: did not find valid xref\n"); | 
3806  | 41.4k  |                     pdf.flags |= 1 << BAD_PDF_TRAILER;  | 
3807  | 41.4k  |                 }  | 
3808  | 46.1k  |             }  | 
3809  | 59.4k  |         }  | 
3810  | 90.7k  |     }  | 
3811  |  |  | 
3812  | 433k  |     size -= offset;  | 
3813  | 433k  |     pdf.size = size;  | 
3814  | 433k  |     pdf.map  = fmap_need_off(map, offset, size);  | 
3815  | 433k  |     if (!pdf.map) { | 
3816  | 0  |         cli_errmsg("cli_pdf: mmap() failed (3)\n"); | 
3817  |  | 
  | 
3818  | 0  |         rc = CL_EMAP;  | 
3819  | 0  |         goto done;  | 
3820  | 0  |     }  | 
3821  |  |  | 
3822  | 433k  |     pdf.startoff = offset;  | 
3823  |  |  | 
3824  | 433k  |     rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1);  | 
3825  | 433k  |     if (CL_SUCCESS != rc) { | 
3826  | 0  |         cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc); | 
3827  |  | 
  | 
3828  | 0  |         rc = rc == CL_BREAK ? CL_CLEAN : rc;  | 
3829  | 0  |         goto done;  | 
3830  | 0  |     }  | 
3831  |  |  | 
3832  |  |     /*  | 
3833  |  |      * Find and extract all objects in the PDF.  | 
3834  |  |      * This methodology adds objects from object streams.  | 
3835  |  |      */  | 
3836  | 433k  |     objs_found = pdf.nobjs;  | 
3837  | 433k  |     rc         = pdf_find_and_extract_objs(&pdf);  | 
3838  |  |  | 
3839  | 433k  |     if (CL_EMEM == rc) { | 
3840  | 5  |         cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs had an allocation failure\n"); | 
3841  | 5  |         goto err;  | 
3842  | 433k  |     } else if (pdf.nobjs <= objs_found) { | 
3843  | 49.6k  |         cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs did not find any new objects!\n"); | 
3844  | 384k  |     } else { | 
3845  | 384k  |         cli_dbgmsg("cli_pdf: pdf_find_and_extract_objs found %d new objects.\n", pdf.nobjs - objs_found); | 
3846  | 384k  |     }  | 
3847  |  |  | 
3848  | 433k  |     if (pdf.flags & (1 << ENCRYPTED_PDF))  | 
3849  | 53.3k  |         pdf.flags &= ~((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE));  | 
3850  |  |  | 
3851  | 433k  |     if (pdf.flags && CL_SUCCESS == rc) { | 
3852  | 429k  |         cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags); | 
3853  | 429k  |         rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1);  | 
3854  |  |  | 
3855  | 429k  |         if (CL_SUCCESS == rc && SCAN_HEURISTICS && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) { | 
3856  | 429k  |             if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) { | 
3857  |  |                 /* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */  | 
3858  | 1.00k  |                 rc = cli_append_potentially_unwanted(ctx, "Heuristics.PDF.ObfuscatedNameObject");  | 
3859  | 1.00k  |             }  | 
3860  | 429k  |         }  | 
3861  |  | #if 0  | 
3862  |  |     /* TODO: find both trailers, and /Encrypt settings */  | 
3863  |  |     if (pdf.flags & (1 << LINEARIZED_PDF))  | 
3864  |  |         pdf.flags &= ~ (1 << BAD_ASCIIDECODE);  | 
3865  |  |     if (pdf.flags & (1 << MANY_FILTERS))  | 
3866  |  |         pdf.flags &= ~ (1 << BAD_ASCIIDECODE);  | 
3867  |  |     if (CL_SUCCESS == rc && (pdf.flags &  | 
3868  |  |         ((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |  | 
3869  |  |          (1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|  | 
3870  |  |              (1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) { | 
3871  |  |         rc = CL_EUNPACK;  | 
3872  |  |     }  | 
3873  |  | #endif  | 
3874  | 429k  |     }  | 
3875  |  |  | 
3876  | 441k  | done:  | 
3877  | 441k  |     if (CL_SUCCESS == rc && pdf.stats.ninvalidobjs > 0) { | 
3878  | 9.31k  |         rc = CL_EFORMAT;  | 
3879  | 9.31k  |     }  | 
3880  |  |  | 
3881  | 441k  | err:  | 
3882  |  |  | 
3883  | 441k  |     pdf_export_json(&pdf);  | 
3884  |  |  | 
3885  | 441k  |     if (pdf.objstms) { | 
3886  | 90.7k  |         for (i = 0; i < pdf.nobjstms; i++) { | 
3887  | 54.5k  |             if (pdf.objstms[i]) { | 
3888  | 54.5k  |                 if (pdf.objstms[i]->streambuf) { | 
3889  | 54.5k  |                     free(pdf.objstms[i]->streambuf);  | 
3890  | 54.5k  |                     pdf.objstms[i]->streambuf = NULL;  | 
3891  | 54.5k  |                 }  | 
3892  | 54.5k  |                 free(pdf.objstms[i]);  | 
3893  | 54.5k  |                 pdf.objstms[i] = NULL;  | 
3894  | 54.5k  |             }  | 
3895  | 54.5k  |         }  | 
3896  | 36.1k  |         free(pdf.objstms);  | 
3897  | 36.1k  |         pdf.objstms = NULL;  | 
3898  | 36.1k  |     }  | 
3899  |  |  | 
3900  | 441k  |     if (NULL != pdf.objs) { | 
3901  | 2.02M  |         for (i = 0; i < pdf.nobjs; i++) { | 
3902  | 1.59M  |             if (NULL != pdf.objs[i]) { | 
3903  | 1.59M  |                 if (NULL != pdf.objs[i]->path) { | 
3904  | 0  |                     free(pdf.objs[i]->path);  | 
3905  | 0  |                     pdf.objs[i]->path = NULL;  | 
3906  | 0  |                 }  | 
3907  | 1.59M  |                 free(pdf.objs[i]);  | 
3908  | 1.59M  |                 pdf.objs[i] = NULL;  | 
3909  | 1.59M  |             }  | 
3910  | 1.59M  |         }  | 
3911  | 433k  |         free(pdf.objs);  | 
3912  | 433k  |         pdf.objs = NULL;  | 
3913  | 433k  |     }  | 
3914  | 441k  |     if (pdf.fileID) { | 
3915  | 39.5k  |         free(pdf.fileID);  | 
3916  | 39.5k  |         pdf.fileID = NULL;  | 
3917  | 39.5k  |     }  | 
3918  | 441k  |     if (pdf.key) { | 
3919  | 8.49k  |         free(pdf.key);  | 
3920  | 8.49k  |         pdf.key = NULL;  | 
3921  | 8.49k  |     }  | 
3922  |  |  | 
3923  |  |     /* PDF hooks may abort, don't return CL_BREAK to caller! */  | 
3924  | 441k  |     rc = (rc == CL_BREAK) ? CL_CLEAN : rc;  | 
3925  |  |  | 
3926  | 441k  |     cli_dbgmsg("cli_pdf: returning %d\n", rc); | 
3927  | 441k  |     return rc;  | 
3928  | 441k  | }  | 
3929  |  |  | 
/**
 * @brief   Advance past the current line and its terminator(s).
 *
 * Scans forward to the first line-terminator byte, then past the whole run of
 * terminator bytes that follows, and returns the first byte after that run.
 * A byte counts as a terminator if it is '\r', '\n' or NUL.
 *
 * @param ptr   Current offset into buffer.
 * @param len   Remaining bytes in buffer.
 *
 * @return const char*  Address of the next line, or NULL if ptr is NULL, len
 *                      is 0, or the buffer ends before a next line starts.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
    size_t pos = 0;

    if (NULL == ptr || 0 == len) {
        /* Invalid args */
        return NULL;
    }

    /* Step 1: consume whatever is left of the current line. */
    while (pos < len && ptr[pos] != '\r' && ptr[pos] != '\n' && ptr[pos] != '\0') {
        pos++;
    }

    /* Step 2: consume the terminator run that ends it. */
    while (pos < len && (ptr[pos] == '\r' || ptr[pos] == '\n' || ptr[pos] == '\0')) {
        pos++;
    }

    /* Running off the end in either step means there is no next line. */
    return (pos < len) ? ptr + pos : NULL;
}
3962  |  |  | 
/**
 * @brief   Locate the start of the next PDF object in a buffer.
 *
 * The scan starts in "inside a token" mode: ordinary bytes are skipped until a
 * separator is seen (space, tab, VT, FF, '[', '<', or a line end / '%' comment,
 * which is skipped via pdf_nextlinestart()). After a separator, the first
 * ordinary byte is returned. '/' and '(' are returned immediately in either
 * mode.
 *
 * This assumes that we're not in a stream.
 *
 * @param ptr   Current offset into buffer.
 * @param len   Remaining bytes in buffer.
 *
 * @return const char*  Address of next object in the buffer, or NULL if there is none in the buffer.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
    const char *cursor     = ptr;
    size_t remaining       = len;
    int still_in_token     = 1;

    while (remaining) {
        const char c = *cursor;

        /* Start of a name object, or start of JS: report right away. */
        if (c == '/' || c == '(') {
            return cursor;
        }

        /* Line end or comment: jump to the beginning of the following line. */
        if (c == '\n' || c == '\r' || c == '%') {
            const char *next_line = pdf_nextlinestart(cursor, remaining);

            if (NULL == next_line) {
                return NULL;
            }

            remaining -= (size_t)(next_line - cursor);
            cursor         = next_line;
            still_in_token = 0;
            continue;
        }

        if (c == ' ' || c == '\t' || c == '\v' || c == '\f' ||
            c == '[' /* array */ || c == '<' /* dictionary */) {
            still_in_token = 0;
        } else if (!still_in_token) {
            /* TODO: parse and return object type */
            return cursor;
        }

        cursor++;
        remaining--;
    }

    return NULL;
}
4021  |  |  | 
4022  |  | /* PDF statistics */  | 
4023  |  | static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4024  | 34.6k  | { | 
4025  | 34.6k  |     UNUSEDPARAM(obj);  | 
4026  | 34.6k  |     UNUSEDPARAM(act);  | 
4027  |  |  | 
4028  | 34.6k  |     if (NULL == pdf)  | 
4029  | 0  |         return;  | 
4030  |  |  | 
4031  | 34.6k  |     pdf->stats.nasciihexdecode++;  | 
4032  | 34.6k  | }  | 
4033  |  |  | 
4034  |  | static void ASCII85Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4035  | 31.7k  | { | 
4036  | 31.7k  |     UNUSEDPARAM(obj);  | 
4037  | 31.7k  |     UNUSEDPARAM(act);  | 
4038  |  |  | 
4039  | 31.7k  |     if (NULL == pdf)  | 
4040  | 0  |         return;  | 
4041  |  |  | 
4042  | 31.7k  |     pdf->stats.nascii85decode++;  | 
4043  | 31.7k  | }  | 
4044  |  |  | 
4045  |  | static void EmbeddedFile_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4046  | 21.0k  | { | 
4047  | 21.0k  |     UNUSEDPARAM(obj);  | 
4048  | 21.0k  |     UNUSEDPARAM(act);  | 
4049  |  |  | 
4050  | 21.0k  |     if (NULL == pdf)  | 
4051  | 0  |         return;  | 
4052  |  |  | 
4053  | 21.0k  |     pdf->stats.nembeddedfile++;  | 
4054  | 21.0k  | }  | 
4055  |  |  | 
4056  |  | static void FlateDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4057  | 186k  | { | 
4058  | 186k  |     UNUSEDPARAM(obj);  | 
4059  | 186k  |     UNUSEDPARAM(act);  | 
4060  |  |  | 
4061  | 186k  |     if (NULL == pdf)  | 
4062  | 0  |         return;  | 
4063  |  |  | 
4064  | 186k  |     pdf->stats.nflate++;  | 
4065  | 186k  | }  | 
4066  |  |  | 
4067  |  | static void Image_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4068  | 8.98k  | { | 
4069  | 8.98k  |     UNUSEDPARAM(obj);  | 
4070  | 8.98k  |     UNUSEDPARAM(act);  | 
4071  |  |  | 
4072  | 8.98k  |     if (NULL == pdf)  | 
4073  | 0  |         return;  | 
4074  |  |  | 
4075  | 8.98k  |     pdf->stats.nimage++;  | 
4076  | 8.98k  | }  | 
4077  |  |  | 
4078  |  | static void LZWDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4079  | 26.6k  | { | 
4080  | 26.6k  |     UNUSEDPARAM(obj);  | 
4081  | 26.6k  |     UNUSEDPARAM(act);  | 
4082  |  |  | 
4083  | 26.6k  |     if (NULL == pdf)  | 
4084  | 0  |         return;  | 
4085  |  |  | 
4086  | 26.6k  |     pdf->stats.nlzw++;  | 
4087  | 26.6k  | }  | 
4088  |  |  | 
4089  |  | static void RunLengthDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4090  | 27.3k  | { | 
4091  | 27.3k  |     UNUSEDPARAM(obj);  | 
4092  | 27.3k  |     UNUSEDPARAM(act);  | 
4093  |  |  | 
4094  | 27.3k  |     if (NULL == pdf)  | 
4095  | 0  |         return;  | 
4096  |  |  | 
4097  | 27.3k  |     pdf->stats.nrunlengthdecode++;  | 
4098  | 27.3k  | }  | 
4099  |  |  | 
4100  |  | static void CCITTFaxDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4101  | 3.20k  | { | 
4102  | 3.20k  |     UNUSEDPARAM(obj);  | 
4103  | 3.20k  |     UNUSEDPARAM(act);  | 
4104  |  |  | 
4105  | 3.20k  |     if (NULL == pdf)  | 
4106  | 0  |         return;  | 
4107  |  |  | 
4108  | 3.20k  |     pdf->stats.nfaxdecode++;  | 
4109  | 3.20k  | }  | 
4110  |  |  | 
4111  |  | static void JBIG2Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4112  | 2.41k  | { | 
4113  | 2.41k  |     cli_ctx *ctx = NULL;  | 
4114  | 2.41k  |     struct json_object *pdfobj, *jbig2arr;  | 
4115  |  |  | 
4116  | 2.41k  |     UNUSEDPARAM(obj);  | 
4117  | 2.41k  |     UNUSEDPARAM(act);  | 
4118  |  |  | 
4119  | 2.41k  |     if (NULL == pdf)  | 
4120  | 0  |         return;  | 
4121  |  |  | 
4122  | 2.41k  |     ctx = pdf->ctx;  | 
4123  |  |  | 
4124  | 2.41k  |     if (!(SCAN_COLLECT_METADATA))  | 
4125  | 0  |         return;  | 
4126  |  |  | 
4127  | 2.41k  |     if (!(pdf->ctx->wrkproperty))  | 
4128  | 0  |         return;  | 
4129  |  |  | 
4130  | 2.41k  |     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
4131  | 2.41k  |     if (!(pdfobj))  | 
4132  | 0  |         return;  | 
4133  |  |  | 
4134  | 2.41k  |     jbig2arr = cli_jsonarray(pdfobj, "JBIG2Objects");  | 
4135  | 2.41k  |     if (!(jbig2arr))  | 
4136  | 0  |         return;  | 
4137  |  |  | 
4138  | 2.41k  |     cli_jsonint_array(jbig2arr, obj->id >> 8);  | 
4139  |  |  | 
4140  | 2.41k  |     pdf->stats.njbig2decode++;  | 
4141  | 2.41k  | }  | 
4142  |  |  | 
4143  |  | static void DCTDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4144  | 16.8k  | { | 
4145  | 16.8k  |     UNUSEDPARAM(obj);  | 
4146  | 16.8k  |     UNUSEDPARAM(act);  | 
4147  |  |  | 
4148  | 16.8k  |     if (NULL == pdf)  | 
4149  | 0  |         return;  | 
4150  |  |  | 
4151  | 16.8k  |     pdf->stats.ndctdecode++;  | 
4152  | 16.8k  | }  | 
4153  |  |  | 
4154  |  | static void JPXDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4155  | 4.46k  | { | 
4156  | 4.46k  |     UNUSEDPARAM(obj);  | 
4157  | 4.46k  |     UNUSEDPARAM(act);  | 
4158  |  |  | 
4159  | 4.46k  |     if (NULL == pdf)  | 
4160  | 0  |         return;  | 
4161  |  |  | 
4162  | 4.46k  |     pdf->stats.njpxdecode++;  | 
4163  | 4.46k  | }  | 
4164  |  |  | 
4165  |  | static void Crypt_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4166  | 15.6k  | { | 
4167  | 15.6k  |     UNUSEDPARAM(obj);  | 
4168  | 15.6k  |     UNUSEDPARAM(act);  | 
4169  |  |  | 
4170  | 15.6k  |     if (NULL == pdf)  | 
4171  | 0  |         return;  | 
4172  |  |  | 
4173  | 15.6k  |     pdf->stats.ncrypt++;  | 
4174  | 15.6k  | }  | 
4175  |  |  | 
4176  |  | static void Standard_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4177  | 47.2k  | { | 
4178  | 47.2k  |     UNUSEDPARAM(obj);  | 
4179  | 47.2k  |     UNUSEDPARAM(act);  | 
4180  |  |  | 
4181  | 47.2k  |     if (NULL == pdf)  | 
4182  | 0  |         return;  | 
4183  |  |  | 
4184  | 47.2k  |     pdf->stats.nstandard++;  | 
4185  | 47.2k  | }  | 
4186  |  |  | 
4187  |  | static void Sig_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4188  | 2.68k  | { | 
4189  | 2.68k  |     UNUSEDPARAM(obj);  | 
4190  | 2.68k  |     UNUSEDPARAM(act);  | 
4191  |  |  | 
4192  | 2.68k  |     if (NULL == pdf)  | 
4193  | 0  |         return;  | 
4194  |  |  | 
4195  | 2.68k  |     pdf->stats.nsigned++;  | 
4196  | 2.68k  | }  | 
4197  |  |  | 
/**
 * @brief   Statistics callback for JavaScript: intentionally a no-op.
 *
 * Nothing is recorded in pdf->stats or in JSON at this point. The actual
 * Javascript is looked for in the object when it is extracted later, which
 * avoids false positives for objects that merely reference an indirect object
 * with no content.
 *
 * @param pdf   Unused.
 * @param obj   Unused.
 * @param act   Unused.
 */
static void JavaScript_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(act);
    UNUSEDPARAM(obj);
    UNUSEDPARAM(pdf);
}
4211  |  |  | 
4212  |  | static void OpenAction_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4213  | 304k  | { | 
4214  | 304k  |     UNUSEDPARAM(obj);  | 
4215  | 304k  |     UNUSEDPARAM(act);  | 
4216  |  |  | 
4217  | 304k  |     if (NULL == pdf)  | 
4218  | 0  |         return;  | 
4219  |  |  | 
4220  | 304k  |     pdf->stats.nopenaction++;  | 
4221  | 304k  | }  | 
4222  |  |  | 
4223  |  | static void Launch_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4224  | 13.6k  | { | 
4225  | 13.6k  |     UNUSEDPARAM(obj);  | 
4226  | 13.6k  |     UNUSEDPARAM(act);  | 
4227  |  |  | 
4228  | 13.6k  |     if (NULL == pdf)  | 
4229  | 0  |         return;  | 
4230  |  |  | 
4231  | 13.6k  |     pdf->stats.nlaunch++;  | 
4232  | 13.6k  | }  | 
4233  |  |  | 
4234  |  | static void Page_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4235  | 28.7k  | { | 
4236  | 28.7k  |     UNUSEDPARAM(obj);  | 
4237  | 28.7k  |     UNUSEDPARAM(act);  | 
4238  |  |  | 
4239  | 28.7k  |     if (NULL == pdf)  | 
4240  | 0  |         return;  | 
4241  |  |  | 
4242  | 28.7k  |     pdf->stats.npage++;  | 
4243  | 28.7k  | }  | 
4244  |  |  | 
4245  |  | static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4246  | 14.0k  | { | 
4247  | 14.0k  |     cli_ctx *ctx = NULL;  | 
4248  |  |  | 
4249  | 14.0k  |     UNUSEDPARAM(act);  | 
4250  |  |  | 
4251  | 14.0k  |     if (NULL == pdf)  | 
4252  | 0  |         return;  | 
4253  |  |  | 
4254  | 14.0k  |     ctx = pdf->ctx;  | 
4255  |  |  | 
4256  | 14.0k  |     if (!(SCAN_COLLECT_METADATA))  | 
4257  | 0  |         return;  | 
4258  |  |  | 
4259  | 14.0k  |     if (!(pdf->stats.author)) { | 
4260  | 9.29k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4261  | 9.29k  |                                              : (const char *)(obj->start + pdf->map);  | 
4262  |  |  | 
4263  | 9.29k  |         pdf->stats.author = calloc(1, sizeof(struct pdf_stats_entry));  | 
4264  | 9.29k  |         if (!(pdf->stats.author))  | 
4265  | 0  |             return;  | 
4266  |  |  | 
4267  | 9.29k  |         pdf->parse_recursion_depth++;  | 
4268  | 9.29k  |         pdf->stats.author->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Author", NULL, &(pdf->stats.author->meta));  | 
4269  | 9.29k  |         pdf->parse_recursion_depth--;  | 
4270  | 9.29k  |     }  | 
4271  | 14.0k  | }  | 
4272  |  |  | 
4273  |  | static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4274  | 10.2k  | { | 
4275  | 10.2k  |     cli_ctx *ctx = NULL;  | 
4276  |  |  | 
4277  | 10.2k  |     UNUSEDPARAM(act);  | 
4278  |  |  | 
4279  | 10.2k  |     if (NULL == pdf)  | 
4280  | 0  |         return;  | 
4281  |  |  | 
4282  | 10.2k  |     ctx = pdf->ctx;  | 
4283  |  |  | 
4284  | 10.2k  |     if (!(SCAN_COLLECT_METADATA))  | 
4285  | 0  |         return;  | 
4286  |  |  | 
4287  | 10.2k  |     if (!(pdf->stats.creator)) { | 
4288  | 7.14k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4289  | 7.14k  |                                              : (const char *)(obj->start + pdf->map);  | 
4290  |  |  | 
4291  | 7.14k  |         pdf->stats.creator = calloc(1, sizeof(struct pdf_stats_entry));  | 
4292  | 7.14k  |         if (!(pdf->stats.creator))  | 
4293  | 0  |             return;  | 
4294  |  |  | 
4295  | 7.14k  |         pdf->parse_recursion_depth++;  | 
4296  | 7.14k  |         pdf->stats.creator->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Creator", NULL, &(pdf->stats.creator->meta));  | 
4297  | 7.14k  |         pdf->parse_recursion_depth--;  | 
4298  | 7.14k  |     }  | 
4299  | 10.2k  | }  | 
4300  |  |  | 
4301  |  | static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4302  | 18.9k  | { | 
4303  | 18.9k  |     cli_ctx *ctx = NULL;  | 
4304  |  |  | 
4305  | 18.9k  |     UNUSEDPARAM(act);  | 
4306  |  |  | 
4307  | 18.9k  |     if (NULL == pdf)  | 
4308  | 0  |         return;  | 
4309  |  |  | 
4310  | 18.9k  |     ctx = pdf->ctx;  | 
4311  |  |  | 
4312  | 18.9k  |     if (!(SCAN_COLLECT_METADATA))  | 
4313  | 0  |         return;  | 
4314  |  |  | 
4315  | 18.9k  |     if (!(pdf->stats.modificationdate)) { | 
4316  | 12.5k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4317  | 12.5k  |                                              : (const char *)(obj->start + pdf->map);  | 
4318  |  |  | 
4319  | 12.5k  |         pdf->stats.modificationdate = calloc(1, sizeof(struct pdf_stats_entry));  | 
4320  | 12.5k  |         if (!(pdf->stats.modificationdate))  | 
4321  | 0  |             return;  | 
4322  |  |  | 
4323  | 12.5k  |         pdf->parse_recursion_depth++;  | 
4324  | 12.5k  |         pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/ModDate", NULL, &(pdf->stats.modificationdate->meta));  | 
4325  | 12.5k  |         pdf->parse_recursion_depth--;  | 
4326  | 12.5k  |     }  | 
4327  | 18.9k  | }  | 
4328  |  |  | 
4329  |  | static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4330  | 50.0k  | { | 
4331  | 50.0k  |     cli_ctx *ctx = NULL;  | 
4332  |  |  | 
4333  | 50.0k  |     UNUSEDPARAM(act);  | 
4334  |  |  | 
4335  | 50.0k  |     if (NULL == pdf)  | 
4336  | 0  |         return;  | 
4337  |  |  | 
4338  | 50.0k  |     ctx = pdf->ctx;  | 
4339  |  |  | 
4340  | 50.0k  |     if (!(SCAN_COLLECT_METADATA))  | 
4341  | 0  |         return;  | 
4342  |  |  | 
4343  | 50.0k  |     if (!(pdf->stats.creationdate)) { | 
4344  | 23.9k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4345  | 23.9k  |                                              : (const char *)(obj->start + pdf->map);  | 
4346  |  |  | 
4347  | 23.9k  |         pdf->stats.creationdate = calloc(1, sizeof(struct pdf_stats_entry));  | 
4348  | 23.9k  |         if (!(pdf->stats.creationdate))  | 
4349  | 0  |             return;  | 
4350  |  |  | 
4351  | 23.9k  |         pdf->parse_recursion_depth++;  | 
4352  | 23.9k  |         pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/CreationDate", NULL, &(pdf->stats.creationdate->meta));  | 
4353  | 23.9k  |         pdf->parse_recursion_depth--;  | 
4354  | 23.9k  |     }  | 
4355  | 50.0k  | }  | 
4356  |  |  | 
4357  |  | static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4358  | 18.9k  | { | 
4359  | 18.9k  |     cli_ctx *ctx = NULL;  | 
4360  |  |  | 
4361  | 18.9k  |     UNUSEDPARAM(act);  | 
4362  |  |  | 
4363  | 18.9k  |     if (NULL == pdf)  | 
4364  | 0  |         return;  | 
4365  |  |  | 
4366  | 18.9k  |     ctx = pdf->ctx;  | 
4367  |  |  | 
4368  | 18.9k  |     if (!(SCAN_COLLECT_METADATA))  | 
4369  | 0  |         return;  | 
4370  |  |  | 
4371  | 18.9k  |     if (!(pdf->stats.producer)) { | 
4372  | 12.3k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4373  | 12.3k  |                                              : (const char *)(obj->start + pdf->map);  | 
4374  |  |  | 
4375  | 12.3k  |         pdf->stats.producer = calloc(1, sizeof(struct pdf_stats_entry));  | 
4376  | 12.3k  |         if (!(pdf->stats.producer))  | 
4377  | 0  |             return;  | 
4378  |  |  | 
4379  | 12.3k  |         pdf->parse_recursion_depth++;  | 
4380  | 12.3k  |         pdf->stats.producer->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Producer", NULL, &(pdf->stats.producer->meta));  | 
4381  | 12.3k  |         pdf->parse_recursion_depth--;  | 
4382  | 12.3k  |     }  | 
4383  | 18.9k  | }  | 
4384  |  |  | 
4385  |  | static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4386  | 16.4k  | { | 
4387  | 16.4k  |     cli_ctx *ctx = NULL;  | 
4388  |  |  | 
4389  | 16.4k  |     UNUSEDPARAM(act);  | 
4390  |  |  | 
4391  | 16.4k  |     if (NULL == pdf)  | 
4392  | 0  |         return;  | 
4393  |  |  | 
4394  | 16.4k  |     ctx = pdf->ctx;  | 
4395  |  |  | 
4396  | 16.4k  |     if (!(SCAN_COLLECT_METADATA))  | 
4397  | 0  |         return;  | 
4398  |  |  | 
4399  | 16.4k  |     if (!(pdf->stats.title)) { | 
4400  | 9.43k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4401  | 9.43k  |                                              : (const char *)(obj->start + pdf->map);  | 
4402  |  |  | 
4403  | 9.43k  |         pdf->stats.title = calloc(1, sizeof(struct pdf_stats_entry));  | 
4404  | 9.43k  |         if (!(pdf->stats.title))  | 
4405  | 0  |             return;  | 
4406  |  |  | 
4407  | 9.43k  |         pdf->parse_recursion_depth++;  | 
4408  | 9.43k  |         pdf->stats.title->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Title", NULL, &(pdf->stats.title->meta));  | 
4409  | 9.43k  |         pdf->parse_recursion_depth--;  | 
4410  | 9.43k  |     }  | 
4411  | 16.4k  | }  | 
4412  |  |  | 
4413  |  | static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4414  | 7.99k  | { | 
4415  | 7.99k  |     cli_ctx *ctx = NULL;  | 
4416  |  |  | 
4417  | 7.99k  |     UNUSEDPARAM(act);  | 
4418  |  |  | 
4419  | 7.99k  |     if (NULL == pdf)  | 
4420  | 0  |         return;  | 
4421  |  |  | 
4422  | 7.99k  |     ctx = pdf->ctx;  | 
4423  |  |  | 
4424  | 7.99k  |     if (!(SCAN_COLLECT_METADATA))  | 
4425  | 0  |         return;  | 
4426  |  |  | 
4427  | 7.99k  |     if (!(pdf->stats.keywords)) { | 
4428  | 5.68k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4429  | 5.68k  |                                              : (const char *)(obj->start + pdf->map);  | 
4430  |  |  | 
4431  | 5.68k  |         pdf->stats.keywords = calloc(1, sizeof(struct pdf_stats_entry));  | 
4432  | 5.68k  |         if (!(pdf->stats.keywords))  | 
4433  | 0  |             return;  | 
4434  |  |  | 
4435  | 5.68k  |         pdf->parse_recursion_depth++;  | 
4436  | 5.68k  |         pdf->stats.keywords->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Keywords", NULL, &(pdf->stats.keywords->meta));  | 
4437  | 5.68k  |         pdf->parse_recursion_depth--;  | 
4438  | 5.68k  |     }  | 
4439  | 7.99k  | }  | 
4440  |  |  | 
4441  |  | static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4442  | 17.6k  | { | 
4443  | 17.6k  |     cli_ctx *ctx = NULL;  | 
4444  |  |  | 
4445  | 17.6k  |     UNUSEDPARAM(act);  | 
4446  |  |  | 
4447  | 17.6k  |     if (NULL == pdf)  | 
4448  | 0  |         return;  | 
4449  |  |  | 
4450  | 17.6k  |     ctx = pdf->ctx;  | 
4451  |  |  | 
4452  | 17.6k  |     if (!(SCAN_COLLECT_METADATA))  | 
4453  | 0  |         return;  | 
4454  |  |  | 
4455  | 17.6k  |     if (!(pdf->stats.subject)) { | 
4456  | 11.1k  |         const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4457  | 11.1k  |                                              : (const char *)(obj->start + pdf->map);  | 
4458  |  |  | 
4459  | 11.1k  |         pdf->stats.subject = calloc(1, sizeof(struct pdf_stats_entry));  | 
4460  | 11.1k  |         if (!(pdf->stats.subject))  | 
4461  | 0  |             return;  | 
4462  |  |  | 
4463  | 11.1k  |         pdf->parse_recursion_depth++;  | 
4464  | 11.1k  |         pdf->stats.subject->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Subject", NULL, &(pdf->stats.subject->meta));  | 
4465  | 11.1k  |         pdf->parse_recursion_depth--;  | 
4466  | 11.1k  |     }  | 
4467  | 17.6k  | }  | 
4468  |  |  | 
4469  |  | static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4470  | 995  | { | 
4471  | 995  |     UNUSEDPARAM(obj);  | 
4472  | 995  |     UNUSEDPARAM(act);  | 
4473  |  |  | 
4474  | 995  |     if (NULL == pdf)  | 
4475  | 0  |         return;  | 
4476  |  |  | 
4477  | 995  |     pdf->stats.nrichmedia++;  | 
4478  | 995  | }  | 
4479  |  |  | 
4480  |  | static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4481  | 10.4k  | { | 
4482  | 10.4k  |     UNUSEDPARAM(obj);  | 
4483  | 10.4k  |     UNUSEDPARAM(act);  | 
4484  |  |  | 
4485  | 10.4k  |     if (NULL == pdf)  | 
4486  | 0  |         return;  | 
4487  |  |  | 
4488  | 10.4k  |     pdf->stats.nacroform++;  | 
4489  | 10.4k  | }  | 
4490  |  |  | 
4491  |  | static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4492  | 627  | { | 
4493  | 627  |     UNUSEDPARAM(obj);  | 
4494  | 627  |     UNUSEDPARAM(act);  | 
4495  |  |  | 
4496  | 627  |     if (NULL == pdf)  | 
4497  | 0  |         return;  | 
4498  |  |  | 
4499  | 627  |     pdf->stats.nxfa++;  | 
4500  | 627  | }  | 
4501  |  |  | 
4502  |  | static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4503  | 151k  | { | 
4504  | 151k  |     cli_ctx *ctx = NULL;  | 
4505  | 151k  |     struct pdf_array *array;  | 
4506  | 151k  |     const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4507  | 151k  |                                          : (const char *)(obj->start + pdf->map);  | 
4508  | 151k  |     const char *begin;  | 
4509  | 151k  |     unsigned long npages = 0, count;  | 
4510  | 151k  |     long temp_long;  | 
4511  | 151k  |     struct pdf_array_node *node;  | 
4512  | 151k  |     json_object *pdfobj;  | 
4513  | 151k  |     size_t countsize = 0;  | 
4514  |  |  | 
4515  | 151k  |     UNUSEDPARAM(act);  | 
4516  |  |  | 
4517  | 151k  |     if (!(pdf) || !(pdf->ctx->wrkproperty))  | 
4518  | 0  |         return;  | 
4519  |  |  | 
4520  | 151k  |     ctx = pdf->ctx;  | 
4521  |  |  | 
4522  | 151k  |     if (!(SCAN_COLLECT_METADATA))  | 
4523  | 0  |         return;  | 
4524  |  |  | 
4525  | 151k  |     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
4526  | 151k  |     if (!(pdfobj))  | 
4527  | 0  |         return;  | 
4528  |  |  | 
4529  | 151k  |     begin = cli_memstr(objstart, obj->size, "/Kids", 5);  | 
4530  | 151k  |     if (!(begin))  | 
4531  | 43.9k  |         return;  | 
4532  |  |  | 
4533  | 107k  |     begin += 5;  | 
4534  |  |  | 
4535  | 107k  |     pdf->parse_recursion_depth++;  | 
4536  | 107k  |     array = pdf_parse_array(pdf, obj, obj->size, (char *)begin, NULL);  | 
4537  | 107k  |     pdf->parse_recursion_depth--;  | 
4538  |  |  | 
4539  | 107k  |     if (!(array)) { | 
4540  | 46.4k  |         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);  | 
4541  | 46.4k  |         return;  | 
4542  | 46.4k  |     }  | 
4543  |  |  | 
4544  | 494k  |     for (node = array->nodes; node != NULL; node = node->next)  | 
4545  | 434k  |         if (node->datasz)  | 
4546  | 414k  |             if (strchr((char *)(node->data), 'R'))  | 
4547  | 33.2k  |                 npages++;  | 
4548  |  |  | 
4549  | 60.5k  |     begin = cli_memstr(objstart, obj->size, "/Count", 6);  | 
4550  | 60.5k  |     if (!(begin)) { | 
4551  | 42.2k  |         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);  | 
4552  | 42.2k  |         goto cleanup;  | 
4553  | 42.2k  |     }  | 
4554  |  |  | 
4555  | 18.3k  |     begin += 6;  | 
4556  | 44.0k  |     while (((size_t)(begin - objstart) < obj->size) && isspace(begin[0]))  | 
4557  | 25.6k  |         begin++;  | 
4558  |  |  | 
4559  | 18.3k  |     if ((size_t)(begin - objstart) >= obj->size) { | 
4560  | 288  |         goto cleanup;  | 
4561  | 288  |     }  | 
4562  |  |  | 
4563  | 18.0k  |     countsize = (obj->objstm) ? (size_t)(obj->start + obj->objstm->streambuf + obj->size - begin)  | 
4564  | 18.0k  |                               : (size_t)(obj->start + pdf->map + obj->size - begin);  | 
4565  |  |  | 
4566  | 18.0k  |     if (CL_SUCCESS != cli_strntol_wrap(begin, countsize, 0, 10, &temp_long)) { | 
4567  | 5.07k  |         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);  | 
4568  | 12.9k  |     } else if (temp_long < 0) { | 
4569  | 138  |         cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);  | 
4570  | 12.8k  |     } else { | 
4571  | 12.8k  |         count = (unsigned long)temp_long;  | 
4572  | 12.8k  |         if (count != npages) { | 
4573  | 8.88k  |             cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);  | 
4574  | 8.88k  |         }  | 
4575  | 12.8k  |     }  | 
4576  |  |  | 
4577  | 60.5k  | cleanup:  | 
4578  | 60.5k  |     pdf_free_array(array);  | 
4579  | 60.5k  | }  | 
4580  |  |  | 
4581  |  | static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)  | 
4582  | 11.6k  | { | 
4583  | 11.6k  |     cli_ctx *ctx = NULL;  | 
4584  | 11.6k  |     json_object *colorsobj, *pdfobj;  | 
4585  | 11.6k  |     unsigned long ncolors;  | 
4586  | 11.6k  |     long temp_long;  | 
4587  | 11.6k  |     char *p1;  | 
4588  | 11.6k  |     const char *objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)  | 
4589  | 11.6k  |                                          : (const char *)(obj->start + pdf->map);  | 
4590  |  |  | 
4591  | 11.6k  |     UNUSEDPARAM(act);  | 
4592  |  |  | 
4593  | 11.6k  |     if (!(pdf) || !(pdf->ctx) || !(pdf->ctx->wrkproperty))  | 
4594  | 0  |         return;  | 
4595  |  |  | 
4596  | 11.6k  |     ctx = pdf->ctx;  | 
4597  |  |  | 
4598  | 11.6k  |     if (!(SCAN_COLLECT_METADATA))  | 
4599  | 0  |         return;  | 
4600  |  |  | 
4601  | 11.6k  |     p1 = (char *)cli_memstr(objstart, obj->size, "/Colors", 7);  | 
4602  | 11.6k  |     if (!(p1))  | 
4603  | 0  |         return;  | 
4604  |  |  | 
4605  | 11.6k  |     p1 += 7;  | 
4606  |  |  | 
4607  |  |     /* Ensure that we have at least one whitespace character plus at least one number */  | 
4608  | 11.6k  |     if (obj->size - (size_t)(p1 - objstart) < 2)  | 
4609  | 0  |         return;  | 
4610  |  |  | 
4611  | 15.1k  |     while (((size_t)(p1 - objstart) < obj->size) && isspace(p1[0]))  | 
4612  | 3.43k  |         p1++;  | 
4613  |  |  | 
4614  | 11.6k  |     if ((size_t)(p1 - objstart) == obj->size)  | 
4615  | 0  |         return;  | 
4616  |  |  | 
4617  | 11.6k  |     if (CL_SUCCESS != cli_strntol_wrap(p1, (size_t)((p1 - objstart) - obj->size), 0, 10, &temp_long)) { | 
4618  | 11.6k  |         return;  | 
4619  | 11.6k  |     } else if (temp_long < 0) { | 
4620  | 0  |         return;  | 
4621  | 0  |     }  | 
4622  | 0  |     ncolors = (unsigned long)temp_long;  | 
4623  |  |  | 
4624  |  |     /* We only care if the number of colors > 2**24 */  | 
4625  | 0  |     if (ncolors < 1 << 24)  | 
4626  | 0  |         return;  | 
4627  |  |  | 
4628  | 0  |     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
4629  | 0  |     if (!(pdfobj))  | 
4630  | 0  |         return;  | 
4631  |  |  | 
4632  | 0  |     colorsobj = cli_jsonarray(pdfobj, "BigColors");  | 
4633  | 0  |     if (!(colorsobj))  | 
4634  | 0  |         return;  | 
4635  |  |  | 
4636  | 0  |     cli_jsonint_array(colorsobj, obj->id >> 8);  | 
4637  | 0  | }  | 
4638  |  |  | 
4639  |  | static void pdf_free_stats(struct pdf_struct *pdf)  | 
4640  | 441k  | { | 
4641  |  |  | 
4642  | 441k  |     if (!pdf) { | 
4643  | 0  |         return;  | 
4644  | 0  |     }  | 
4645  |  |  | 
4646  | 441k  |     if ((pdf->stats.author)) { | 
4647  | 9.29k  |         if (pdf->stats.author->data)  | 
4648  | 7.37k  |             free(pdf->stats.author->data);  | 
4649  | 9.29k  |         free(pdf->stats.author);  | 
4650  | 9.29k  |         pdf->stats.author = NULL;  | 
4651  | 9.29k  |     }  | 
4652  |  |  | 
4653  | 441k  |     if (pdf->stats.creator) { | 
4654  | 7.14k  |         if (pdf->stats.creator->data)  | 
4655  | 3.95k  |             free(pdf->stats.creator->data);  | 
4656  | 7.14k  |         free(pdf->stats.creator);  | 
4657  | 7.14k  |         pdf->stats.creator = NULL;  | 
4658  | 7.14k  |     }  | 
4659  |  |  | 
4660  | 441k  |     if (pdf->stats.producer) { | 
4661  | 12.3k  |         if (pdf->stats.producer->data)  | 
4662  | 8.99k  |             free(pdf->stats.producer->data);  | 
4663  | 12.3k  |         free(pdf->stats.producer);  | 
4664  | 12.3k  |         pdf->stats.producer = NULL;  | 
4665  | 12.3k  |     }  | 
4666  |  |  | 
4667  | 441k  |     if (pdf->stats.modificationdate) { | 
4668  | 12.5k  |         if (pdf->stats.modificationdate->data)  | 
4669  | 9.66k  |             free(pdf->stats.modificationdate->data);  | 
4670  | 12.5k  |         free(pdf->stats.modificationdate);  | 
4671  | 12.5k  |         pdf->stats.modificationdate = NULL;  | 
4672  | 12.5k  |     }  | 
4673  |  |  | 
4674  | 441k  |     if (pdf->stats.creationdate) { | 
4675  | 23.9k  |         if (pdf->stats.creationdate->data)  | 
4676  | 6.31k  |             free(pdf->stats.creationdate->data);  | 
4677  | 23.9k  |         free(pdf->stats.creationdate);  | 
4678  | 23.9k  |         pdf->stats.creationdate = NULL;  | 
4679  | 23.9k  |     }  | 
4680  |  |  | 
4681  | 441k  |     if (pdf->stats.title) { | 
4682  | 9.43k  |         if (pdf->stats.title->data)  | 
4683  | 6.31k  |             free(pdf->stats.title->data);  | 
4684  | 9.43k  |         free(pdf->stats.title);  | 
4685  | 9.43k  |         pdf->stats.title = NULL;  | 
4686  | 9.43k  |     }  | 
4687  |  |  | 
4688  | 441k  |     if (pdf->stats.subject) { | 
4689  | 11.1k  |         if (pdf->stats.subject->data)  | 
4690  | 8.11k  |             free(pdf->stats.subject->data);  | 
4691  | 11.1k  |         free(pdf->stats.subject);  | 
4692  | 11.1k  |         pdf->stats.subject = NULL;  | 
4693  | 11.1k  |     }  | 
4694  |  |  | 
4695  | 441k  |     if (pdf->stats.keywords) { | 
4696  | 5.68k  |         if (pdf->stats.keywords->data)  | 
4697  | 4.69k  |             free(pdf->stats.keywords->data);  | 
4698  | 5.68k  |         free(pdf->stats.keywords);  | 
4699  | 5.68k  |         pdf->stats.keywords = NULL;  | 
4700  | 5.68k  |     }  | 
4701  | 441k  | }  | 
4702  |  |  | 
4703  |  | static void pdf_export_json(struct pdf_struct *pdf)  | 
4704  | 441k  | { | 
4705  | 441k  |     cli_ctx *ctx = NULL;  | 
4706  | 441k  |     json_object *pdfobj;  | 
4707  | 441k  |     unsigned long i;  | 
4708  |  |  | 
4709  | 441k  |     if (NULL == pdf)  | 
4710  | 0  |         return;  | 
4711  |  |  | 
4712  | 441k  |     if (!(pdf->ctx)) { | 
4713  | 0  |         goto cleanup;  | 
4714  | 0  |     }  | 
4715  |  |  | 
4716  | 441k  |     ctx = pdf->ctx;  | 
4717  |  |  | 
4718  | 441k  |     if (!(SCAN_COLLECT_METADATA) || !(pdf->ctx->wrkproperty)) { | 
4719  | 0  |         goto cleanup;  | 
4720  | 0  |     }  | 
4721  |  |  | 
4722  | 441k  |     pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");  | 
4723  | 441k  |     if (!(pdfobj)) { | 
4724  | 0  |         goto cleanup;  | 
4725  | 0  |     }  | 
4726  |  |  | 
4727  | 441k  |     if (pdf->stats.author) { | 
4728  | 9.29k  |         if (!pdf->stats.author->meta.success) { | 
4729  | 4.25k  |             char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);  | 
4730  | 4.25k  |             if (out) { | 
4731  | 140  |                 free(pdf->stats.author->data);  | 
4732  | 140  |                 pdf->stats.author->data         = out;  | 
4733  | 140  |                 pdf->stats.author->meta.length  = strlen(out);  | 
4734  | 140  |                 pdf->stats.author->meta.success = 1;  | 
4735  | 140  |             }  | 
4736  | 4.25k  |         }  | 
4737  |  |  | 
4738  | 9.29k  |         if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) { | 
4739  | 5.18k  |             cli_jsonstr(pdfobj, "Author", pdf->stats.author->data);  | 
4740  | 5.18k  |         } else if (pdf->stats.author->data && pdf->stats.author->meta.length) { | 
4741  | 922  |             char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length);  | 
4742  | 922  |             cli_jsonstr(pdfobj, "Author", b64);  | 
4743  | 922  |             cli_jsonbool(pdfobj, "Author_base64", 1);  | 
4744  | 922  |             free(b64);  | 
4745  | 3.19k  |         } else { | 
4746  | 3.19k  |             cli_jsonstr(pdfobj, "Author", "");  | 
4747  | 3.19k  |         }  | 
4748  | 9.29k  |     }  | 
4749  | 441k  |     if (pdf->stats.creator) { | 
4750  | 7.14k  |         if (!pdf->stats.creator->meta.success) { | 
4751  | 4.34k  |             char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);  | 
4752  | 4.34k  |             if (out) { | 
4753  | 73  |                 free(pdf->stats.creator->data);  | 
4754  | 73  |                 pdf->stats.creator->data         = out;  | 
4755  | 73  |                 pdf->stats.creator->meta.length  = strlen(out);  | 
4756  | 73  |                 pdf->stats.creator->meta.success = 1;  | 
4757  | 73  |             }  | 
4758  | 4.34k  |         }  | 
4759  |  |  | 
4760  | 7.14k  |         if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) { | 
4761  | 2.86k  |             cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data);  | 
4762  | 4.27k  |         } else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) { | 
4763  | 709  |             char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length);  | 
4764  | 709  |             cli_jsonstr(pdfobj, "Creator", b64);  | 
4765  | 709  |             cli_jsonbool(pdfobj, "Creator_base64", 1);  | 
4766  | 709  |             free(b64);  | 
4767  | 3.56k  |         } else { | 
4768  | 3.56k  |             cli_jsonstr(pdfobj, "Creator", "");  | 
4769  | 3.56k  |         }  | 
4770  | 7.14k  |     }  | 
4771  | 441k  |     if (pdf->stats.producer) { | 
4772  | 12.3k  |         if (!pdf->stats.producer->meta.success) { | 
4773  | 6.22k  |             char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);  | 
4774  | 6.22k  |             if (out) { | 
4775  | 123  |                 free(pdf->stats.producer->data);  | 
4776  | 123  |                 pdf->stats.producer->data         = out;  | 
4777  | 123  |                 pdf->stats.producer->meta.length  = strlen(out);  | 
4778  | 123  |                 pdf->stats.producer->meta.success = 1;  | 
4779  | 123  |             }  | 
4780  | 6.22k  |         }  | 
4781  |  |  | 
4782  | 12.3k  |         if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) { | 
4783  | 6.21k  |             cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data);  | 
4784  | 6.21k  |         } else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) { | 
4785  | 2.26k  |             char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length);  | 
4786  | 2.26k  |             cli_jsonstr(pdfobj, "Producer", b64);  | 
4787  | 2.26k  |             cli_jsonbool(pdfobj, "Producer_base64", 1);  | 
4788  | 2.26k  |             free(b64);  | 
4789  | 3.83k  |         } else { | 
4790  | 3.83k  |             cli_jsonstr(pdfobj, "Producer", "");  | 
4791  | 3.83k  |         }  | 
4792  | 12.3k  |     }  | 
4793  | 441k  |     if (pdf->stats.modificationdate) { | 
4794  | 12.5k  |         if (!pdf->stats.modificationdate->meta.success) { | 
4795  | 7.40k  |             char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);  | 
4796  | 7.40k  |             if (out) { | 
4797  | 59  |                 free(pdf->stats.modificationdate->data);  | 
4798  | 59  |                 pdf->stats.modificationdate->data         = out;  | 
4799  | 59  |                 pdf->stats.modificationdate->meta.length  = strlen(out);  | 
4800  | 59  |                 pdf->stats.modificationdate->meta.success = 1;  | 
4801  | 59  |             }  | 
4802  | 7.40k  |         }  | 
4803  |  |  | 
4804  | 12.5k  |         if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) { | 
4805  | 5.23k  |             cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data);  | 
4806  | 7.34k  |         } else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) { | 
4807  | 4.38k  |             char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);  | 
4808  | 4.38k  |             cli_jsonstr(pdfobj, "ModificationDate", b64);  | 
4809  | 4.38k  |             cli_jsonbool(pdfobj, "ModificationDate_base64", 1);  | 
4810  | 4.38k  |             free(b64);  | 
4811  | 4.38k  |         } else { | 
4812  | 2.96k  |             cli_jsonstr(pdfobj, "ModificationDate", "");  | 
4813  | 2.96k  |         }  | 
4814  | 12.5k  |     }  | 
4815  | 441k  |     if (pdf->stats.creationdate) { | 
4816  | 23.9k  |         if (!pdf->stats.creationdate->meta.success) { | 
4817  | 19.6k  |             char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);  | 
4818  | 19.6k  |             if (out) { | 
4819  | 138  |                 free(pdf->stats.creationdate->data);  | 
4820  | 138  |                 pdf->stats.creationdate->data         = out;  | 
4821  | 138  |                 pdf->stats.creationdate->meta.length  = strlen(out);  | 
4822  | 138  |                 pdf->stats.creationdate->meta.success = 1;  | 
4823  | 138  |             }  | 
4824  | 19.6k  |         }  | 
4825  |  |  | 
4826  | 23.9k  |         if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) { | 
4827  | 4.41k  |             cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data);  | 
4828  | 19.5k  |         } else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) { | 
4829  | 1.89k  |             char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);  | 
4830  | 1.89k  |             cli_jsonstr(pdfobj, "CreationDate", b64);  | 
4831  | 1.89k  |             cli_jsonbool(pdfobj, "CreationDate_base64", 1);  | 
4832  | 1.89k  |             free(b64);  | 
4833  | 17.6k  |         } else { | 
4834  | 17.6k  |             cli_jsonstr(pdfobj, "CreationDate", "");  | 
4835  | 17.6k  |         }  | 
4836  | 23.9k  |     }  | 
4837  | 441k  |     if (pdf->stats.title) { | 
4838  | 9.43k  |         if (!pdf->stats.title->meta.success) { | 
4839  | 6.09k  |             char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);  | 
4840  | 6.09k  |             if (out) { | 
4841  | 235  |                 free(pdf->stats.title->data);  | 
4842  | 235  |                 pdf->stats.title->data         = out;  | 
4843  | 235  |                 pdf->stats.title->meta.length  = strlen(out);  | 
4844  | 235  |                 pdf->stats.title->meta.success = 1;  | 
4845  | 235  |             }  | 
4846  | 6.09k  |         }  | 
4847  |  |  | 
4848  | 9.43k  |         if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) { | 
4849  | 3.57k  |             cli_jsonstr(pdfobj, "Title", pdf->stats.title->data);  | 
4850  | 5.86k  |         } else if (pdf->stats.title->data && pdf->stats.title->meta.length) { | 
4851  | 2.73k  |             char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length);  | 
4852  | 2.73k  |             cli_jsonstr(pdfobj, "Title", b64);  | 
4853  | 2.73k  |             cli_jsonbool(pdfobj, "Title_base64", 1);  | 
4854  | 2.73k  |             free(b64);  | 
4855  | 3.12k  |         } else { | 
4856  | 3.12k  |             cli_jsonstr(pdfobj, "Title", "");  | 
4857  | 3.12k  |         }  | 
4858  | 9.43k  |     }  | 
4859  | 441k  |     if (pdf->stats.subject) { | 
4860  | 11.1k  |         if (!pdf->stats.subject->meta.success) { | 
4861  | 4.58k  |             char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);  | 
4862  | 4.58k  |             if (out) { | 
4863  | 335  |                 free(pdf->stats.subject->data);  | 
4864  | 335  |                 pdf->stats.subject->data         = out;  | 
4865  | 335  |                 pdf->stats.subject->meta.length  = strlen(out);  | 
4866  | 335  |                 pdf->stats.subject->meta.success = 1;  | 
4867  | 335  |             }  | 
4868  | 4.58k  |         }  | 
4869  |  |  | 
4870  | 11.1k  |         if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) { | 
4871  | 6.87k  |             cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data);  | 
4872  | 6.87k  |         } else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) { | 
4873  | 1.23k  |             char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length);  | 
4874  | 1.23k  |             cli_jsonstr(pdfobj, "Subject", b64);  | 
4875  | 1.23k  |             cli_jsonbool(pdfobj, "Subject_base64", 1);  | 
4876  | 1.23k  |             free(b64);  | 
4877  | 3.01k  |         } else { | 
4878  | 3.01k  |             cli_jsonstr(pdfobj, "Subject", "");  | 
4879  | 3.01k  |         }  | 
4880  | 11.1k  |     }  | 
4881  | 441k  |     if (pdf->stats.keywords) { | 
4882  | 5.68k  |         if (!pdf->stats.keywords->meta.success) { | 
4883  | 2.31k  |             char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);  | 
4884  | 2.31k  |             if (out) { | 
4885  | 14  |                 free(pdf->stats.keywords->data);  | 
4886  | 14  |                 pdf->stats.keywords->data         = out;  | 
4887  | 14  |                 pdf->stats.keywords->meta.length  = strlen(out);  | 
4888  | 14  |                 pdf->stats.keywords->meta.success = 1;  | 
4889  | 14  |             }  | 
4890  | 2.31k  |         }  | 
4891  |  |  | 
4892  | 5.68k  |         if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) { | 
4893  | 3.38k  |             cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data);  | 
4894  | 3.38k  |         } else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) { | 
4895  | 710  |             char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length);  | 
4896  | 710  |             cli_jsonstr(pdfobj, "Keywords", b64);  | 
4897  | 710  |             cli_jsonbool(pdfobj, "Keywords_base64", 1);  | 
4898  | 710  |             free(b64);  | 
4899  | 1.59k  |         } else { | 
4900  | 1.59k  |             cli_jsonstr(pdfobj, "Keywords", "");  | 
4901  | 1.59k  |         }  | 
4902  | 5.68k  |     }  | 
4903  | 441k  |     if (pdf->stats.ninvalidobjs)  | 
4904  | 9.35k  |         cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);  | 
4905  | 441k  |     if (pdf->stats.njs)  | 
4906  | 4.33k  |         cli_jsonint(pdfobj, "JavaScriptObjectCount", pdf->stats.njs);  | 
4907  | 441k  |     if (pdf->stats.nflate)  | 
4908  | 73.1k  |         cli_jsonint(pdfobj, "DeflateObjectCount", pdf->stats.nflate);  | 
4909  | 441k  |     if (pdf->stats.nactivex)  | 
4910  | 0  |         cli_jsonint(pdfobj, "ActiveXObjectCount", pdf->stats.nactivex);  | 
4911  | 441k  |     if (pdf->stats.nflash)  | 
4912  | 0  |         cli_jsonint(pdfobj, "FlashObjectCount", pdf->stats.nflash);  | 
4913  | 441k  |     if (pdf->stats.ncolors)  | 
4914  | 0  |         cli_jsonint(pdfobj, "ColorCount", pdf->stats.ncolors);  | 
4915  | 441k  |     if (pdf->stats.nasciihexdecode)  | 
4916  | 9.96k  |         cli_jsonint(pdfobj, "AsciiHexDecodeObjectCount", pdf->stats.nasciihexdecode);  | 
4917  | 441k  |     if (pdf->stats.nascii85decode)  | 
4918  | 14.7k  |         cli_jsonint(pdfobj, "Ascii85DecodeObjectCount", pdf->stats.nascii85decode);  | 
4919  | 441k  |     if (pdf->stats.nembeddedfile)  | 
4920  | 13.9k  |         cli_jsonint(pdfobj, "EmbeddedFileCount", pdf->stats.nembeddedfile);  | 
4921  | 441k  |     if (pdf->stats.nimage)  | 
4922  | 4.34k  |         cli_jsonint(pdfobj, "ImageCount", pdf->stats.nimage);  | 
4923  | 441k  |     if (pdf->stats.nlzw)  | 
4924  | 12.1k  |         cli_jsonint(pdfobj, "LZWCount", pdf->stats.nlzw);  | 
4925  | 441k  |     if (pdf->stats.nrunlengthdecode)  | 
4926  | 12.8k  |         cli_jsonint(pdfobj, "RunLengthDecodeCount", pdf->stats.nrunlengthdecode);  | 
4927  | 441k  |     if (pdf->stats.nfaxdecode)  | 
4928  | 1.58k  |         cli_jsonint(pdfobj, "FaxDecodeCount", pdf->stats.nfaxdecode);  | 
4929  | 441k  |     if (pdf->stats.njbig2decode)  | 
4930  | 1.80k  |         cli_jsonint(pdfobj, "JBIG2DecodeCount", pdf->stats.njbig2decode);  | 
4931  | 441k  |     if (pdf->stats.ndctdecode)  | 
4932  | 8.44k  |         cli_jsonint(pdfobj, "DCTDecodeCount", pdf->stats.ndctdecode);  | 
4933  | 441k  |     if (pdf->stats.njpxdecode)  | 
4934  | 1.38k  |         cli_jsonint(pdfobj, "JPXDecodeCount", pdf->stats.njpxdecode);  | 
4935  | 441k  |     if (pdf->stats.ncrypt)  | 
4936  | 9.12k  |         cli_jsonint(pdfobj, "CryptCount", pdf->stats.ncrypt);  | 
4937  | 441k  |     if (pdf->stats.nstandard)  | 
4938  | 27.9k  |         cli_jsonint(pdfobj, "StandardCount", pdf->stats.nstandard);  | 
4939  | 441k  |     if (pdf->stats.nsigned)  | 
4940  | 1.92k  |         cli_jsonint(pdfobj, "SignedCount", pdf->stats.nsigned);  | 
4941  | 441k  |     if (pdf->stats.nopenaction)  | 
4942  | 29.1k  |         cli_jsonint(pdfobj, "OpenActionCount", pdf->stats.nopenaction);  | 
4943  | 441k  |     if (pdf->stats.nlaunch)  | 
4944  | 7.72k  |         cli_jsonint(pdfobj, "LaunchCount", pdf->stats.nlaunch);  | 
4945  | 441k  |     if (pdf->stats.npage)  | 
4946  | 15.2k  |         cli_jsonint(pdfobj, "PageCount", pdf->stats.npage);  | 
4947  | 441k  |     if (pdf->stats.nrichmedia)  | 
4948  | 984  |         cli_jsonint(pdfobj, "RichMediaCount", pdf->stats.nrichmedia);  | 
4949  | 441k  |     if (pdf->stats.nacroform)  | 
4950  | 6.54k  |         cli_jsonint(pdfobj, "AcroFormCount", pdf->stats.nacroform);  | 
4951  | 441k  |     if (pdf->stats.nxfa)  | 
4952  | 619  |         cli_jsonint(pdfobj, "XFACount", pdf->stats.nxfa);  | 
4953  | 441k  |     if (pdf->flags & (1 << BAD_PDF_VERSION))  | 
4954  | 304k  |         cli_jsonbool(pdfobj, "BadVersion", 1);  | 
4955  | 441k  |     if (pdf->flags & (1 << BAD_PDF_HEADERPOS))  | 
4956  | 414k  |         cli_jsonbool(pdfobj, "BadHeaderPosition", 1);  | 
4957  | 441k  |     if (pdf->flags & (1 << BAD_PDF_TRAILER))  | 
4958  | 428k  |         cli_jsonbool(pdfobj, "BadTrailer", 1);  | 
4959  | 441k  |     if (pdf->flags & (1 << BAD_PDF_TOOMANYOBJS))  | 
4960  | 0  |         cli_jsonbool(pdfobj, "TooManyObjects", 1);  | 
4961  | 441k  |     if (pdf->flags & (1 << ENCRYPTED_PDF)) { | 
4962  | 53.3k  |         cli_jsonbool(pdfobj, "Encrypted", 1);  | 
4963  | 53.3k  |         if (pdf->flags & (1 << DECRYPTABLE_PDF))  | 
4964  | 3.91k  |             cli_jsonbool(pdfobj, "Decryptable", 1);  | 
4965  | 49.3k  |         else  | 
4966  | 49.3k  |             cli_jsonbool(pdfobj, "Decryptable", 0);  | 
4967  | 53.3k  |     }  | 
4968  |  |  | 
4969  | 2.03M  |     for (i = 0; i < pdf->nobjs; i++) { | 
4970  | 1.59M  |         if (pdf->objs[i]->flags & (1 << OBJ_TRUNCATED)) { | 
4971  | 352k  |             json_object *truncobj;  | 
4972  |  |  | 
4973  | 352k  |             truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");  | 
4974  | 352k  |             if (!(truncobj))  | 
4975  | 0  |                 continue;  | 
4976  |  |  | 
4977  | 352k  |             cli_jsonint_array(truncobj, pdf->objs[i]->id >> 8);  | 
4978  | 352k  |         }  | 
4979  | 1.59M  |     }  | 
4980  |  |  | 
4981  | 441k  | cleanup:  | 
4982  | 441k  |     pdf_free_stats(pdf);  | 
4983  | 441k  | }  |