Coverage Report

Created: 2026-03-31 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xpdf-4.06/xpdf/Parser.cc
Line
Count
Source
1
//========================================================================
2
//
3
// Parser.cc
4
//
5
// Copyright 1996-2003 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
11
#include <stddef.h>
12
#include <string.h>
13
#include "gmempp.h"
14
#include "Object.h"
15
#include "Array.h"
16
#include "Dict.h"
17
#include "Decrypt.h"
18
#include "Parser.h"
19
#include "XRef.h"
20
#include "Error.h"
21
22
562k
Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
23
562k
  xref = xrefA;
24
562k
  lexer = lexerA;
25
562k
  inlineImg = 0;
26
562k
  allowStreams = allowStreamsA;
27
562k
  lexer->getObj(&buf1);
28
562k
  lexer->getObj(&buf2);
29
562k
}
30
31
562k
Parser::~Parser() {
32
562k
  buf1.free();
33
562k
  buf2.free();
34
562k
  delete lexer;
35
562k
}
36
37
Object *Parser::getObj(Object *obj, GBool simpleOnly,
38
           Guchar *fileKey,
39
           CryptAlgorithm encAlgorithm, int keyLength,
40
248M
           int objNum, int objGen, int recursion) {
41
248M
  char *key;
42
248M
  Stream *str;
43
248M
  Object obj2;
44
248M
  int num;
45
248M
  DecryptStream *decrypt;
46
248M
  GString *s, *s2;
47
248M
  int c;
48
49
  // refill buffer after inline image data
50
248M
  if (inlineImg == 2) {
51
111k
    buf1.free();
52
111k
    buf2.free();
53
111k
    lexer->getObj(&buf1);
54
111k
    lexer->getObj(&buf2);
55
111k
    inlineImg = 0;
56
111k
  }
57
58
  // array
59
248M
  if (!simpleOnly && recursion < objectRecursionLimit && buf1.isCmd("[")) {
60
1.49M
    shift();
61
1.49M
    obj->initArray(xref);
62
34.0M
    while (!buf1.isCmd("]") && !buf1.isEOF())
63
32.5M
      obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
64
32.5M
         objNum, objGen, recursion + 1));
65
1.49M
    if (buf1.isEOF())
66
1.11M
      error(errSyntaxError, getPos(), "End of file inside array");
67
1.49M
    shift();
68
69
  // dictionary or stream
70
246M
  } else if (!simpleOnly && recursion < objectRecursionLimit &&
71
57.3M
       buf1.isCmd("<<")) {
72
599k
    shift();
73
599k
    obj->initDict(xref);
74
6.57M
    while (!buf1.isCmd(">>") && !buf1.isEOF()) {
75
5.98M
      if (!buf1.isName()) {
76
3.39M
  error(errSyntaxError, getPos(),
77
3.39M
        "Dictionary key must be a name object");
78
3.39M
  shift();
79
3.39M
      } else {
80
2.58M
  key = copyString(buf1.getName());
81
2.58M
  shift();
82
2.58M
  if (buf1.isEOF() || buf1.isError()) {
83
11.9k
    gfree(key);
84
11.9k
    break;
85
11.9k
  }
86
2.57M
  obj->dictAdd(key, getObj(&obj2, gFalse,
87
2.57M
         fileKey, encAlgorithm, keyLength,
88
2.57M
         objNum, objGen, recursion + 1));
89
2.57M
      }
90
5.98M
    }
91
599k
    if (buf1.isEOF())
92
41.2k
      error(errSyntaxError, getPos(), "End of file inside dictionary");
93
    // stream objects are not allowed inside content streams or
94
    // object streams
95
599k
    if (allowStreams && buf2.isCmd("stream")) {
96
161k
      if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
97
161k
          objNum, objGen, recursion + 1))) {
98
153k
  obj->initStream(str);
99
153k
      } else {
100
8.86k
  obj->free();
101
8.86k
  obj->initError();
102
8.86k
      }
103
437k
    } else {
104
437k
      shift();
105
437k
    }
106
107
  // indirect reference or integer
108
246M
  } else if (buf1.isInt()) {
109
28.9M
    num = buf1.getInt();
110
28.9M
    shift();
111
28.9M
    if (buf1.isInt() && buf2.isCmd("R")) {
112
868k
      int gen = buf1.getInt();
113
868k
      if (num >= 0 && gen >= 0) {
114
866k
  obj->initRef(num, gen);
115
866k
      } else {
116
1.82k
  error(errSyntaxError, getPos(),
117
1.82k
        "Negative number or generation in indirect reference");
118
1.82k
  obj->initError();
119
1.82k
      }
120
868k
      shift();
121
868k
      shift();
122
28.1M
    } else {
123
28.1M
      obj->initInt(num);
124
28.1M
    }
125
126
  // string
127
217M
  } else if (buf1.isString() && fileKey) {
128
13.5k
    s = buf1.getString();
129
13.5k
    s2 = new GString();
130
13.5k
    obj2.initNull();
131
13.5k
    decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
132
13.5k
                s->getLength(), &obj2),
133
13.5k
        fileKey, encAlgorithm, keyLength,
134
13.5k
        objNum, objGen);
135
13.5k
    decrypt->reset();
136
340k
    while ((c = decrypt->getChar()) != EOF) {
137
326k
      s2->append((char)c);
138
326k
    }
139
13.5k
    delete decrypt;
140
13.5k
    obj->initString(s2);
141
13.5k
    shift();
142
143
  // simple object
144
217M
  } else {
145
217M
    buf1.copy(obj);
146
217M
    shift();
147
217M
  }
148
149
248M
  return obj;
150
248M
}
151
152
Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
153
         CryptAlgorithm encAlgorithm, int keyLength,
154
161k
         int objNum, int objGen, int recursion) {
155
  // get stream start position
156
161k
  lexer->skipToNextLine();
157
161k
  Stream *curStr = lexer->getStream();
158
161k
  if (!curStr) {
159
904
    return NULL;
160
904
  }
161
161k
  GFileOffset pos = curStr->getPos();
162
163
161k
  GBool haveLength = gFalse;
164
161k
  GFileOffset length = 0;
165
161k
  GFileOffset endPos;
166
167
  // check for length in damaged file
168
161k
  if (xref && xref->getStreamEnd(pos, &endPos)) {
169
122k
    length = endPos - pos;
170
122k
    haveLength = gTrue;
171
172
  // get length from the stream object
173
122k
  } else {
174
38.7k
    Object obj;
175
38.7k
    dict->dictLookup("Length", &obj, recursion);
176
38.7k
    if (obj.isInt()) {
177
28.1k
      length = (GFileOffset)(Guint)obj.getInt();
178
28.1k
      haveLength = gTrue;
179
28.1k
    } else {
180
10.6k
      error(errSyntaxError, getPos(),
181
10.6k
      "Missing or invalid 'Length' attribute in stream");
182
10.6k
    }
183
38.7k
    obj.free();
184
38.7k
  }
185
186
  // in badly damaged PDF files, we can run off the end of the input
187
  // stream immediately after the "stream" token
188
161k
  if (!lexer->getStream()) {
189
0
    return NULL;
190
0
  }
191
192
  // copy the base stream (Lexer will free stream objects when it gets
193
  // to end of stream -- which can happen in the shift() calls below)
194
161k
  BaseStream *baseStr =
195
161k
      (BaseStream *)lexer->getStream()->getBaseStream()->copy();
196
197
  // 'Length' attribute is missing -- search for 'endstream'
198
161k
  if (!haveLength) {
199
10.6k
    GBool foundEndstream = gFalse;
200
10.6k
    char endstreamBuf[8];
201
10.6k
    if ((curStr = lexer->getStream())) {
202
10.6k
      int c;
203
5.16M
      while ((c = curStr->getChar()) != EOF) {
204
5.15M
  if (c == 'e' &&
205
125k
      curStr->getBlock(endstreamBuf, 8) == 8 &&
206
123k
      !memcmp(endstreamBuf, "ndstream", 8)) {
207
2.66k
    length = curStr->getPos() - 9 - pos;
208
2.66k
    foundEndstream = gTrue;
209
2.66k
    break;
210
2.66k
  }
211
5.15M
      }
212
10.6k
    }
213
10.6k
    if (!foundEndstream) {
214
7.96k
      error(errSyntaxError, getPos(), "Couldn't find 'endstream' for stream");
215
7.96k
      delete baseStr;
216
7.96k
      return NULL;
217
7.96k
    }
218
10.6k
  }
219
220
  // make new base stream
221
153k
  Stream *str = baseStr->makeSubStream(pos, gTrue, length, dict);
222
223
  // look for the 'endstream' marker
224
153k
  if (haveLength) {
225
    // skip over stream data
226
150k
    lexer->setPos(pos + length);
227
228
    // check for 'endstream'
229
    // NB: we never reuse the Parser object to parse objects after a
230
    // stream, and we could (if the PDF file is damaged) be in the
231
    // middle of binary data at this point, so we check the stream
232
    // data directly for 'endstream', rather than calling shift() to
233
    // parse objects
234
150k
    GBool foundEndstream = gFalse;
235
150k
    char endstreamBuf[8];
236
150k
    if ((curStr = lexer->getStream())) {
237
      // skip up to 100 whitespace chars
238
150k
      int c;
239
163k
      for (int i = 0; i < 100; ++i) {
240
163k
  c = curStr->getChar();
241
163k
  if (!Lexer::isSpace(c)) {
242
150k
    break;
243
150k
  }
244
163k
      }
245
150k
      if (c == 'e') {
246
125k
  if (curStr->getBlock(endstreamBuf, 8) == 8 &&
247
125k
      !memcmp(endstreamBuf, "ndstream", 8)) {
248
124k
    foundEndstream = gTrue;
249
124k
  }
250
125k
      }
251
150k
    }
252
150k
    if (!foundEndstream) {
253
25.9k
      error(errSyntaxError, getPos(), "Missing 'endstream'");
254
      // kludge for broken PDF files: just add 5k to the length, and
255
      // hope it's enough
256
      // (dict is now owned by str, so we need to copy it before deleting str)
257
25.9k
      Object obj;
258
25.9k
      dict->copy(&obj);
259
25.9k
      delete str;
260
25.9k
      length += 5000;
261
25.9k
      str = baseStr->makeSubStream(pos, gTrue, length, &obj);
262
25.9k
    }
263
150k
  }
264
265
  // free the copied base stream
266
153k
  delete baseStr;
267
268
  // handle decryption
269
153k
  if (fileKey) {
270
    // the 'Crypt' filter is used to mark unencrypted metadata streams
271
    //~ this should also check for an empty DecodeParams entry
272
1.75k
    GBool encrypted = gTrue;
273
1.75k
    Object obj;
274
1.75k
    dict->dictLookup("Filter", &obj, recursion);
275
1.75k
    if (obj.isName("Crypt")) {
276
1
      encrypted = gFalse;
277
1.75k
    } else if (obj.isArray() && obj.arrayGetLength() >= 1) {
278
18
      Object obj2;
279
18
      if (obj.arrayGet(0, &obj2)->isName("Crypt")) {
280
0
  encrypted = gFalse;
281
0
      }
282
18
      obj2.free();
283
18
    }
284
1.75k
    obj.free();
285
1.75k
    if (encrypted) {
286
1.75k
      str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
287
1.75k
            objNum, objGen);
288
1.75k
    }
289
1.75k
  }
290
291
  // get filters
292
153k
  str = str->addFilters(dict, recursion);
293
294
153k
  return str;
295
161k
}
296
297
257M
void Parser::shift() {
298
257M
  if (inlineImg > 0) {
299
146k
    if (inlineImg < 2) {
300
128k
      ++inlineImg;
301
128k
    } else {
302
      // in a damaged content stream, if 'ID' shows up in the middle
303
      // of a dictionary, we need to reset
304
17.6k
      inlineImg = 0;
305
17.6k
    }
306
257M
  } else if (buf2.isCmd("ID")) {
307
128k
    lexer->skipChar();    // skip char after 'ID' command
308
128k
    inlineImg = 1;
309
128k
  }
310
257M
  buf1.free();
311
257M
  buf1 = buf2;
312
257M
  if (inlineImg > 0)    // don't buffer inline image data
313
257k
    buf2.initNull();
314
257M
  else
315
257M
    lexer->getObj(&buf2);
316
257M
}