/src/poppler/poppler/Parser.cc

Source
//========================================================================
//
// Parser.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006, 2009, 201, 2010, 2013, 2014, 2017-2020, 2025, 2026 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018, 2019 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Marek Kasik <mkasik@redhat.com>
// Copyright (C) 2024 Nelson Benítez León <nbenitezl@gmail.com>
// Copyright (C) 2024-2026 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
// Copyright (C) 2025 Arnav V <arnav0872@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

#include <config.h>

#include <climits>
#include "Object.h"
#include "Array.h"
#include "Dict.h"
#include "Decrypt.h"
#include "Parser.h"
#include "XRef.h"
#include "Error.h"

// Max number of nested objects.  Parser::getObj returns an error object
// once its recursion depth reaches this value.  This is used to catch
// infinite loops in the object structure, and also technically valid
// files with lots of nested arrays that made us consume all the stack.
constexpr int recursionLimit = 500;

// Creates a parser whose lexer takes ownership of <streamA> and uses <xrefA>.
// <allowStreamsA> controls whether getObj() is allowed to build stream
// objects when it meets a 'stream' keyword after a dictionary.
Parser::Parser(XRef *xrefA, std::unique_ptr<Stream> &&streamA, bool allowStreamsA) : lexer { xrefA, std::move(streamA) }
{
    // plain parser state
    inlineImg = 0;
    allowStreams = allowStreamsA;

    // prime the two-token lookahead; the order of these reads matters
    buf1 = lexer.getObj();
    buf2 = lexer.getObj();
}

// Creates a parser whose lexer is built from <xrefA> and <objectA>.
// <allowStreamsA> controls whether getObj() is allowed to build stream
// objects when it meets a 'stream' keyword after a dictionary.
Parser::Parser(XRef *xrefA, Object *objectA, bool allowStreamsA) : lexer { xrefA, objectA }
{
    // plain parser state
    inlineImg = 0;
    allowStreams = allowStreamsA;

    // prime the two-token lookahead; the order of these reads matters
    buf1 = lexer.getObj();
    buf2 = lexer.getObj();
}

// Defaulted destructor, defined out of line in this translation unit.
Parser::~Parser() = default;

// Convenience overload: parses the next object without any decryption
// context (no file key, zero key length, object number/generation 0) and
// without restricting the parse to simple objects.
Object Parser::getObj(int recursion)
{
    constexpr bool simpleOnly = false;
    const unsigned char *const noFileKey = nullptr;
    constexpr int noKeyLength = 0;
    constexpr int noObjNum = 0;
    constexpr int noObjGen = 0;

    return getObj(simpleOnly, noFileKey, cryptRC4, noKeyLength, noObjNum, noObjGen, recursion);
}

// Runs the bytes of <s> through a DecryptStream set up with the given key,
// algorithm and object number/generation, and returns the resulting bytes
// as a newly allocated GooString.
//
// Never returns nullptr: the callers in this file hand the result straight
// to Object(std::unique_ptr<GooString>) without a null check, so when the
// decrypt stream cannot be rewound an empty string is returned instead.
static std::unique_ptr<GooString> decryptedString(const std::string &s, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
{
    std::unique_ptr<GooString> res = std::make_unique<GooString>();

    DecryptStream decrypt(std::make_unique<MemStream>(s.c_str(), 0, s.size(), Object::null()), fileKey, encAlgorithm, keyLength, { .num = objNum, .gen = objGen });
    if (!decrypt.rewind()) {
        // nothing could be decrypted; hand back the (empty) string rather
        // than a null pointer the callers would not expect
        return res;
    }

    int c;
    while ((c = decrypt.getChar()) != EOF) {
        res->push_back(static_cast<char>(c));
    }
    return res;
}

// Parses and returns the next object from the token stream, consuming the
// tokens that make it up from the buf1/buf2 lookahead.
//
//   simpleOnly     when true, '[' and '<<' are not treated as the start of
//                  an array / dictionary
//   fileKey        decryption key; when null no string is decrypted here
//   encAlgorithm,
//   keyLength,
//   objNum, objGen forwarded to string decryption, nested getObj() calls
//                  and makeStream()
//   recursion      current nesting depth; an error object is returned once
//                  it reaches recursionLimit
//   strict         when true, the syntax errors detected below make the
//                  call return an error object instead of being tolerated
//   decryptString  when false, a string parsed directly by this call is
//                  returned as is, even if fileKey is set
//
// Returns the parsed object, or Object::error() on failure.
Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict, bool decryptString)
{
    Object obj;

    // refill buffer after inline image data
    if (inlineImg == 2) {
        buf1 = lexer.getObj();
        buf2 = lexer.getObj();
        inlineImg = 0;
    }

    // nested too deeply: give up on this object
    if (unlikely(recursion >= recursionLimit)) {
        return Object::error();
    }

    // array
    if (!simpleOnly && buf1.isCmd("[")) {
        shift();
        obj = Object(std::make_unique<Array>(lexer.getXRef()));
        // elements are parsed one level deeper; the loop is skipped entirely
        // when that level would already hit the recursion limit
        while (!buf1.isCmd("]") && !buf1.isEOF() && recursion + 1 < recursionLimit) {
            Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1);
            obj.arrayAdd(std::move(obj2));
        }
        if (recursion + 1 >= recursionLimit && strict) {
            goto err;
        }
        if (buf1.isEOF()) {
            error(errSyntaxError, getPos(), "End of file inside array");
            if (strict) {
                goto err;
            }
        }
        // consume the closing ']' (or whatever token ended the loop)
        shift();

        // dictionary or stream
    } else if (!simpleOnly && buf1.isCmd("<<")) {
        shift(objNum);
        obj = Object(std::make_unique<Dict>(lexer.getXRef()));
        // set once a key named "Contents" has been seen in this dictionary
        bool hasContentsEntry = false;
        while (!buf1.isCmd(">>") && !buf1.isEOF()) {
            if (!buf1.isName()) {
                error(errSyntaxError, getPos(), "Dictionary key must be a name object");
                if (strict) {
                    goto err;
                }
                // non-strict: skip the offending token and keep going
                shift();
            } else {
                // buf1 will go away in shift(), so keep the key
                const auto key = std::move(buf1);
                shift();
                // key without a usable value: stop reading entries
                if (buf1.isEOF() || buf1.isError()) {
                    if (strict && buf1.isError()) {
                        goto err;
                    }
                    break;
                }
                // We don't decrypt strings that are the value of "Contents" key entries. We decrypt them if needed a few lines below.
                // The "Contents" field of Sig dictionaries is not encrypted, but we can't know the type of the dictionary here yet
                // so we don't decrypt any Contents and if later we find it's not a Sig dictionary we decrypt it
                // (only the first "Contents" key of the dictionary is treated this way)
                const bool isContents = !hasContentsEntry && key.isName("Contents");
                hasContentsEntry = hasContentsEntry || isContents;
                Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1, /*strict*/ false, /*decryptString*/ !isContents);
                // the value could not be parsed because of the recursion
                // limit: drop this entry and stop reading the dictionary
                if (unlikely(recursion + 1 >= recursionLimit)) {
                    break;
                }
                obj.dictAdd(key.getName(), std::move(obj2));
            }
        }
        if (buf1.isEOF()) {
            error(errSyntaxError, getPos(), "End of file inside dictionary");
            if (strict) {
                goto err;
            }
        }
        // deferred decryption of "Contents" for dictionaries that turned out
        // not to be Sig dictionaries
        if (fileKey && hasContentsEntry) {
            Dict *dict = obj.getDict();
            const bool isSigDict = dict->is("Sig");
            if (!isSigDict) {
                const Object &contentsObj = dict->lookupNF("Contents");
                if (contentsObj.isString()) {
                    std::unique_ptr<GooString> s = decryptedString(contentsObj.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
                    dict->set("Contents", Object(std::move(s)));
                }
            }
        }
        // stream objects are not allowed inside content streams or
        // object streams
        if (buf2.isCmd("stream")) {
            if (allowStreams) {
                if (auto str = makeStream(std::move(obj), fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1, strict)) {
                    return Object(std::move(str));
                }
            }
            // streams not allowed here, or the stream could not be built
            return Object::error();
        }
        // plain dictionary: consume the closing '>>'
        shift();

        // indirect reference or integer
    } else if (buf1.isInt()) {
        const int num = buf1.getInt();
        shift();
        // "<int> <int> R" is an indirect reference
        if (buf1.isInt() && buf2.isCmd("R")) {
            const int gen = buf1.getInt();
            shift();
            shift();

            // invalid object number or generation: return a
            // default-constructed Object instead of a reference
            if (unlikely(num <= 0 || gen < 0)) {
                return Object();
            }

            Ref r;
            r.num = num;
            r.gen = gen;
            return Object(r);
        }
        return Object(num);

        // string
    } else if (decryptString && buf1.isString() && fileKey) {
        std::unique_ptr<GooString> s2 = decryptedString(buf1.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
        obj = Object(std::move(s2));
        shift();

        // simple object
    } else {
        // avoid re-allocating memory for complex objects like strings by
        // moving <buf1> into <obj>; shift() then refills <buf1>
        obj = std::move(buf1);
        shift();
    }

    return obj;

    // common exit for the strict-mode failures above
err:
    return Object::error();
}

// Builds the stream object that follows the dictionary <dict>, which the
// caller has just parsed (buf1 is '>>' and buf2 is the 'stream' keyword).
//
//   dict           the stream dictionary; moved into the created stream
//   fileKey        when non-null the stream is wrapped in a DecryptStream
//   encAlgorithm,
//   keyLength      forwarded to the DecryptStream
//   objNum, objGen identify the object being parsed; used for the xref
//                  "Parsing" flag and for decryption
//   recursion      forwarded to the 'Length' lookup and to addFilters
//   strict         when true, a bad 'Length' or a missing 'endstream'
//                  makes the call fail instead of being worked around
//
// Returns the stream with its filters applied, or nullptr on failure.
//
// While the stream is being built the xref entry of <objNum> is flagged as
// XRefEntry::Parsing so that a nested attempt to parse the same object is
// rejected.  The flag is cleared on every way out of the function once it
// has been set, not only on success, so a failed attempt does not leave the
// entry permanently marked as "being parsed".
std::unique_ptr<Stream> Parser::makeStream(Object &&dict, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict)
{
    BaseStream *baseStr;
    Goffset length;
    Goffset pos, endPos;

    if (XRef *xref = lexer.getXRef()) {
        XRefEntry *entry = xref->getEntry(objNum, false);
        if (entry) {
            if (!entry->getFlag(XRefEntry::Parsing) || (objNum == 0 && objGen == 0)) {
                entry->setFlag(XRefEntry::Parsing, true);
            } else {
                // the flag belongs to the outer call that is still parsing
                // this object, so it must not be cleared here
                error(errSyntaxError, getPos(), "Object '{0:d} {1:d} obj' is being already parsed", objNum, objGen);
                return nullptr;
            }
        }
    }

    // From here on, clear the Parsing flag whenever the function is left.
    struct ParsingFlagReset
    {
        Lexer &lexerRef;
        int num;

        ~ParsingFlagReset()
        {
            if (XRef *xref = lexerRef.getXRef()) {
                // Don't try to reuse the entry from the block at the start
                // of the function, xref can change in the middle because of
                // reconstruction
                XRefEntry *entry = xref->getEntry(num, false);
                if (entry) {
                    entry->setFlag(XRefEntry::Parsing, false);
                }
            }
        }
    } parsingFlagReset { lexer, objNum };

    // get stream start position
    lexer.skipToNextLine();
    Stream *lexerStream;
    if (!(lexerStream = lexer.getStream())) {
        return nullptr;
    }
    pos = lexerStream->getPos();

    // get length
    Object obj = dict.dictLookup("Length", recursion);
    if (obj.isInt()) {
        length = obj.getInt();
    } else if (obj.isInt64()) {
        length = obj.getInt64();
    } else {
        error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
        if (strict) {
            return nullptr;
        }
        length = 0;
    }

    // check for length in damaged file
    if (lexer.hasXRef() && lexer.getXRef()->getStreamEnd(pos, &endPos)) {
        length = endPos - pos;
    }

    // in badly damaged PDF files, we can run off the end of the input
    // stream immediately after the "stream" token
    if (!lexer.getStream()) {
        return nullptr;
    }
    baseStr = lexer.getStream()->getBaseStream();

    // skip over stream data
    if (Lexer::LOOK_VALUE_NOT_CACHED != lexer.lookCharLastValueCached) {
        // take into account the fact that we've cached one value
        pos = pos - 1;
        lexer.lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
    }
    if (unlikely(length < 0)) {
        return nullptr;
    }
    // pos + length must not overflow
    if (unlikely(pos > LLONG_MAX - length)) {
        return nullptr;
    }
    lexer.setPos(pos + length);

    // refill token buffers and check for 'endstream'
    shift(); // kill '>>'
    shift("endstream", objNum); // kill 'stream'
    if (buf1.isCmd("endstream")) {
        shift();
    } else {
        error(errSyntaxError, getPos(), "Missing 'endstream' or incorrect stream length");
        if (strict) {
            return nullptr;
        }
        if (lexer.hasXRef() && lexer.getStream()) {
            // shift until we find the proper endstream or we change to another object or reach eof
            length = lexer.getPos() - pos;
            // NOTE(review): buf1 was just found not to be 'endstream' and has
            // not been shifted since, so this condition cannot hold here.
            if (buf1.isCmd("endstream")) {
                dict.dictSet("Length", Object(length));
            }
        } else {
            // When building the xref we can't use it so use this
            // kludge for broken PDF files: just add 5k to the length, and
            // hope its enough
            if (length < LLONG_MAX - pos - 5000) {
                length += 5000;
            }
        }
    }

    // make base stream
    auto str = baseStr->makeSubStream(pos, true, length, std::move(dict));

    // handle decryption
    if (fileKey) {
        str = std::make_unique<DecryptStream>(std::move(str), fileKey, encAlgorithm, keyLength, Ref { .num = objNum, .gen = objGen });
    }

    // get filters
    Dict *streamDict = str->getDict();
    str = Stream::addFilters(std::move(str), streamDict, recursion);

    // the Parsing flag is cleared by parsingFlagReset on the way out
    return str;
}

// Advances the lookahead by one token: buf2 becomes buf1 and, unless we are
// stepping over inline image data, a fresh token is read into buf2.
// <objNum> is passed through to the lexer.
void Parser::shift(int objNum)
{
    if (inlineImg <= 0) {
        if (buf2.isCmd("ID")) {
            lexer.skipChar(); // skip char after 'ID' command
            inlineImg = 1;
        }
    } else {
        // step 1 -> 2; from 2 go back to 0: in a damaged content stream,
        // if 'ID' shows up in the middle of a dictionary, we need to reset
        inlineImg = (inlineImg < 2) ? inlineImg + 1 : 0;
    }

    buf1 = std::move(buf2);

    if (inlineImg > 0) {
        // don't buffer inline image data
        buf2.setToNull();
        return;
    }
    buf2 = lexer.getObj(objNum);
}

// Like shift(int), but aware of the command <cmdA> the caller expects: when
// the token moved into buf1 is not <cmdA>, buf2 is filled through the
// lexer's getObj(cmdA, objNum) overload instead of the plain one.
void Parser::shift(const char *cmdA, int objNum)
{
    if (inlineImg <= 0) {
        if (buf2.isCmd("ID")) {
            lexer.skipChar(); // skip char after 'ID' command
            inlineImg = 1;
        }
    } else {
        // step 1 -> 2; from 2 go back to 0: in a damaged content stream,
        // if 'ID' shows up in the middle of a dictionary, we need to reset
        inlineImg = (inlineImg < 2) ? inlineImg + 1 : 0;
    }

    buf1 = std::move(buf2);

    if (inlineImg > 0) {
        // don't buffer inline image data
        buf2.setToNull();
        return;
    }

    if (buf1.isCmd(cmdA)) {
        // already at the expected command: read the next token normally
        buf2 = lexer.getObj(objNum);
        return;
    }
    buf2 = lexer.getObj(cmdA, objNum);
}

Coverage Report

Created: 2026-03-31 07:41

Line	Count	Source
1		//========================================================================
2		//
3		// Parser.cc
4		//
5		// Copyright 1996-2003 Glyph & Cog, LLC
6		//
7		//========================================================================
8
9		//========================================================================
10		//
11		// Modified under the Poppler project - http://poppler.freedesktop.org
12		//
13		// All changes made under the Poppler project to this file are licensed
14		// under GPL version 2 or later
15		//
16		// Copyright (C) 2006, 2009, 201, 2010, 2013, 2014, 2017-2020, 2025, 2026 Albert Astals Cid <aacid@kde.org>
17		// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
18		// Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
19		// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
20		// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
21		// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
22		// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
23		// Copyright (C) 2018, 2019 Adam Reichold <adam.reichold@t-online.de>
24		// Copyright (C) 2018 Marek Kasik <mkasik@redhat.com>
25		// Copyright (C) 2024 Nelson Benítez León <nbenitezl@gmail.com>
26		// Copyright (C) 2024-2026 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
27		// Copyright (C) 2025 Arnav V <arnav0872@gmail.com>
28		//
29		// To see a description of the changes please see the Changelog file that
30		// came with your tarball or type make ChangeLog if you are building from git
31		//
32		//========================================================================
33
34		#include <config.h>
35
36		#include <climits>
37		#include "Object.h"
38		#include "Array.h"
39		#include "Dict.h"
40		#include "Decrypt.h"
41		#include "Parser.h"
42		#include "XRef.h"
43		#include "Error.h"
44
45		// Max number of nested objects. This is used to catch infinite loops
46		// in the object structure. And also technically valid files with
47		// lots of nested arrays that made us consume all the stack
48		constexpr int recursionLimit = 500;
49
50	23.9M	Parser::Parser(XRef *xrefA, std::unique_ptr<Stream> &&streamA, bool allowStreamsA) : lexer { xrefA, std::move(streamA) }
51	23.9M	{
52	23.9M	allowStreams = allowStreamsA;
53	23.9M	buf1 = lexer.getObj();
54	23.9M	buf2 = lexer.getObj();
55	23.9M	inlineImg = 0;
56	23.9M	}
57
58	1.66M	Parser::Parser(XRef xrefA, Object objectA, bool allowStreamsA) : lexer { xrefA, objectA }
59	1.66M	{
60	1.66M	allowStreams = allowStreamsA;
61	1.66M	buf1 = lexer.getObj();
62	1.66M	buf2 = lexer.getObj();
63	1.66M	inlineImg = 0;
64	1.66M	}
65
66	25.6M	Parser::~Parser() = default;
67
68		Object Parser::getObj(int recursion)
69	39.6M	{
70	39.6M	return getObj(false, nullptr, cryptRC4, 0, 0, 0, recursion);
71	39.6M	}
72
73		static std::unique_ptr<GooString> decryptedString(const std::string &s, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
74	231k	{
75	231k	DecryptStream decrypt(std::make_unique<MemStream>(s.c_str(), 0, s.size(), Object::null()), fileKey, encAlgorithm, keyLength, { .num = objNum, .gen = objGen });
76	231k	if (!decrypt.rewind()) {
77	0	return {};
78	0	}
79	231k	std::unique_ptr<GooString> res = std::make_unique<GooString>();
80	231k	int c;
81	19.3M	while ((c = decrypt.getChar()) != EOF) {
82	19.1M	res->push_back((char)c);
83	19.1M	}
84	231k	return res;
85	231k	}
86
87		Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict, bool decryptString)
88	907M	{
89	907M	Object obj;
90
91		// refill buffer after inline image data
92	907M	if (inlineImg == 2) {
93	453k	buf1 = lexer.getObj();
94	453k	buf2 = lexer.getObj();
95	453k	inlineImg = 0;
96	453k	}
97
98	907M	if (unlikely(recursion >= recursionLimit)) {
99	5.92k	return Object::error();
100	5.92k	}
101
102		// array
103	907M	if (!simpleOnly && buf1.isCmd("[")) {
104	24.8M	shift();
105	24.8M	obj = Object(std::make_unique<Array>(lexer.getXRef()));
106	330M	while (!buf1.isCmd("]") && !buf1.isEOF() && recursion + 1 < recursionLimit) {
107	305M	Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1);
108	305M	obj.arrayAdd(std::move(obj2));
109	305M	}
110	24.8M	if (recursion + 1 >= recursionLimit && strict) {
111	0	goto err;
112	0	}
113	24.8M	if (buf1.isEOF()) {
114	1.68M	error(errSyntaxError, getPos(), "End of file inside array");
115	1.68M	if (strict) {
116	1	goto err;
117	1	}
118	1.68M	}
119	24.8M	shift();
120
121		// dictionary or stream
122	882M	} else if (!simpleOnly && buf1.isCmd("<<")) {
123	26.6M	shift(objNum);
124	26.6M	obj = Object(std::make_unique<Dict>(lexer.getXRef()));
125	26.6M	bool hasContentsEntry = false;
126	332M	while (!buf1.isCmd(">>") && !buf1.isEOF()) {
127	306M	if (!buf1.isName()) {
128	111M	error(errSyntaxError, getPos(), "Dictionary key must be a name object");
129	111M	if (strict) {
130	48	goto err;
131	48	}
132	111M	shift();
133	194M	} else {
134		// buf1 will go away in shift(), so keep the key
135	194M	const auto key = std::move(buf1);
136	194M	shift();
137	194M	if (buf1.isEOF() \|\| buf1.isError()) {
138	710k	if (strict && buf1.isError()) {
139	8	goto err;
140	8	}
141	710k	break;
142	710k	}
143		// We don't decrypt strings that are the value of "Contents" key entries. We decrypt them if needed a few lines below.
144		// The "Contents" field of Sig dictionaries is not encrypted, but we can't know the type of the dictionary here yet
145		// so we don't decrypt any Contents and if later we find it's not a Sig dictionary we decrypt it
146	193M	const bool isContents = !hasContentsEntry && key.isName("Contents");
147	193M	hasContentsEntry = hasContentsEntry \|\| isContents;
148	193M	Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1, /strict/ false, /decryptString/ !isContents);
149	193M	if (unlikely(recursion + 1 >= recursionLimit)) {
150	5.92k	break;
151	5.92k	}
152	193M	obj.dictAdd(key.getName(), std::move(obj2));
153	193M	}
154	306M	}
155	26.6M	if (buf1.isEOF()) {
156	1.22M	error(errSyntaxError, getPos(), "End of file inside dictionary");
157	1.22M	if (strict) {
158	16	goto err;
159	16	}
160	1.22M	}
161	26.6M	if (fileKey && hasContentsEntry) {
162	9.57k	Dict *dict = obj.getDict();
163	9.57k	const bool isSigDict = dict->is("Sig");
164	9.57k	if (!isSigDict) {
165	8.97k	const Object &contentsObj = dict->lookupNF("Contents");
166	8.97k	if (contentsObj.isString()) {
167	4.79k	std::unique_ptr<GooString> s = decryptedString(contentsObj.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
168	4.79k	dict->set("Contents", Object(std::move(s)));
169	4.79k	}
170	8.97k	}
171	9.57k	}
172		// stream objects are not allowed inside content streams or
173		// object streams
174	26.6M	if (buf2.isCmd("stream")) {
175	8.04M	if (allowStreams) {
176	7.80M	if (auto str = makeStream(std::move(obj), fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1, strict)) {
177	7.78M	return Object(std::move(str));
178	7.78M	}
179	7.80M	}
180	259k	return Object::error();
181	8.04M	}
182	18.5M	shift();
183
184		// indirect reference or integer
185	855M	} else if (buf1.isInt()) {
186	343M	const int num = buf1.getInt();
187	343M	shift();
188	343M	if (buf1.isInt() && buf2.isCmd("R")) {
189	46.3M	const int gen = buf1.getInt();
190	46.3M	shift();
191	46.3M	shift();
192
193	46.3M	if (unlikely(num <= 0 \|\| gen < 0)) {
194	887k	return Object();
195	887k	}
196
197	45.4M	Ref r;
198	45.4M	r.num = num;
199	45.4M	r.gen = gen;
200	45.4M	return Object(r);
201	46.3M	}
202	297M	return Object(num);
203
204		// string
205	512M	} else if (decryptString && buf1.isString() && fileKey) {
206	226k	std::unique_ptr<GooString> s2 = decryptedString(buf1.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
207	226k	obj = Object(std::move(s2));
208	226k	shift();
209
210		// simple object
211	511M	} else {
212		// avoid re-allocating memory for complex objects like strings by
213		// shallow copy of <buf1> to <obj> and nulling <buf1> so that
214		// subsequent buf1.free() won't free this memory
215	511M	obj = std::move(buf1);
216	511M	shift();
217	511M	}
218
219	555M	return obj;
220
221	73	err:
222	73	return Object::error();
223	907M	}
224
225		std::unique_ptr<Stream> Parser::makeStream(Object &&dict, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict)
226	7.80M	{
227	7.80M	BaseStream *baseStr;
228	7.80M	Goffset length;
229	7.80M	Goffset pos, endPos;
230
231	7.80M	if (XRef *xref = lexer.getXRef()) {
232	7.43M	XRefEntry *entry = xref->getEntry(objNum, false);
233	7.43M	if (entry) {
234	7.43M	if (!entry->getFlag(XRefEntry::Parsing) \|\| (objNum == 0 && objGen == 0)) {
235	7.42M	entry->setFlag(XRefEntry::Parsing, true);
236	7.42M	} else {
237	8.07k	error(errSyntaxError, getPos(), "Object '{0:d} {1:d} obj' is being already parsed", objNum, objGen);
238	8.07k	return nullptr;
239	8.07k	}
240	7.43M	}
241	7.43M	}
242
243		// get stream start position
244	7.79M	lexer.skipToNextLine();
245	7.79M	Stream *lexerStream;
246	7.79M	if (!(lexerStream = lexer.getStream())) {
247	889	return nullptr;
248	889	}
249	7.79M	pos = lexerStream->getPos();
250
251		// get length
252	7.79M	Object obj = dict.dictLookup("Length", recursion);
253	7.79M	if (obj.isInt()) {
254	6.83M	length = obj.getInt();
255	6.83M	} else if (obj.isInt64()) {
256	3.20k	length = obj.getInt64();
257	951k	} else {
258	951k	error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
259	951k	if (strict) {
260	19	return nullptr;
261	19	}
262	951k	length = 0;
263	951k	}
264
265		// check for length in damaged file
266	7.79M	if (lexer.hasXRef() && lexer.getXRef()->getStreamEnd(pos, &endPos)) {
267	5.88M	length = endPos - pos;
268	5.88M	}
269
270		// in badly damaged PDF files, we can run off the end of the input
271		// stream immediately after the "stream" token
272	7.79M	if (!lexer.getStream()) {
273	0	return nullptr;
274	0	}
275	7.79M	baseStr = lexer.getStream()->getBaseStream();
276
277		// skip over stream data
278	7.79M	if (Lexer::LOOK_VALUE_NOT_CACHED != lexer.lookCharLastValueCached) {
279		// take into account the fact that we've cached one value
280	195k	pos = pos - 1;
281	195k	lexer.lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
282	195k	}
283	7.79M	if (unlikely(length < 0)) {
284	1.39k	return nullptr;
285	1.39k	}
286	7.78M	if (unlikely(pos > LLONG_MAX - length)) {
287	183	return nullptr;
288	183	}
289	7.78M	lexer.setPos(pos + length);
290
291		// refill token buffers and check for 'endstream'
292	7.78M	shift(); // kill '>>'
293	7.78M	shift("endstream", objNum); // kill 'stream'
294	7.78M	if (buf1.isCmd("endstream")) {
295	6.65M	shift();
296	6.65M	} else {
297	1.12M	error(errSyntaxError, getPos(), "Missing 'endstream' or incorrect stream length");
298	1.12M	if (strict) {
299	67	return nullptr;
300	67	}
301	1.12M	if (lexer.hasXRef() && lexer.getStream()) {
302		// shift until we find the proper endstream or we change to another object or reach eof
303	425k	length = lexer.getPos() - pos;
304	425k	if (buf1.isCmd("endstream")) {
305	0	dict.dictSet("Length", Object(length));
306	0	}
307	704k	} else {
308		// When building the xref we can't use it so use this
309		// kludge for broken PDF files: just add 5k to the length, and
310		// hope its enough
311	704k	if (length < LLONG_MAX - pos - 5000) {
312	703k	length += 5000;
313	703k	}
314	704k	}
315	1.12M	}
316
317		// make base stream
318	7.78M	auto str = baseStr->makeSubStream(pos, true, length, std::move(dict));
319
320		// handle decryption
321	7.78M	if (fileKey) {
322	69.4k	str = std::make_unique<DecryptStream>(std::move(str), fileKey, encAlgorithm, keyLength, Ref { .num = objNum, .gen = objGen });
323	69.4k	}
324
325		// get filters
326	7.78M	Dict *streamDict = str->getDict();
327	7.78M	str = Stream::addFilters(std::move(str), streamDict, recursion);
328
329	7.78M	if (XRef *xref = lexer.getXRef()) {
330		// Don't try to reuse the entry from the block at the start
331		// of the function, xref can change in the middle because of
332		// reconstruction
333	7.42M	XRefEntry *entry = xref->getEntry(objNum, false);
334	7.42M	if (entry) {
335	7.42M	entry->setFlag(XRefEntry::Parsing, false);
336	7.42M	}
337	7.42M	}
338
339	7.78M	return str;
340	7.78M	}
341
342		void Parser::shift(int objNum)
343	1.36G	{
344	1.36G	if (inlineImg > 0) {
345	520k	if (inlineImg < 2) {
346	487k	++inlineImg;
347	487k	} else {
348		// in a damaged content stream, if 'ID' shows up in the middle
349		// of a dictionary, we need to reset
350	33.4k	inlineImg = 0;
351	33.4k	}
352	1.36G	} else if (buf2.isCmd("ID")) {
353	487k	lexer.skipChar(); // skip char after 'ID' command
354	487k	inlineImg = 1;
355	487k	}
356	1.36G	buf1 = std::move(buf2);
357	1.36G	if (inlineImg > 0) { // don't buffer inline image data
358	974k	buf2.setToNull();
359	1.36G	} else {
360	1.36G	buf2 = lexer.getObj(objNum);
361	1.36G	}
362	1.36G	}
363
364		void Parser::shift(const char *cmdA, int objNum)
365	7.78M	{
366	7.78M	if (inlineImg > 0) {
367	0	if (inlineImg < 2) {
368	0	++inlineImg;
369	0	} else {
370		// in a damaged content stream, if 'ID' shows up in the middle
371		// of a dictionary, we need to reset
372	0	inlineImg = 0;
373	0	}
374	7.78M	} else if (buf2.isCmd("ID")) {
375	496	lexer.skipChar(); // skip char after 'ID' command
376	496	inlineImg = 1;
377	496	}
378	7.78M	buf1 = std::move(buf2);
379	7.78M	if (inlineImg > 0) {
380	496	buf2.setToNull();
381	7.78M	} else if (buf1.isCmd(cmdA)) {
382	6.65M	buf2 = lexer.getObj(objNum);
383	6.65M	} else {
384	1.12M	buf2 = lexer.getObj(cmdA, objNum);
385	1.12M	}
386	7.78M	}