Coverage Report

Created: 2022-10-31 07:00

/src/ghostpdl/devices/vector/gdevpdfr.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2022 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
17
/* Named object pdfmark processing */
18
#include "memory_.h"
19
#include "gx.h"
20
#include "gserrors.h"
21
#include "gsutil.h"   /* for bytes_compare */
22
#include "gdevpdfx.h"
23
#include "gdevpdfo.h"
24
#include "scanchar.h"
25
#include "strimpl.h"
26
#include "sstring.h"
27
28
#ifndef gs_error_syntaxerror
29
#  define gs_error_syntaxerror gs_error_rangecheck
30
#endif
31
32
/* Test whether an object name has valid syntax, {name}. */
33
bool
34
pdf_objname_is_valid(const byte *data, uint size)
35
921
{
36
921
    return (size >= 2 && data[0] == '{' &&
37
921
            (const byte *)memchr(data, '}', size) == data + size - 1);
38
921
}
39
40
/*
41
 * Look up a named object.  Return_error(gs_error_rangecheck if the syntax is invalid.
42
 * If the object is missing, return gs_error_undefined.
43
 */
44
int
45
pdf_find_named(gx_device_pdf * pdev, const gs_param_string * pname,
46
               cos_object_t **ppco)
47
750
{
48
750
    const cos_value_t *pvalue;
49
50
750
    if (!pdf_objname_is_valid(pname->data, pname->size))
51
0
        return_error(gs_error_rangecheck);
52
750
    if ((pvalue = cos_dict_find(pdev->local_named_objects, pname->data,
53
750
                                pname->size)) != 0 ||
54
750
        (pvalue = cos_dict_find(pdev->global_named_objects, pname->data,
55
171
                                pname->size)) != 0
56
750
        ) {
57
579
        *ppco = pvalue->contents.object;
58
579
        return 0;
59
579
    }
60
750
    return_error(gs_error_undefined);
61
750
}
62
63
/*
64
 * Create a (local) named object.  id = -1L means do not assign an id.
65
 * pname = 0 means just create the object, do not name it.  Note that
66
 * during initialization, local_named_objects == global_named_objects.
67
 */
68
int
69
pdf_create_named(gx_device_pdf *pdev, const gs_param_string *pname,
70
                 cos_type_t cotype, cos_object_t **ppco, long id)
71
105k
{
72
105k
    cos_object_t *pco;
73
105k
    cos_value_t value;
74
75
105k
    *ppco = pco = cos_object_alloc(pdev, "pdf_create_named");
76
105k
    if (pco == 0)
77
0
        return_error(gs_error_VMerror);
78
105k
    pco->id =
79
105k
        (id == -1 ? 0L : id == 0 ? pdf_obj_ref(pdev) : id);
80
105k
    if (pname) {
81
32.3k
        int code = cos_dict_put(pdev->local_named_objects, pname->data,
82
32.3k
                                pname->size, cos_object_value(&value, pco));
83
84
32.3k
        if (code < 0)
85
0
            return code;
86
32.3k
    }
87
105k
    if (cotype != cos_type_generic)
88
105k
        cos_become(pco, cotype);
89
105k
    *ppco = pco;
90
105k
    return 0;
91
105k
}
92
int
93
pdf_create_named_dict(gx_device_pdf *pdev, const gs_param_string *pname,
94
                      cos_dict_t **ppcd, long id)
95
48.2k
{
96
48.2k
    cos_object_t *pco;
97
48.2k
    int code = pdf_create_named(pdev, pname, cos_type_dict, &pco, id);
98
99
48.2k
    *ppcd = (cos_dict_t *)pco;
100
48.2k
    return code;
101
48.2k
}
102
103
/*
104
 * Look up a named object as for pdf_find_named.  If the object does not
105
 * exist, create it (as a dictionary if it is one of the predefined names
106
 * {ThisPage}, {NextPage}, {PrevPage}, or {Page<#>}, otherwise as a
107
 * generic object) and return 1.
108
 */
109
int
110
pdf_refer_named(gx_device_pdf * pdev, const gs_param_string * pname_orig,
111
                cos_object_t **ppco)
112
750
{
113
750
    const gs_param_string *pname = pname_orig;
114
750
    int code = pdf_find_named(pdev, pname, ppco);
115
750
    char page_name_chars[6 + 10 + 2]; /* {Page<n>}, enough for an int */
116
750
    gs_param_string pnstr;
117
750
    int page_number;
118
119
750
    if (code != gs_error_undefined)
120
579
        return code;
121
    /*
122
     * Check for a predefined name.  Map ThisPage, PrevPage, and NextPage
123
     * to the appropriate Page<#> name.
124
     */
125
171
    if (pname->size >= 7 && pname->size < sizeof(page_name_chars)) {
126
171
        memcpy(page_name_chars, pname->data, pname->size);
127
171
        page_name_chars[pname->size] = 0;
128
171
        if (sscanf(page_name_chars, "{Page%d}", &page_number) == 1)
129
0
            goto cpage;
130
171
    }
131
171
    if (pdf_key_eq(pname, "{ThisPage}"))
132
0
        page_number = pdev->next_page + 1;
133
171
    else if (pdf_key_eq(pname, "{NextPage}"))
134
0
        page_number = pdev->next_page + 2;
135
171
    else if (pdf_key_eq(pname, "{PrevPage}"))
136
0
        page_number = pdev->next_page;
137
171
    else {
138
171
        code = pdf_create_named(pdev, pname, cos_type_generic, ppco, 0L);
139
171
        return (code < 0 ? code : 1);
140
171
    }
141
0
    if (page_number <= 0)
142
0
        return code;
143
0
    gs_snprintf(page_name_chars, sizeof(page_name_chars), "{Page%d}", page_number);
144
0
    param_string_from_string(pnstr, page_name_chars);
145
0
    pname = &pnstr;
146
0
    code = pdf_find_named(pdev, pname, ppco);
147
0
    if (code != gs_error_undefined)
148
0
        return code;
149
0
 cpage:
150
0
    if (pdf_page_id(pdev, page_number) <= 0)
151
0
        return_error(gs_error_rangecheck);
152
0
    *ppco = COS_OBJECT(pdev->pages[page_number - 1].Page);
153
0
    return 0;
154
0
}
155
156
/*
157
 * Look up a named object as for pdf_refer_named.  If the object already
158
 * exists and is not simply a forward reference, return gs_error_rangecheck;
159
 * if it exists as a forward reference, set its type and return 0;
160
 * otherwise, create the object with the given type and return 1.
161
 */
162
int
163
pdf_make_named(gx_device_pdf * pdev, const gs_param_string * pname,
164
               cos_type_t cotype, cos_object_t **ppco, bool assign_id)
165
57.5k
{
166
57.5k
    if (pname) {
167
171
        int code = pdf_refer_named(pdev, pname, ppco);
168
171
        cos_object_t *pco = *ppco;
169
170
171
        if (code < 0)
171
0
            return code;
172
171
        if (cos_type(pco) != cos_type_generic)
173
0
            return_error(gs_error_rangecheck);
174
171
        if (assign_id && pco->id == 0)
175
0
            pco->id = pdf_obj_ref(pdev);
176
171
        cos_become(pco, cotype);
177
171
        return code;
178
57.3k
    } else {
179
57.3k
        int code = pdf_create_named(pdev, pname, cotype, ppco,
180
57.3k
                                    (assign_id ? 0L : -1L));
181
182
57.3k
        return (code < 0 ? code : 1);
183
57.3k
    }
184
57.5k
}
185
int
186
pdf_make_named_dict(gx_device_pdf * pdev, const gs_param_string * pname,
187
                    cos_dict_t **ppcd, bool assign_id)
188
57.3k
{
189
57.3k
    cos_object_t *pco;
190
57.3k
    int code = pdf_make_named(pdev, pname, cos_type_dict, &pco, assign_id);
191
192
57.3k
    *ppcd = (cos_dict_t *)pco;
193
57.3k
    return code;
194
57.3k
}
195
196
/*
197
 * Look up a named object as for pdf_refer_named.  If the object does not
198
 * exist, return gs_error_undefined; if the object exists but has the wrong type,
199
 * return gs_error_typecheck.
200
 */
201
int
202
pdf_get_named(gx_device_pdf * pdev, const gs_param_string * pname,
203
              cos_type_t cotype, cos_object_t **ppco)
204
50
{
205
50
    int code = pdf_refer_named(pdev, pname, ppco);
206
207
50
    if (code < 0)
208
0
        return code;
209
50
    if (cos_type(*ppco) != cotype)
210
0
        return_error(gs_error_typecheck);
211
50
    return code;
212
50
}
213
214
/*
215
 * Push the current local namespace onto the namespace stack, and reset it
216
 * to an empty namespace.
217
 */
218
int
219
pdf_push_namespace(gx_device_pdf *pdev)
220
0
{
221
0
    int code = cos_array_add_object(pdev->Namespace_stack,
222
0
                                    COS_OBJECT(pdev->local_named_objects));
223
0
    cos_dict_t *pcd =
224
0
        cos_dict_alloc(pdev, "pdf_push_namespace(local_named_objects)");
225
0
    cos_array_t *pca =
226
0
        cos_array_alloc(pdev, "pdf_push_namespace(NI_stack)");
227
228
0
    if (code < 0 ||
229
0
        (code = cos_array_add_object(pdev->Namespace_stack,
230
0
                                     COS_OBJECT(pdev->NI_stack))) < 0
231
0
        )
232
0
        return code;
233
0
    if (pcd == 0 || pca == 0)
234
0
        return_error(gs_error_VMerror);
235
0
    pdev->local_named_objects = pcd;
236
0
    pdev->NI_stack = pca;
237
0
    return 0;
238
0
}
239
240
/*
241
 * Pop the top local namespace from the namespace stack.  Return an error if
242
 * the stack is empty.
243
 */
244
int
245
pdf_pop_namespace(gx_device_pdf *pdev)
246
16.0k
{
247
16.0k
    cos_value_t nis_value, lno_value;
248
16.0k
    int code = cos_array_unadd(pdev->Namespace_stack, &nis_value);
249
250
16.0k
    if (code < 0 ||
251
16.0k
        (code = cos_array_unadd(pdev->Namespace_stack, &lno_value)) < 0
252
16.0k
        )
253
16.0k
        return code;
254
0
    COS_FREE(pdev->local_named_objects,
255
0
             "pdf_pop_namespace(local_named_objects)");
256
0
    pdev->local_named_objects = (cos_dict_t *)lno_value.contents.object;
257
0
    COS_FREE(pdev->NI_stack, "pdf_pop_namespace(NI_stack)");
258
0
    pdev->NI_stack = (cos_array_t *)nis_value.contents.object;
259
0
    return 0;
260
16.0k
}
261
262
/*
263
 * Scan a token from a string.  <<, >>, [, and ] are treated as tokens.
264
 * Return 1 if a token was scanned, 0 if we reached the end of the string,
265
 * or an error.  On a successful return, the token extends from *ptoken up
266
 * to but not including *pscan.
267
 *
268
 * Note that this scanner expects a subset of PostScript syntax, not PDF
269
 * syntax.  In particular, it doesn't understand ASCII85 strings,
270
 * doesn't process the PDF #-escape syntax within names, and does only
271
 * minimal syntax checking.  It also recognizes one extension to PostScript
272
 * syntax, to allow gs_pdfwr.ps to pass names that include non-regular
273
 * characters: If a name is immediately preceded by two null characters,
274
 * the name includes everything up to a following null character.  The only
275
 * place that currently generates this convention is the PostScript code
276
 * that pre-processes the arguments for pdfmarks, in lib/gs_pdfwr.ps.
277
 */
278
int
279
pdf_scan_token(const byte **pscan, const byte * end, const byte **ptoken)
280
2.03M
{
281
2.03M
    const byte *p = *pscan;
282
283
2.94M
    while (p < end && scan_char_decoder[*p] == ctype_space) {
284
912k
        ++p;
285
912k
        if (p[-1] == 0 && p + 1 < end && p + 2 < end && *p == 0 && p[1] == 0 && p[2] == '/') {
286
        /* Special handling for names delimited by a triple start and double end null character. */
287
0
            *ptoken = p + 2;
288
0
            while (*p != 0 || p[1] != 0)
289
0
                if (++p >= end || p + 1 >= end)
290
0
                    return_error(gs_error_syntaxerror); /* no terminator */
291
0
            *pscan = p + 1;
292
0
            return 1;
293
912k
        } else {
294
912k
            if (p[-1] == 0 && p + 1 < end && *p == 0 && p[2] == '/') {
295
            /* Special handling for names delimited by a double start and single end null character. */
296
0
                *ptoken = ++p;
297
0
                while (*p != 0)
298
0
                    if (++p >= end)
299
0
                        return_error(gs_error_syntaxerror); /* no terminator */
300
0
                *pscan = p;
301
0
                return 1;
302
0
            }
303
912k
        }
304
912k
    }
305
2.03M
    *ptoken = p;
306
2.03M
    if (p >= end) {
307
443k
        *pscan = p;
308
443k
        return 0;
309
443k
    }
310
1.58M
    switch (*p) {
311
0
    case '%':
312
0
    case ')':
313
0
        return_error(gs_error_syntaxerror);
314
86.2k
    case '(': {
315
        /* Skip over the string. */
316
86.2k
        byte buf[50];   /* size is arbitrary */
317
86.2k
        stream_cursor_read r;
318
86.2k
        stream_cursor_write w;
319
86.2k
        stream_PSSD_state ss;
320
86.2k
        int status;
321
322
86.2k
        s_PSSD_init((stream_state *)&ss);
323
324
        /* "p + 1" - skip the '(' */
325
86.2k
        stream_cursor_read_init(&r, p + 1, (end - p) - 1);
326
327
113k
        do {
328
113k
            stream_cursor_write_init(&w, buf, sizeof(buf));
329
113k
            status = (*s_PSSD_template.process)
330
113k
                ((stream_state *) & ss, &r, &w, true);
331
113k
        }
332
113k
        while (status == 1);
333
86.2k
        *pscan = r.ptr + 1;
334
86.2k
        return 1;
335
0
    }
336
30.4k
    case '<':
337
30.4k
        if (end - p < 2)
338
0
            return_error(gs_error_syntaxerror);
339
30.4k
        if (p[1] != '<') {
340
            /* This is handling a hex string, just skips across the entire string to the '>' */
341
            /*
342
             * We need the cast because some compilers declare memchar as
343
             * returning a char * rather than a void *.
344
             */
345
0
            p = (const byte *)memchr(p + 1, '>', end - p - 1);
346
0
            if (p == 0)
347
0
                return_error(gs_error_syntaxerror);
348
0
            *pscan = p + 1;
349
0
            return 1;
350
30.4k
        } else {
351
            /* This case is is beginning of a dict, "<<". Return it as a token. */
352
30.4k
            *pscan = p + 2;
353
30.4k
            return 1;
354
30.4k
        }
355
0
        break;
356
30.4k
    case '>':
357
        /* This case is the end of a dict, ">>". Return it as a token. */
358
30.4k
        if (end - p < 2 || p[1] != '>')
359
0
            return_error(gs_error_syntaxerror);
360
30.4k
        *pscan = p + 2;
361
30.4k
        return 1;
362
231k
    case '[': case ']': case '{': case '}':
363
231k
        *pscan = p + 1;
364
231k
        return 1;
365
171k
    case '/':
366
171k
        ++p;
367
1.20M
    default:
368
1.20M
        break;
369
1.58M
    }
370
5.77M
    while (p < end && scan_char_decoder[*p] <= ctype_name)
371
4.56M
        ++p;
372
1.20M
    *pscan = p;
373
1.20M
    if (p == *ptoken)    /* no chars scanned, i.e., not ctype_name */
374
8
        return_error(gs_error_syntaxerror);
375
1.20M
    return 1;
376
1.20M
}
377
/*
378
 * Scan a possibly composite token: arrays and dictionaries are treated as
379
 * single tokens.
380
 */
381
int
382
pdf_scan_token_composite(const byte **pscan, const byte * end,
383
                         const byte **ptoken_orig)
384
460
{
385
460
    int level = 0;
386
460
    const byte *ignore_token;
387
460
    const byte **ptoken = ptoken_orig;
388
460
    int code;
389
390
1.17k
    do {
391
1.17k
        code = pdf_scan_token(pscan, end, ptoken);
392
1.17k
        if (code <= 0)
393
0
            return (code < 0 || level == 0 ? code :
394
0
                    gs_note_error(gs_error_syntaxerror));
395
1.17k
        switch (**ptoken) {
396
358
        case '<': case '[': case '{':
397
358
            ++level; break;
398
358
        case '>': case ']': case '}':
399
358
            if (level == 0)
400
0
                return_error(gs_error_syntaxerror);
401
358
            --level; break;
402
1.17k
        }
403
1.17k
        ptoken = &ignore_token;
404
1.17k
    } while (level);
405
460
    return code;
406
460
}
407
408
/* Replace object names with object references in a (parameter) string. */
409
static const byte *
410
pdfmark_next_object(const byte * scan, const byte * end, const byte **pname,
411
                    cos_object_t **ppco, gx_device_pdf * pdev)
412
444k
{
413
    /*
414
     * Starting at scan, find the next object reference, set *pname
415
     * to point to it in the string, store the object at *ppco,
416
     * and return a pointer to the first character beyond the
417
     * reference.  If there are no more object references, set
418
     * *pname = end, *ppco = 0, and return end.
419
     */
420
444k
    int code;
421
422
2.03M
    while ((code = pdf_scan_token(&scan, end, pname)) != 0) {
423
1.58M
        gs_param_string sname;
424
425
1.58M
        if (code < 0) {
426
8
            ++scan;
427
8
            continue;
428
8
        }
429
1.58M
        if (**pname != '{')
430
1.58M
            continue;
431
        /* Back up over the { and rescan as a single token. */
432
358
        scan = *pname;
433
358
        code = pdf_scan_token_composite(&scan, end, pname);
434
358
        if (code < 0) {
435
0
            ++scan;
436
0
            continue;
437
0
        }
438
358
        sname.data = *pname;
439
358
        sname.size = scan - sname.data;
440
        /*
441
         * Forward references are allowed.  If there is an error,
442
         * simply retain the name as a literal string.
443
         */
444
358
        code = pdf_refer_named(pdev, &sname, ppco);
445
358
        if (code < 0)
446
0
            continue;
447
358
        return scan;
448
358
    }
449
443k
    *ppco = 0;
450
443k
    return end;
451
444k
}
452
int
453
pdf_replace_names(gx_device_pdf * pdev, const gs_param_string * from,
454
                  gs_param_string * to)
455
443k
{
456
443k
    const byte *start = from->data;
457
443k
    const byte *end = start + from->size;
458
443k
    const byte *scan, *to_free = NULL;
459
443k
    uint size = 0;
460
443k
    cos_object_t *pco;
461
443k
    bool any = false;
462
443k
    byte *sto;
463
443k
    char ref[1 + 10 + 5 + 1]; /* max obj number is 10 digits */
464
465
    /* Do a first pass to compute the length of the result. */
466
887k
    for (scan = start; scan < end;) {
467
443k
        const byte *sname;
468
443k
        const byte *next =
469
443k
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
470
471
443k
        size += sname - scan;
472
443k
        if (pco) {
473
179
            gs_snprintf(ref, sizeof(ref), " %ld 0 R ", pco->id);
474
179
            size += strlen(ref);
475
            /* Special 'name' escaping convention (see gs_pdfwr.ps, /.pdf===dict
476
             * the /nametype procedure). We do not want to write out the NULL
477
             * characters, we'll remove them in pass 2, for now don't count
478
             * them into the string size.
479
             */
480
179
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00)
481
0
                size -= 3;
482
179
        }
483
443k
        scan = next;
484
443k
        any |= next != sname;
485
443k
    }
486
443k
    to->persistent = true; /* ??? */
487
443k
    if (!any) {
488
443k
        if (to->data != start) {
489
0
            gs_free_object(pdev->pdf_memory, (byte *)to->data, "pdf_replace_names");
490
0
            to->data = start;
491
0
        }
492
443k
        to->size = size;
493
443k
        return 0;
494
443k
    }
495
179
    sto = gs_alloc_bytes(pdev->pdf_memory, size, "pdf_replace_names");
496
179
    if (sto == 0)
497
0
        return_error(gs_error_VMerror);
498
179
    to_free = to->data;
499
179
    to->data = sto;
500
179
    to->size = size;
501
    /* Do a second pass to do the actual substitutions. */
502
389
    for (scan = start; scan < end;) {
503
210
        const byte *sname;
504
210
        const byte *next =
505
210
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
506
210
        uint copy = sname - scan;
507
210
        int rlen;
508
509
210
        memcpy(sto, scan, copy);
510
210
        sto += copy;
511
210
        if (pco) {
512
179
            gs_snprintf(ref, sizeof(ref), " %ld 0 R ", pco->id);
513
179
            rlen = strlen(ref);
514
179
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00) {
515
0
                sto -= 2;
516
0
                next++;
517
0
            }
518
179
            memcpy(sto, ref, rlen);
519
179
            sto += rlen;
520
179
        }
521
210
        scan = next;
522
210
    }
523
179
    gs_free_object(pdev->pdf_memory, (byte *)to_free, "pdf_replace_names");
524
179
    return 0;
525
179
}