Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/devices/vector/gdevpdfr.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2024 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/* Named object pdfmark processing */
18
#include "memory_.h"
19
#include "gx.h"
20
#include "gserrors.h"
21
#include "gsutil.h"   /* for bytes_compare */
22
#include "gdevpdfx.h"
23
#include "gdevpdfo.h"
24
#include "scanchar.h"
25
#include "strimpl.h"
26
#include "sstring.h"
27
28
#ifndef gs_error_syntaxerror
29
#  define gs_error_syntaxerror gs_error_rangecheck
30
#endif
31
32
/* Test whether an object name has valid syntax, {name}. */
33
bool
34
pdf_objname_is_valid(const byte *data, uint size)
35
81.9k
{
36
81.9k
    return (size >= 2 && data[0] == '{' &&
37
81.9k
            (const byte *)memchr(data, '}', size) == data + size - 1);
38
81.9k
}
39
40
/*
41
 * Look up a named object.  Return_error(gs_error_rangecheck if the syntax is invalid.
42
 * If the object is missing, return gs_error_undefined.
43
 */
44
int
45
pdf_find_named(gx_device_pdf * pdev, const gs_param_string * pname,
46
               cos_object_t **ppco)
47
43.9k
{
48
43.9k
    const cos_value_t *pvalue;
49
50
43.9k
    if (!pdf_objname_is_valid(pname->data, pname->size))
51
0
        return_error(gs_error_rangecheck);
52
43.9k
    if ((pvalue = cos_dict_find(pdev->local_named_objects, pname->data,
53
43.9k
                                pname->size)) != 0 ||
54
43.9k
        (pvalue = cos_dict_find(pdev->global_named_objects, pname->data,
55
4.35k
                                pname->size)) != 0
56
43.9k
        ) {
57
39.5k
        *ppco = pvalue->contents.object;
58
39.5k
        return 0;
59
39.5k
    }
60
43.9k
    return_error(gs_error_undefined);
61
43.9k
}
62
63
/*
64
 * Create a (local) named object.  id = -1L means do not assign an id.
65
 * pname = 0 means just create the object, do not name it.  Note that
66
 * during initialization, local_named_objects == global_named_objects.
67
 */
68
int
69
pdf_create_named(gx_device_pdf *pdev, const gs_param_string *pname,
70
                 cos_type_t cotype, cos_object_t **ppco, int64_t id)
71
191k
{
72
191k
    cos_object_t *pco;
73
191k
    cos_value_t value;
74
75
191k
    *ppco = pco = cos_object_alloc(pdev, "pdf_create_named");
76
191k
    if (pco == 0)
77
0
        return_error(gs_error_VMerror);
78
191k
    pco->id =
79
191k
        (id == -1 ? 0L : id == 0 ? pdf_obj_ref(pdev) : id);
80
191k
    if (pname) {
81
88.0k
        int code = cos_dict_put(pdev->local_named_objects, pname->data,
82
88.0k
                                pname->size, cos_object_value(&value, pco));
83
84
88.0k
        if (code < 0)
85
0
            return code;
86
88.0k
    }
87
191k
    if (cotype != cos_type_generic)
88
186k
        cos_become(pco, cotype);
89
191k
    *ppco = pco;
90
191k
    return 0;
91
191k
}
92
int
93
pdf_create_named_dict(gx_device_pdf *pdev, const gs_param_string *pname,
94
                      cos_dict_t **ppcd, int64_t id)
95
125k
{
96
125k
    cos_object_t *pco;
97
125k
    int code = pdf_create_named(pdev, pname, cos_type_dict, &pco, id);
98
99
125k
    *ppcd = (cos_dict_t *)pco;
100
125k
    return code;
101
125k
}
102
103
/*
104
 * Look up a named object as for pdf_find_named.  If the object does not
105
 * exist, create it (as a dictionary if it is one of the predefined names
106
 * {ThisPage}, {NextPage}, {PrevPage}, or {Page<#>}, otherwise as a
107
 * generic object) and return 1.
108
 */
109
int
110
pdf_refer_named(gx_device_pdf * pdev, const gs_param_string * pname_orig,
111
                cos_object_t **ppco)
112
43.9k
{
113
43.9k
    const gs_param_string *pname = pname_orig;
114
43.9k
    int code = pdf_find_named(pdev, pname, ppco);
115
43.9k
    char page_name_chars[6 + 10 + 2]; /* {Page<n>}, enough for an int */
116
43.9k
    gs_param_string pnstr;
117
43.9k
    int page_number;
118
119
43.9k
    if (code != gs_error_undefined)
120
39.5k
        return code;
121
    /*
122
     * Check for a predefined name.  Map ThisPage, PrevPage, and NextPage
123
     * to the appropriate Page<#> name.
124
     */
125
4.35k
    if (pname->size >= 7 && pname->size < sizeof(page_name_chars)) {
126
4.34k
        memcpy(page_name_chars, pname->data, pname->size);
127
4.34k
        page_name_chars[pname->size] = 0;
128
4.34k
        if (sscanf(page_name_chars, "{Page%d}", &page_number) == 1)
129
0
            goto cpage;
130
4.34k
    }
131
4.35k
    if (pdf_key_eq(pname, "{ThisPage}"))
132
0
        page_number = pdev->next_page + 1;
133
4.35k
    else if (pdf_key_eq(pname, "{NextPage}"))
134
0
        page_number = pdev->next_page + 2;
135
4.35k
    else if (pdf_key_eq(pname, "{PrevPage}"))
136
0
        page_number = pdev->next_page;
137
4.35k
    else {
138
4.35k
        code = pdf_create_named(pdev, pname, cos_type_generic, ppco, 0L);
139
4.35k
        return (code < 0 ? code : 1);
140
4.35k
    }
141
0
    if (page_number <= 0)
142
0
        return code;
143
0
    gs_snprintf(page_name_chars, sizeof(page_name_chars), "{Page%d}", page_number);
144
0
    param_string_from_string(pnstr, page_name_chars);
145
0
    pname = &pnstr;
146
0
    code = pdf_find_named(pdev, pname, ppco);
147
0
    if (code != gs_error_undefined)
148
0
        return code;
149
0
 cpage:
150
0
    if (pdf_page_id(pdev, page_number) <= 0)
151
0
        return_error(gs_error_rangecheck);
152
0
    *ppco = COS_OBJECT(pdev->pages[page_number - 1].Page);
153
0
    return 0;
154
0
}
155
156
/*
157
 * Look up a named object as for pdf_refer_named.  If the object already
158
 * exists and is not simply a forward reference, return gs_error_rangecheck;
159
 * if it exists as a forward reference, set its type and return 0;
160
 * otherwise, create the object with the given type and return 1.
161
 */
162
int
163
pdf_make_named(gx_device_pdf * pdev, const gs_param_string * pname,
164
               cos_type_t cotype, cos_object_t **ppco, bool assign_id)
165
65.3k
{
166
65.3k
    if (pname) {
167
4.27k
        int code = pdf_refer_named(pdev, pname, ppco);
168
4.27k
        cos_object_t *pco = *ppco;
169
170
4.27k
        if (code < 0)
171
0
            return code;
172
4.27k
        if (cos_type(pco) != cos_type_generic)
173
4
            return_error(gs_error_rangecheck);
174
4.27k
        if (assign_id && pco->id == 0)
175
0
            pco->id = pdf_obj_ref(pdev);
176
4.27k
        cos_become(pco, cotype);
177
4.27k
        return code;
178
61.0k
    } else {
179
61.0k
        int code = pdf_create_named(pdev, pname, cotype, ppco,
180
61.0k
                                    (assign_id ? 0L : -1L));
181
182
61.0k
        return (code < 0 ? code : 1);
183
61.0k
    }
184
65.3k
}
185
int
186
pdf_make_named_dict(gx_device_pdf * pdev, const gs_param_string * pname,
187
                    cos_dict_t **ppcd, bool assign_id)
188
61.0k
{
189
61.0k
    cos_object_t *pco;
190
61.0k
    int code = pdf_make_named(pdev, pname, cos_type_dict, &pco, assign_id);
191
192
61.0k
    *ppcd = (cos_dict_t *)pco;
193
61.0k
    return code;
194
61.0k
}
195
196
/*
197
 * Look up a named object as for pdf_refer_named.  If the object does not
198
 * exist, return gs_error_undefined; if the object exists but has the wrong type,
199
 * return gs_error_typecheck.
200
 */
201
int
202
pdf_get_named(gx_device_pdf * pdev, const gs_param_string * pname,
203
              cos_type_t cotype, cos_object_t **ppco)
204
898
{
205
898
    int code = pdf_refer_named(pdev, pname, ppco);
206
207
898
    if (code < 0)
208
0
        return code;
209
898
    if (cos_type(*ppco) != cotype)
210
0
        return_error(gs_error_typecheck);
211
898
    return code;
212
898
}
213
214
/*
215
 * Push the current local namespace onto the namespace stack, and reset it
216
 * to an empty namespace.
217
 */
218
int
219
pdf_push_namespace(gx_device_pdf *pdev)
220
8
{
221
8
    int code = cos_array_add_object(pdev->Namespace_stack,
222
8
                                    COS_OBJECT(pdev->local_named_objects));
223
8
    cos_dict_t *pcd =
224
8
        cos_dict_alloc(pdev, "pdf_push_namespace(local_named_objects)");
225
8
    cos_array_t *pca =
226
8
        cos_array_alloc(pdev, "pdf_push_namespace(NI_stack)");
227
228
8
    if (code < 0 ||
229
8
        (code = cos_array_add_object(pdev->Namespace_stack,
230
8
                                     COS_OBJECT(pdev->NI_stack))) < 0
231
8
        )
232
0
        return code;
233
8
    if (pcd == 0 || pca == 0)
234
0
        return_error(gs_error_VMerror);
235
8
    pdev->local_named_objects = pcd;
236
8
    pdev->NI_stack = pca;
237
8
    return 0;
238
8
}
239
240
/*
241
 * Pop the top local namespace from the namespace stack.  Return an error if
242
 * the stack is empty.
243
 */
244
int
245
pdf_pop_namespace(gx_device_pdf *pdev)
246
41.8k
{
247
41.8k
    cos_value_t nis_value, lno_value;
248
41.8k
    int code = cos_array_unadd(pdev->Namespace_stack, &nis_value);
249
250
41.8k
    if (code < 0 ||
251
41.8k
        (code = cos_array_unadd(pdev->Namespace_stack, &lno_value)) < 0
252
41.8k
        )
253
41.8k
        return code;
254
8
    COS_FREE(pdev->local_named_objects,
255
8
             "pdf_pop_namespace(local_named_objects)");
256
8
    pdev->local_named_objects = (cos_dict_t *)lno_value.contents.object;
257
8
    COS_FREE(pdev->NI_stack, "pdf_pop_namespace(NI_stack)");
258
8
    pdev->NI_stack = (cos_array_t *)nis_value.contents.object;
259
8
    return 0;
260
41.8k
}
261
262
/*
263
 * Scan a token from a string.  <<, >>, [, and ] are treated as tokens.
264
 * Return 1 if a token was scanned, 0 if we reached the end of the string,
265
 * or an error.  On a successful return, the token extends from *ptoken up
266
 * to but not including *pscan.
267
 *
268
 * Note that this scanner expects a subset of PostScript syntax, not PDF
269
 * syntax.  In particular, it doesn't understand ASCII85 strings,
270
 * doesn't process the PDF #-escape syntax within names, and does only
271
 * minimal syntax checking.  It also recognizes one extension to PostScript
272
 * syntax, to allow gs_pdfwr.ps to pass names that include non-regular
273
 * characters: If a name is immediately preceded by two null characters,
274
 * the name includes everything up to a following null character.  The only
275
 * place that currently generates this convention is the PostScript code
276
 * that pre-processes the arguments for pdfmarks, in lib/gs_pdfwr.ps.
277
 */
278
int
279
pdf_scan_token(const byte **pscan, const byte * end, const byte **ptoken)
280
1.58M
{
281
1.58M
    const byte *p = *pscan;
282
283
2.28M
    while (p < end && scan_char_decoder[*p] == ctype_space) {
284
692k
        ++p;
285
692k
        if (p[-1] == 0 && p + 1 < end && p + 2 < end && *p == 0 && p[1] == 0 && p[2] == '/') {
286
        /* Special handling for names delimited by a triple start and double end null character. */
287
0
            *ptoken = p + 2;
288
0
            while (*p != 0 || p[1] != 0)
289
0
                if (++p >= end || p + 1 >= end)
290
0
                    return_error(gs_error_syntaxerror); /* no terminator */
291
0
            *pscan = p + 1;
292
0
            return 1;
293
692k
        } else {
294
692k
            if (p[-1] == 0 && p + 1 < end && *p == 0 && p[2] == '/') {
295
            /* Special handling for names delimited by a double start and single end null character. */
296
0
                *ptoken = ++p;
297
0
                while (*p != 0)
298
0
                    if (++p >= end)
299
0
                        return_error(gs_error_syntaxerror); /* no terminator */
300
0
                *pscan = p;
301
0
                return 1;
302
0
            }
303
692k
        }
304
692k
    }
305
1.58M
    *ptoken = p;
306
1.58M
    if (p >= end) {
307
348k
        *pscan = p;
308
348k
        return 0;
309
348k
    }
310
1.23M
    switch (*p) {
311
0
    case '%':
312
0
    case ')':
313
0
        return_error(gs_error_syntaxerror);
314
70.3k
    case '(': {
315
        /* Skip over the string. */
316
70.3k
        byte buf[50];   /* size is arbitrary */
317
70.3k
        stream_cursor_read r;
318
70.3k
        stream_cursor_write w;
319
70.3k
        stream_PSSD_state ss;
320
70.3k
        int status;
321
322
70.3k
        s_PSSD_init((stream_state *)&ss);
323
324
        /* "p + 1" - skip the '(' */
325
70.3k
        stream_cursor_read_init(&r, p + 1, (end - p) - 1);
326
327
100k
        do {
328
100k
            stream_cursor_write_init(&w, buf, sizeof(buf));
329
100k
            status = (*s_PSSD_template.process)
330
100k
                ((stream_state *) & ss, &r, &w, true);
331
100k
        }
332
100k
        while (status == 1);
333
70.3k
        *pscan = r.ptr + 1;
334
70.3k
        return 1;
335
0
    }
336
20.8k
    case '<':
337
20.8k
        if (end - p < 2)
338
0
            return_error(gs_error_syntaxerror);
339
20.8k
        if (p[1] != '<') {
340
            /* This is handling a hex string, just skips across the entire string to the '>' */
341
            /*
342
             * We need the cast because some compilers declare memchar as
343
             * returning a char * rather than a void *.
344
             */
345
0
            p = (const byte *)memchr(p + 1, '>', end - p - 1);
346
0
            if (p == 0)
347
0
                return_error(gs_error_syntaxerror);
348
0
            *pscan = p + 1;
349
0
            return 1;
350
20.8k
        } else {
351
            /* This case is is beginning of a dict, "<<". Return it as a token. */
352
20.8k
            *pscan = p + 2;
353
20.8k
            return 1;
354
20.8k
        }
355
0
        break;
356
21.2k
    case '>':
357
        /* This case is the end of a dict, ">>". Return it as a token. */
358
21.2k
        if (end - p < 2 || p[1] != '>')
359
0
            return_error(gs_error_syntaxerror);
360
21.2k
        *pscan = p + 2;
361
21.2k
        return 1;
362
244k
    case '[': case ']': case '{': case '}':
363
244k
        *pscan = p + 1;
364
244k
        return 1;
365
172k
    case '/':
366
172k
        ++p;
367
882k
    default:
368
882k
        break;
369
1.23M
    }
370
4.52M
    while (p < end && scan_char_decoder[*p] <= ctype_name)
371
3.64M
        ++p;
372
882k
    *pscan = p;
373
882k
    if (p == *ptoken)    /* no chars scanned, i.e., not ctype_name */
374
55
        return_error(gs_error_syntaxerror);
375
882k
    return 1;
376
882k
}
377
/*
378
 * Scan a possibly composite token: arrays and dictionaries are treated as
379
 * single tokens.
380
 */
381
int
382
pdf_scan_token_composite(const byte **pscan, const byte * end,
383
                         const byte **ptoken_orig)
384
26.8k
{
385
26.8k
    int level = 0;
386
26.8k
    const byte *ignore_token;
387
26.8k
    const byte **ptoken = ptoken_orig;
388
26.8k
    int code;
389
390
78.2k
    do {
391
78.2k
        code = pdf_scan_token(pscan, end, ptoken);
392
78.2k
        if (code <= 0)
393
0
            return (code < 0 || level == 0 ? code :
394
0
                    gs_note_error(gs_error_syntaxerror));
395
78.2k
        switch (**ptoken) {
396
25.6k
        case '<': case '[': case '{':
397
25.6k
            ++level; break;
398
25.6k
        case '>': case ']': case '}':
399
25.6k
            if (level == 0)
400
0
                return_error(gs_error_syntaxerror);
401
25.6k
            --level; break;
402
78.2k
        }
403
78.2k
        ptoken = &ignore_token;
404
78.2k
    } while (level);
405
26.8k
    return code;
406
26.8k
}
407
408
/* Replace object names with object references in a (parameter) string. */
409
static const byte *
410
pdfmark_next_object(const byte * scan, const byte * end, const byte **pname,
411
                    cos_object_t **ppco, gx_device_pdf * pdev)
412
374k
{
413
    /*
414
     * Starting at scan, find the next object reference, set *pname
415
     * to point to it in the string, store the object at *ppco,
416
     * and return a pointer to the first character beyond the
417
     * reference.  If there are no more object references, set
418
     * *pname = end, *ppco = 0, and return end.
419
     */
420
374k
    int code;
421
422
1.50M
    while ((code = pdf_scan_token(&scan, end, pname)) != 0) {
423
1.15M
        gs_param_string sname;
424
425
1.15M
        if (code < 0) {
426
55
            ++scan;
427
55
            continue;
428
55
        }
429
1.15M
        if (**pname != '{')
430
1.13M
            continue;
431
        /* Back up over the { and rescan as a single token. */
432
25.5k
        scan = *pname;
433
25.5k
        code = pdf_scan_token_composite(&scan, end, pname);
434
25.5k
        if (code < 0) {
435
0
            ++scan;
436
0
            continue;
437
0
        }
438
25.5k
        sname.data = *pname;
439
25.5k
        sname.size = scan - sname.data;
440
        /*
441
         * Forward references are allowed.  If there is an error,
442
         * simply retain the name as a literal string.
443
         */
444
25.5k
        code = pdf_refer_named(pdev, &sname, ppco);
445
25.5k
        if (code < 0)
446
0
            continue;
447
25.5k
        return scan;
448
25.5k
    }
449
348k
    *ppco = 0;
450
348k
    return end;
451
374k
}
452
int
453
pdf_replace_names(gx_device_pdf * pdev, const gs_param_string * from,
454
                  gs_param_string * to)
455
350k
{
456
350k
    const byte *start = from->data;
457
350k
    const byte *end = start + from->size;
458
350k
    const byte *scan, *to_free = NULL;
459
350k
    uint size = 0;
460
350k
    cos_object_t *pco;
461
350k
    bool any = false;
462
350k
    byte *sto;
463
350k
    char ref[1 + 10 + 5 + 1]; /* max obj number is 10 digits */
464
465
    /* Do a first pass to compute the length of the result. */
466
710k
    for (scan = start; scan < end;) {
467
360k
        const byte *sname;
468
360k
        const byte *next =
469
360k
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
470
471
360k
        size += sname - scan;
472
360k
        if (pco) {
473
12.7k
            gs_snprintf(ref, sizeof(ref), " %"PRId64" 0 R ", pco->id);
474
12.7k
            size += strlen(ref);
475
            /* Special 'name' escaping convention (see gs_pdfwr.ps, /.pdf===dict
476
             * the /nametype procedure). We do not want to write out the NULL
477
             * characters, we'll remove them in pass 2, for now don't count
478
             * them into the string size.
479
             */
480
12.7k
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00)
481
0
                size -= 3;
482
12.7k
        }
483
360k
        scan = next;
484
360k
        any |= next != sname;
485
360k
    }
486
350k
    to->persistent = true; /* ??? */
487
350k
    if (!any) {
488
347k
        if (to->data != start) {
489
0
            gs_free_object(pdev->pdf_memory, (byte *)to->data, "pdf_replace_names");
490
0
            to->data = start;
491
0
        }
492
347k
        to->size = size;
493
347k
        return 0;
494
347k
    }
495
2.69k
    sto = gs_alloc_bytes(pdev->pdf_memory, size, "pdf_replace_names");
496
2.69k
    if (sto == 0)
497
0
        return_error(gs_error_VMerror);
498
2.69k
    to_free = to->data;
499
2.69k
    to->data = sto;
500
2.69k
    to->size = size;
501
    /* Do a second pass to do the actual substitutions. */
502
16.2k
    for (scan = start; scan < end;) {
503
13.5k
        const byte *sname;
504
13.5k
        const byte *next =
505
13.5k
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
506
13.5k
        uint copy = sname - scan;
507
13.5k
        int rlen;
508
509
13.5k
        memcpy(sto, scan, copy);
510
13.5k
        sto += copy;
511
13.5k
        if (pco) {
512
12.7k
            gs_snprintf(ref, sizeof(ref), " %"PRId64" 0 R ", pco->id);
513
12.7k
            rlen = strlen(ref);
514
12.7k
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00) {
515
0
                sto -= 2;
516
0
                next++;
517
0
            }
518
12.7k
            memcpy(sto, ref, rlen);
519
12.7k
            sto += rlen;
520
12.7k
        }
521
13.5k
        scan = next;
522
13.5k
    }
523
2.69k
    gs_free_object(pdev->pdf_memory, (byte *)to_free, "pdf_replace_names");
524
2.69k
    return 0;
525
2.69k
}