Coverage Report

Created: 2025-06-24 07:01

/src/ghostpdl/devices/vector/gdevpdfr.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2024 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/* Named object pdfmark processing */
18
#include "memory_.h"
19
#include "gx.h"
20
#include "gserrors.h"
21
#include "gsutil.h"   /* for bytes_compare */
22
#include "gdevpdfx.h"
23
#include "gdevpdfo.h"
24
#include "scanchar.h"
25
#include "strimpl.h"
26
#include "sstring.h"
27
28
#ifndef gs_error_syntaxerror
29
#  define gs_error_syntaxerror gs_error_rangecheck
30
#endif
31
32
/* Test whether an object name has valid syntax, {name}. */
33
bool
34
pdf_objname_is_valid(const byte *data, uint size)
35
18.3k
{
36
18.3k
    return (size >= 2 && data[0] == '{' &&
37
18.3k
            (const byte *)memchr(data, '}', size) == data + size - 1);
38
18.3k
}
39
40
/*
41
 * Look up a named object.  Return_error(gs_error_rangecheck if the syntax is invalid.
42
 * If the object is missing, return gs_error_undefined.
43
 */
44
int
45
pdf_find_named(gx_device_pdf * pdev, const gs_param_string * pname,
46
               cos_object_t **ppco)
47
9.96k
{
48
9.96k
    const cos_value_t *pvalue;
49
50
9.96k
    if (!pdf_objname_is_valid(pname->data, pname->size))
51
0
        return_error(gs_error_rangecheck);
52
9.96k
    if ((pvalue = cos_dict_find(pdev->local_named_objects, pname->data,
53
9.96k
                                pname->size)) != 0 ||
54
9.96k
        (pvalue = cos_dict_find(pdev->global_named_objects, pname->data,
55
1.13k
                                pname->size)) != 0
56
9.96k
        ) {
57
8.83k
        *ppco = pvalue->contents.object;
58
8.83k
        return 0;
59
8.83k
    }
60
9.96k
    return_error(gs_error_undefined);
61
9.96k
}
62
63
/*
64
 * Create a (local) named object.  id = -1L means do not assign an id.
65
 * pname = 0 means just create the object, do not name it.  Note that
66
 * during initialization, local_named_objects == global_named_objects.
67
 */
68
int
69
pdf_create_named(gx_device_pdf *pdev, const gs_param_string *pname,
70
                 cos_type_t cotype, cos_object_t **ppco, int64_t id)
71
109k
{
72
109k
    cos_object_t *pco;
73
109k
    cos_value_t value;
74
75
109k
    *ppco = pco = cos_object_alloc(pdev, "pdf_create_named");
76
109k
    if (pco == 0)
77
0
        return_error(gs_error_VMerror);
78
109k
    pco->id =
79
109k
        (id == -1 ? 0L : id == 0 ? pdf_obj_ref(pdev) : id);
80
109k
    if (pname) {
81
69.1k
        int code = cos_dict_put(pdev->local_named_objects, pname->data,
82
69.1k
                                pname->size, cos_object_value(&value, pco));
83
84
69.1k
        if (code < 0)
85
0
            return code;
86
69.1k
    }
87
109k
    if (cotype != cos_type_generic)
88
108k
        cos_become(pco, cotype);
89
109k
    *ppco = pco;
90
109k
    return 0;
91
109k
}
92
int
93
pdf_create_named_dict(gx_device_pdf *pdev, const gs_param_string *pname,
94
                      cos_dict_t **ppcd, int64_t id)
95
102k
{
96
102k
    cos_object_t *pco;
97
102k
    int code = pdf_create_named(pdev, pname, cos_type_dict, &pco, id);
98
99
102k
    *ppcd = (cos_dict_t *)pco;
100
102k
    return code;
101
102k
}
102
103
/*
104
 * Look up a named object as for pdf_find_named.  If the object does not
105
 * exist, create it (as a dictionary if it is one of the predefined names
106
 * {ThisPage}, {NextPage}, {PrevPage}, or {Page<#>}, otherwise as a
107
 * generic object) and return 1.
108
 */
109
int
110
pdf_refer_named(gx_device_pdf * pdev, const gs_param_string * pname_orig,
111
                cos_object_t **ppco)
112
9.96k
{
113
9.96k
    const gs_param_string *pname = pname_orig;
114
9.96k
    int code = pdf_find_named(pdev, pname, ppco);
115
9.96k
    char page_name_chars[6 + 10 + 2]; /* {Page<n>}, enough for an int */
116
9.96k
    gs_param_string pnstr;
117
9.96k
    int page_number;
118
119
9.96k
    if (code != gs_error_undefined)
120
8.83k
        return code;
121
    /*
122
     * Check for a predefined name.  Map ThisPage, PrevPage, and NextPage
123
     * to the appropriate Page<#> name.
124
     */
125
1.13k
    if (pname->size >= 7 && pname->size < sizeof(page_name_chars)) {
126
1.12k
        memcpy(page_name_chars, pname->data, pname->size);
127
1.12k
        page_name_chars[pname->size] = 0;
128
1.12k
        if (sscanf(page_name_chars, "{Page%d}", &page_number) == 1)
129
0
            goto cpage;
130
1.12k
    }
131
1.13k
    if (pdf_key_eq(pname, "{ThisPage}"))
132
0
        page_number = pdev->next_page + 1;
133
1.13k
    else if (pdf_key_eq(pname, "{NextPage}"))
134
0
        page_number = pdev->next_page + 2;
135
1.13k
    else if (pdf_key_eq(pname, "{PrevPage}"))
136
0
        page_number = pdev->next_page;
137
1.13k
    else {
138
1.13k
        code = pdf_create_named(pdev, pname, cos_type_generic, ppco, 0L);
139
1.13k
        return (code < 0 ? code : 1);
140
1.13k
    }
141
0
    if (page_number <= 0)
142
0
        return code;
143
0
    gs_snprintf(page_name_chars, sizeof(page_name_chars), "{Page%d}", page_number);
144
0
    param_string_from_string(pnstr, page_name_chars);
145
0
    pname = &pnstr;
146
0
    code = pdf_find_named(pdev, pname, ppco);
147
0
    if (code != gs_error_undefined)
148
0
        return code;
149
0
 cpage:
150
0
    if (pdf_page_id(pdev, page_number) <= 0)
151
0
        return_error(gs_error_rangecheck);
152
0
    *ppco = COS_OBJECT(pdev->pages[page_number - 1].Page);
153
0
    return 0;
154
0
}
155
156
/*
157
 * Look up a named object as for pdf_refer_named.  If the object already
158
 * exists and is not simply a forward reference, return gs_error_rangecheck;
159
 * if it exists as a forward reference, set its type and return 0;
160
 * otherwise, create the object with the given type and return 1.
161
 */
162
int
163
pdf_make_named(gx_device_pdf * pdev, const gs_param_string * pname,
164
               cos_type_t cotype, cos_object_t **ppco, bool assign_id)
165
7.30k
{
166
7.30k
    if (pname) {
167
1.11k
        int code = pdf_refer_named(pdev, pname, ppco);
168
1.11k
        cos_object_t *pco = *ppco;
169
170
1.11k
        if (code < 0)
171
0
            return code;
172
1.11k
        if (cos_type(pco) != cos_type_generic)
173
0
            return_error(gs_error_rangecheck);
174
1.11k
        if (assign_id && pco->id == 0)
175
0
            pco->id = pdf_obj_ref(pdev);
176
1.11k
        cos_become(pco, cotype);
177
1.11k
        return code;
178
6.19k
    } else {
179
6.19k
        int code = pdf_create_named(pdev, pname, cotype, ppco,
180
6.19k
                                    (assign_id ? 0L : -1L));
181
182
6.19k
        return (code < 0 ? code : 1);
183
6.19k
    }
184
7.30k
}
185
int
186
pdf_make_named_dict(gx_device_pdf * pdev, const gs_param_string * pname,
187
                    cos_dict_t **ppcd, bool assign_id)
188
6.19k
{
189
6.19k
    cos_object_t *pco;
190
6.19k
    int code = pdf_make_named(pdev, pname, cos_type_dict, &pco, assign_id);
191
192
6.19k
    *ppcd = (cos_dict_t *)pco;
193
6.19k
    return code;
194
6.19k
}
195
196
/*
197
 * Look up a named object as for pdf_refer_named.  If the object does not
198
 * exist, return gs_error_undefined; if the object exists but has the wrong type,
199
 * return gs_error_typecheck.
200
 */
201
int
202
pdf_get_named(gx_device_pdf * pdev, const gs_param_string * pname,
203
              cos_type_t cotype, cos_object_t **ppco)
204
396
{
205
396
    int code = pdf_refer_named(pdev, pname, ppco);
206
207
396
    if (code < 0)
208
0
        return code;
209
396
    if (cos_type(*ppco) != cotype)
210
0
        return_error(gs_error_typecheck);
211
396
    return code;
212
396
}
213
214
/*
215
 * Push the current local namespace onto the namespace stack, and reset it
216
 * to an empty namespace.
217
 */
218
int
219
pdf_push_namespace(gx_device_pdf *pdev)
220
8
{
221
8
    int code = cos_array_add_object(pdev->Namespace_stack,
222
8
                                    COS_OBJECT(pdev->local_named_objects));
223
8
    cos_dict_t *pcd =
224
8
        cos_dict_alloc(pdev, "pdf_push_namespace(local_named_objects)");
225
8
    cos_array_t *pca =
226
8
        cos_array_alloc(pdev, "pdf_push_namespace(NI_stack)");
227
228
8
    if (code < 0 ||
229
8
        (code = cos_array_add_object(pdev->Namespace_stack,
230
8
                                     COS_OBJECT(pdev->NI_stack))) < 0
231
8
        )
232
0
        return code;
233
8
    if (pcd == 0 || pca == 0)
234
0
        return_error(gs_error_VMerror);
235
8
    pdev->local_named_objects = pcd;
236
8
    pdev->NI_stack = pca;
237
8
    return 0;
238
8
}
239
240
/*
241
 * Pop the top local namespace from the namespace stack.  Return an error if
242
 * the stack is empty.
243
 */
244
int
245
pdf_pop_namespace(gx_device_pdf *pdev)
246
34.0k
{
247
34.0k
    cos_value_t nis_value, lno_value;
248
34.0k
    int code = cos_array_unadd(pdev->Namespace_stack, &nis_value);
249
250
34.0k
    if (code < 0 ||
251
34.0k
        (code = cos_array_unadd(pdev->Namespace_stack, &lno_value)) < 0
252
34.0k
        )
253
34.0k
        return code;
254
8
    COS_FREE(pdev->local_named_objects,
255
8
             "pdf_pop_namespace(local_named_objects)");
256
8
    pdev->local_named_objects = (cos_dict_t *)lno_value.contents.object;
257
8
    COS_FREE(pdev->NI_stack, "pdf_pop_namespace(NI_stack)");
258
8
    pdev->NI_stack = (cos_array_t *)nis_value.contents.object;
259
8
    return 0;
260
34.0k
}
261
262
/*
263
 * Scan a token from a string.  <<, >>, [, and ] are treated as tokens.
264
 * Return 1 if a token was scanned, 0 if we reached the end of the string,
265
 * or an error.  On a successful return, the token extends from *ptoken up
266
 * to but not including *pscan.
267
 *
268
 * Note that this scanner expects a subset of PostScript syntax, not PDF
269
 * syntax.  In particular, it doesn't understand ASCII85 strings,
270
 * doesn't process the PDF #-escape syntax within names, and does only
271
 * minimal syntax checking.  It also recognizes one extension to PostScript
272
 * syntax, to allow gs_pdfwr.ps to pass names that include non-regular
273
 * characters: If a name is immediately preceded by two null characters,
274
 * the name includes everything up to a following null character.  The only
275
 * place that currently generates this convention is the PostScript code
276
 * that pre-processes the arguments for pdfmarks, in lib/gs_pdfwr.ps.
277
 */
278
int
279
pdf_scan_token(const byte **pscan, const byte * end, const byte **ptoken)
280
109k
{
281
109k
    const byte *p = *pscan;
282
283
158k
    while (p < end && scan_char_decoder[*p] == ctype_space) {
284
48.5k
        ++p;
285
48.5k
        if (p[-1] == 0 && p + 1 < end && p + 2 < end && *p == 0 && p[1] == 0 && p[2] == '/') {
286
        /* Special handling for names delimited by a triple start and double end null character. */
287
0
            *ptoken = p + 2;
288
0
            while (*p != 0 || p[1] != 0)
289
0
                if (++p >= end || p + 1 >= end)
290
0
                    return_error(gs_error_syntaxerror); /* no terminator */
291
0
            *pscan = p + 1;
292
0
            return 1;
293
48.5k
        } else {
294
48.5k
            if (p[-1] == 0 && p + 1 < end && *p == 0 && p[2] == '/') {
295
            /* Special handling for names delimited by a double start and single end null character. */
296
0
                *ptoken = ++p;
297
0
                while (*p != 0)
298
0
                    if (++p >= end)
299
0
                        return_error(gs_error_syntaxerror); /* no terminator */
300
0
                *pscan = p;
301
0
                return 1;
302
0
            }
303
48.5k
        }
304
48.5k
    }
305
109k
    *ptoken = p;
306
109k
    if (p >= end) {
307
22.6k
        *pscan = p;
308
22.6k
        return 0;
309
22.6k
    }
310
87.3k
    switch (*p) {
311
0
    case '%':
312
0
    case ')':
313
0
        return_error(gs_error_syntaxerror);
314
4.68k
    case '(': {
315
        /* Skip over the string. */
316
4.68k
        byte buf[50];   /* size is arbitrary */
317
4.68k
        stream_cursor_read r;
318
4.68k
        stream_cursor_write w;
319
4.68k
        stream_PSSD_state ss;
320
4.68k
        int status;
321
322
4.68k
        s_PSSD_init((stream_state *)&ss);
323
324
        /* "p + 1" - skip the '(' */
325
4.68k
        stream_cursor_read_init(&r, p + 1, (end - p) - 1);
326
327
6.61k
        do {
328
6.61k
            stream_cursor_write_init(&w, buf, sizeof(buf));
329
6.61k
            status = (*s_PSSD_template.process)
330
6.61k
                ((stream_state *) & ss, &r, &w, true);
331
6.61k
        }
332
6.61k
        while (status == 1);
333
4.68k
        *pscan = r.ptr + 1;
334
4.68k
        return 1;
335
0
    }
336
794
    case '<':
337
794
        if (end - p < 2)
338
0
            return_error(gs_error_syntaxerror);
339
794
        if (p[1] != '<') {
340
            /* This is handling a hex string, just skips across the entire string to the '>' */
341
            /*
342
             * We need the cast because some compilers declare memchar as
343
             * returning a char * rather than a void *.
344
             */
345
0
            p = (const byte *)memchr(p + 1, '>', end - p - 1);
346
0
            if (p == 0)
347
0
                return_error(gs_error_syntaxerror);
348
0
            *pscan = p + 1;
349
0
            return 1;
350
794
        } else {
351
            /* This case is is beginning of a dict, "<<". Return it as a token. */
352
794
            *pscan = p + 2;
353
794
            return 1;
354
794
        }
355
0
        break;
356
900
    case '>':
357
        /* This case is the end of a dict, ">>". Return it as a token. */
358
900
        if (end - p < 2 || p[1] != '>')
359
0
            return_error(gs_error_syntaxerror);
360
900
        *pscan = p + 2;
361
900
        return 1;
362
26.8k
    case '[': case ']': case '{': case '}':
363
26.8k
        *pscan = p + 1;
364
26.8k
        return 1;
365
17.4k
    case '/':
366
17.4k
        ++p;
367
54.1k
    default:
368
54.1k
        break;
369
87.3k
    }
370
330k
    while (p < end && scan_char_decoder[*p] <= ctype_name)
371
276k
        ++p;
372
54.1k
    *pscan = p;
373
54.1k
    if (p == *ptoken)    /* no chars scanned, i.e., not ctype_name */
374
3
        return_error(gs_error_syntaxerror);
375
54.1k
    return 1;
376
54.1k
}
377
/*
378
 * Scan a possibly composite token: arrays and dictionaries are treated as
379
 * single tokens.
380
 */
381
int
382
pdf_scan_token_composite(const byte **pscan, const byte * end,
383
                         const byte **ptoken_orig)
384
6.06k
{
385
6.06k
    int level = 0;
386
6.06k
    const byte *ignore_token;
387
6.06k
    const byte **ptoken = ptoken_orig;
388
6.06k
    int code;
389
390
17.7k
    do {
391
17.7k
        code = pdf_scan_token(pscan, end, ptoken);
392
17.7k
        if (code <= 0)
393
0
            return (code < 0 || level == 0 ? code :
394
0
                    gs_note_error(gs_error_syntaxerror));
395
17.7k
        switch (**ptoken) {
396
5.82k
        case '<': case '[': case '{':
397
5.82k
            ++level; break;
398
5.82k
        case '>': case ']': case '}':
399
5.82k
            if (level == 0)
400
0
                return_error(gs_error_syntaxerror);
401
5.82k
            --level; break;
402
17.7k
        }
403
17.7k
        ptoken = &ignore_token;
404
17.7k
    } while (level);
405
6.06k
    return code;
406
6.06k
}
407
408
/* Replace object names with object references in a (parameter) string. */
409
static const byte *
410
pdfmark_next_object(const byte * scan, const byte * end, const byte **pname,
411
                    cos_object_t **ppco, gx_device_pdf * pdev)
412
28.4k
{
413
    /*
414
     * Starting at scan, find the next object reference, set *pname
415
     * to point to it in the string, store the object at *ppco,
416
     * and return a pointer to the first character beyond the
417
     * reference.  If there are no more object references, set
418
     * *pname = end, *ppco = 0, and return end.
419
     */
420
28.4k
    int code;
421
422
91.7k
    while ((code = pdf_scan_token(&scan, end, pname)) != 0) {
423
69.1k
        gs_param_string sname;
424
425
69.1k
        if (code < 0) {
426
3
            ++scan;
427
3
            continue;
428
3
        }
429
69.1k
        if (**pname != '{')
430
63.3k
            continue;
431
        /* Back up over the { and rescan as a single token. */
432
5.78k
        scan = *pname;
433
5.78k
        code = pdf_scan_token_composite(&scan, end, pname);
434
5.78k
        if (code < 0) {
435
0
            ++scan;
436
0
            continue;
437
0
        }
438
5.78k
        sname.data = *pname;
439
5.78k
        sname.size = scan - sname.data;
440
        /*
441
         * Forward references are allowed.  If there is an error,
442
         * simply retain the name as a literal string.
443
         */
444
5.78k
        code = pdf_refer_named(pdev, &sname, ppco);
445
5.78k
        if (code < 0)
446
0
            continue;
447
5.78k
        return scan;
448
5.78k
    }
449
22.6k
    *ppco = 0;
450
22.6k
    return end;
451
28.4k
}
452
int
453
pdf_replace_names(gx_device_pdf * pdev, const gs_param_string * from,
454
                  gs_param_string * to)
455
22.8k
{
456
22.8k
    const byte *start = from->data;
457
22.8k
    const byte *end = start + from->size;
458
22.8k
    const byte *scan, *to_free = NULL;
459
22.8k
    uint size = 0;
460
22.8k
    cos_object_t *pco;
461
22.8k
    bool any = false;
462
22.8k
    byte *sto;
463
22.8k
    char ref[1 + 10 + 5 + 1]; /* max obj number is 10 digits */
464
465
    /* Do a first pass to compute the length of the result. */
466
48.1k
    for (scan = start; scan < end;) {
467
25.3k
        const byte *sname;
468
25.3k
        const byte *next =
469
25.3k
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
470
471
25.3k
        size += sname - scan;
472
25.3k
        if (pco) {
473
2.89k
            gs_snprintf(ref, sizeof(ref), " %"PRId64" 0 R ", pco->id);
474
2.89k
            size += strlen(ref);
475
            /* Special 'name' escaping convention (see gs_pdfwr.ps, /.pdf===dict
476
             * the /nametype procedure). We do not want to write out the NULL
477
             * characters, we'll remove them in pass 2, for now don't count
478
             * them into the string size.
479
             */
480
2.89k
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00)
481
0
                size -= 3;
482
2.89k
        }
483
25.3k
        scan = next;
484
25.3k
        any |= next != sname;
485
25.3k
    }
486
22.8k
    to->persistent = true; /* ??? */
487
22.8k
    if (!any) {
488
22.3k
        if (to->data != start) {
489
0
            gs_free_object(pdev->pdf_memory, (byte *)to->data, "pdf_replace_names");
490
0
            to->data = start;
491
0
        }
492
22.3k
        to->size = size;
493
22.3k
        return 0;
494
22.3k
    }
495
532
    sto = gs_alloc_bytes(pdev->pdf_memory, size, "pdf_replace_names");
496
532
    if (sto == 0)
497
0
        return_error(gs_error_VMerror);
498
532
    to_free = to->data;
499
532
    to->data = sto;
500
532
    to->size = size;
501
    /* Do a second pass to do the actual substitutions. */
502
3.58k
    for (scan = start; scan < end;) {
503
3.05k
        const byte *sname;
504
3.05k
        const byte *next =
505
3.05k
            pdfmark_next_object(scan, end, &sname, &pco, pdev);
506
3.05k
        uint copy = sname - scan;
507
3.05k
        int rlen;
508
509
3.05k
        memcpy(sto, scan, copy);
510
3.05k
        sto += copy;
511
3.05k
        if (pco) {
512
2.89k
            gs_snprintf(ref, sizeof(ref), " %"PRId64" 0 R ", pco->id);
513
2.89k
            rlen = strlen(ref);
514
2.89k
            if (sname >= (start + 2) && sname[-1] == 0x00 && sname[-2] == 0x00 && next[0] == 0x00) {
515
0
                sto -= 2;
516
0
                next++;
517
0
            }
518
2.89k
            memcpy(sto, ref, rlen);
519
2.89k
            sto += rlen;
520
2.89k
        }
521
3.05k
        scan = next;
522
3.05k
    }
523
532
    gs_free_object(pdev->pdf_memory, (byte *)to_free, "pdf_replace_names");
524
532
    return 0;
525
532
}