Coverage Report

Created: 2026-04-01 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source
1
/* Copyright (C) 2020-2026 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct numbr of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
38
709M
{
39
709M
    int bytes = 0;
40
709M
    int code = 0;
41
42
709M
    switch(type) {
43
12.9M
        case PDF_ARRAY_MARK:
44
24.4M
        case PDF_DICT_MARK:
45
24.9M
        case PDF_PROC_MARK:
46
24.9M
            bytes = sizeof(pdf_obj);
47
24.9M
            break;
48
176M
        case PDF_INT:
49
301M
        case PDF_REAL:
50
301M
            bytes = sizeof(pdf_num);
51
301M
            break;
52
64.6M
        case PDF_STRING:
53
326M
        case PDF_NAME:
54
326M
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
55
326M
            break;
56
39.7k
        case PDF_BUFFER:
57
39.7k
            bytes = sizeof(pdf_buffer);
58
39.7k
            break;
59
12.8M
        case PDF_ARRAY:
60
12.8M
            bytes = sizeof(pdf_array);
61
12.8M
            break;
62
11.8M
        case PDF_DICT:
63
11.8M
            bytes = sizeof(pdf_dict);
64
11.8M
            break;
65
15.4M
        case PDF_INDIRECT:
66
15.4M
            bytes = sizeof(pdf_indirect_ref);
67
15.4M
            break;
68
15.7M
        case PDF_KEYWORD:
69
15.7M
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
70
15.7M
            break;
71
        /* The following aren't PDF object types, but are objects we either want to
72
         * reference count, or store on the stack.
73
         */
74
0
        case PDF_XREF_TABLE:
75
0
            bytes = sizeof(xref_table_t);
76
0
            break;
77
1.02M
        case PDF_STREAM:
78
1.02M
            bytes = sizeof(pdf_stream);
79
1.02M
            break;
80
0
        case PDF_NULL:
81
0
        case PDF_BOOL:
82
0
        default:
83
0
            code = gs_note_error(gs_error_typecheck);
84
0
            goto error_out;
85
709M
    }
86
709M
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
87
709M
    if (*obj == NULL) {
88
0
        code = gs_note_error(gs_error_VMerror);
89
0
        goto error_out;
90
0
    }
91
92
709M
    memset(*obj, 0x00, bytes);
93
709M
    (*obj)->ctx = ctx;
94
709M
    (*obj)->type = type;
95
96
709M
    switch(type) {
97
/*      PDF_NULL and PDF_BOOL are now handled as special (not allocated) data types
98
        and we will return an error in the switch above if we get a call to allocate
99
        one of these. Having the cases isn't harmful but Coverity complains of dead
100
        code, so commenting these out to silence Coverity while preserving the old
101
        semantics to indicate what's happening.
102
        case PDF_NULL:
103
        case PDF_BOOL: */
104
105
176M
        case PDF_INT:
106
301M
        case PDF_REAL:
107
316M
        case PDF_INDIRECT:
108
329M
        case PDF_ARRAY_MARK:
109
341M
        case PDF_DICT_MARK:
110
341M
        case PDF_PROC_MARK:
111
341M
            break;
112
15.7M
        case PDF_KEYWORD:
113
80.4M
        case PDF_STRING:
114
342M
        case PDF_NAME:
115
342M
            ((pdf_string *)*obj)->length = size;
116
342M
            break;
117
39.7k
        case PDF_BUFFER:
118
39.7k
            {
119
39.7k
                pdf_buffer *b = (pdf_buffer *)*obj;
120
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
121
                */
122
39.7k
                if (size > 0) {
123
0
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
124
0
                    if (b->data == NULL) {
125
0
                        code = gs_note_error(gs_error_VMerror);
126
0
                        goto error_out;
127
0
                    }
128
0
                }
129
39.7k
                else {
130
39.7k
                    b->data = NULL;
131
39.7k
                }
132
39.7k
                b->length = size;
133
39.7k
            }
134
0
            break;
135
12.8M
        case PDF_ARRAY:
136
12.8M
            {
137
12.8M
                pdf_obj **values = NULL;
138
139
12.8M
                ((pdf_array *)*obj)->size = size;
140
12.8M
                if (size > 0) {
141
11.9M
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_obj *), "pdfi_object_alloc");
142
11.9M
                    if (values == NULL) {
143
0
                        code = gs_note_error(gs_error_VMerror);
144
0
                        goto error_out;
145
0
                    }
146
11.9M
                    ((pdf_array *)*obj)->values = values;
147
11.9M
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
148
11.9M
                }
149
12.8M
            }
150
12.8M
            break;
151
12.8M
        case PDF_DICT:
152
11.8M
            {
153
11.8M
                pdf_dict_entry *entries = NULL;
154
155
11.8M
                ((pdf_dict *)*obj)->size = size;
156
11.8M
                if (size > 0) {
157
11.3M
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
158
11.3M
                    if (entries == NULL) {
159
0
                        code = gs_note_error(gs_error_VMerror);
160
0
                        goto error_out;
161
0
                    }
162
11.3M
                    ((pdf_dict *)*obj)->list = entries;
163
11.3M
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
164
11.3M
                }
165
11.8M
            }
166
11.8M
            break;
167
        /* The following aren't PDF object types, but are objects we either want to
168
         * reference count, or store on the stack.
169
         */
170
11.8M
        case PDF_XREF_TABLE:
171
0
            break;
172
1.02M
        default:
173
1.02M
            break;
174
709M
    }
175
#if REFCNT_DEBUG
176
    (*obj)->UID = ctx->ref_UID++;
177
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
178
#endif
179
709M
    return 0;
180
0
error_out:
181
0
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
182
0
    *obj = NULL;
183
0
    return code;
184
709M
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
29.0k
{
192
29.0k
    uint64_t test = 0;
193
29.0k
    int code = 0;
194
195
29.0k
    test = (uint64_t)floor(d);
196
29.0k
    if (d == test) {
197
22.9k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
22.9k
        if (code < 0)
199
0
            return code;
200
22.9k
        (*num)->value.i = test;
201
22.9k
    }
202
6.13k
    else {
203
6.13k
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
6.13k
        if (code < 0)
205
0
            return code;
206
6.13k
        (*num)->value.d = d;
207
6.13k
    }
208
209
29.0k
    return 0;
210
29.0k
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
static void pdfi_free_namestring(pdf_obj *o)
221
326M
{
222
    /* Currently names and strings are the same, so a single cast is OK */
223
326M
    pdf_name *n = (pdf_name *)o;
224
225
326M
    gs_free_object(OBJ_MEMORY(n), n, "pdf interpreter free name or string");
226
326M
}
227
228
static void pdfi_free_keyword(pdf_obj *o)
229
15.7M
{
230
15.7M
    pdf_keyword *k = (pdf_keyword *)o;
231
232
15.7M
    gs_free_object(OBJ_MEMORY(k), k, "pdf interpreter free keyword");
233
15.7M
}
234
235
static void pdfi_free_xref_table(pdf_obj *o)
236
90.1k
{
237
90.1k
    xref_table_t *xref = (xref_table_t *)o;
238
239
90.1k
    gs_free_object(OBJ_MEMORY(xref), xref->xref, "pdfi_free_xref_table");
240
90.1k
    gs_free_object(OBJ_MEMORY(xref), xref, "pdfi_free_xref_table");
241
90.1k
}
242
243
static void pdfi_free_stream(pdf_obj *o)
244
1.02M
{
245
1.02M
    pdf_stream *stream = (pdf_stream *)o;
246
247
1.02M
    pdfi_countdown(stream->stream_dict);
248
1.02M
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
249
1.02M
}
250
251
static void pdfi_free_buffer(pdf_obj *o)
252
39.7k
{
253
39.7k
    pdf_buffer *b = (pdf_buffer *)o;
254
255
39.7k
    gs_free_object(OBJ_MEMORY(b), b->data, "pdfi_free_buffer(data)");
256
39.7k
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
257
39.7k
}
258
259
void pdfi_free_object(pdf_obj *o)
260
711M
{
261
711M
    if (o == NULL)
262
664k
        return;
263
710M
    if ((intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
264
0
        return;
265
710M
    switch(o->type) {
266
12.9M
        case PDF_ARRAY_MARK:
267
24.4M
        case PDF_DICT_MARK:
268
24.9M
        case PDF_PROC_MARK:
269
201M
        case PDF_INT:
270
326M
        case PDF_REAL:
271
341M
        case PDF_INDIRECT:
272
341M
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
273
341M
            break;
274
64.6M
        case PDF_STRING:
275
326M
        case PDF_NAME:
276
326M
            pdfi_free_namestring(o);
277
326M
            break;
278
39.7k
        case PDF_BUFFER:
279
39.7k
            pdfi_free_buffer(o);
280
39.7k
            break;
281
12.8M
        case PDF_ARRAY:
282
12.8M
            pdfi_free_array(o);
283
12.8M
            break;
284
11.7M
        case PDF_DICT:
285
11.7M
            pdfi_free_dict(o);
286
11.7M
            break;
287
1.02M
        case PDF_STREAM:
288
1.02M
            pdfi_free_stream(o);
289
1.02M
            break;
290
15.7M
        case PDF_KEYWORD:
291
15.7M
            pdfi_free_keyword(o);
292
15.7M
            break;
293
90.1k
        case PDF_XREF_TABLE:
294
90.1k
            pdfi_free_xref_table(o);
295
90.1k
            break;
296
615k
        case PDF_FONT:
297
615k
            pdfi_free_font(o);
298
615k
            break;
299
72.0k
        case PDF_CMAP:
300
72.0k
            pdfi_free_cmap(o);
301
72.0k
            break;
302
0
        case PDF_BOOL:
303
0
        case PDF_NULL:
304
0
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
305
0
            break;
306
38
        default:
307
38
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
308
38
            break;
309
710M
    }
310
710M
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
1.02M
{
320
1.02M
    int code = 0;
321
1.02M
    pdf_stream *new_stream = NULL;
322
323
1.02M
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
1.02M
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
1.02M
    if (code < 0)
328
0
        goto error_exit;
329
330
1.02M
    new_stream->ctx = ctx;
331
1.02M
    pdfi_countup(new_stream);
332
333
1.02M
    new_stream->stream_dict = dict;
334
1.02M
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
1.02M
    if (do_convert) {
340
982k
        new_stream->object_num = dict->object_num;
341
982k
        new_stream->generation_num = dict->generation_num;
342
982k
        dict->object_num = 0;
343
982k
        dict->generation_num = 0;
344
982k
    }
345
1.02M
    *stream = new_stream;
346
1.02M
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
1.02M
}
352
353
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
354
240
{
355
240
    *dict = stream->stream_dict;
356
357
    /* Make sure the dictionary won't go away */
358
240
    pdfi_countup(*dict);
359
240
    if ((*dict)->object_num == 0) {
360
0
        (*dict)->object_num = stream->object_num;
361
0
        (*dict)->generation_num = stream->generation_num;
362
0
    }
363
364
240
    return 0;
365
240
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
419
{
370
419
    int code;
371
419
    int length = strlen(charstr);
372
419
    pdf_string *newstr = NULL;
373
374
419
    *string = NULL;
375
376
419
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
419
    if (code < 0) goto exit;
378
379
419
    memcpy(newstr->data, (byte *)charstr, length);
380
381
419
    *string = newstr;
382
419
    pdfi_countup(newstr);
383
419
 exit:
384
419
    return code;
385
419
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
448k
{
390
448k
    int code;
391
448k
    int length = strlen(charstr);
392
448k
    pdf_name *newname = NULL;
393
394
448k
    *name = NULL;
395
396
448k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
448k
    if (code < 0) goto exit;
398
399
448k
    memcpy(newname->data, (byte *)charstr, length);
400
401
448k
    *name = newname;
402
448k
    pdfi_countup(newname);
403
448k
 exit:
404
448k
    return code;
405
448k
}
406
407
/************ bufstream module BEGIN **************/
408
133k
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
418
133k
{
419
133k
    stream->len = INIT_BUF_SIZE;
420
133k
    stream->cur = 0;
421
133k
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");
422
423
133k
    if (!stream->data)
424
0
        return_error(gs_error_VMerror);
425
133k
    return 0;
426
133k
}
427
428
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
429
133k
{
430
133k
    if (stream->data)
431
209
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");
432
133k
    stream->len = 0;
433
133k
    stream->cur = 0;
434
133k
    stream->data = NULL;
435
133k
    return 0;
436
133k
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
133k
{
441
133k
    *buf = stream->data;
442
133k
    *len = stream->cur;
443
133k
    stream->len = 0;
444
133k
    stream->cur = 0;
445
133k
    stream->data = NULL;
446
133k
    return 0;
447
133k
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
6.22k
{
452
6.22k
    byte *data = NULL;
453
6.22k
    uint64_t newsize;
454
455
6.22k
    if (needed > max_int || stream->len > (max_int - needed) / 2)
456
0
        return_error(gs_error_rangecheck);
457
458
6.22k
    newsize = stream->len * 2 + needed;
459
6.22k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
460
6.22k
    if (!data)
461
0
        return_error(gs_error_VMerror);
462
463
6.22k
    memcpy(data, stream->data, stream->len);
464
6.22k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
465
6.22k
    stream->data = data;
466
6.22k
    stream->len = newsize;
467
468
6.22k
    return 0;
469
6.22k
}
470
471
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
472
1.66M
{
473
1.66M
    int code = 0;
474
475
1.66M
    if (stream->cur + len > stream->len) {
476
6.22k
        code = pdfi_bufstream_increase(ctx, stream, len);
477
6.22k
        if (code < 0)
478
0
            goto exit;
479
6.22k
    }
480
1.66M
    memcpy(stream->data + stream->cur, data, len);
481
1.66M
    stream->cur += len;
482
483
1.66M
 exit:
484
1.66M
    return code;
485
1.66M
}
486
487
/************ bufstream module END **************/
488
489
490
/* Create a c-string to use as object label
491
 * Uses the object_num to make it unique.
492
 * (don't call this for objects with object_num=0, though I am not going to check that here)
493
 *
494
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
495
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
496
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
497
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
498
 * object number and generation.
499
 */
500
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
501
16.7k
{
502
16.7k
    int code = 0, i;
503
16.7k
    int length;
504
16.7k
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
505
16.7k
    char *string = NULL;
506
16.7k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
507
16.7k
    uint32_t hash = 5381;
508
509
16.7k
    if (ctx->main_stream->s->file_name.data != NULL) {
510
0
        string = (char *)ctx->main_stream->s->file_name.data;
511
0
        length = ctx->main_stream->s->file_name.size;
512
513
0
        for (i=0;i < length;i++) {
514
#if ARCH_IS_BIG_ENDIAN
515
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
516
#else
517
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
518
0
#endif
519
0
        }
520
0
    }
521
522
16.7k
    *label = NULL;
523
16.7k
    length = strlen(template)+30;
524
525
16.7k
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
526
16.7k
    if (string == NULL) {
527
0
        code = gs_note_error(gs_error_VMerror);
528
0
        goto exit;
529
0
    }
530
531
16.7k
    if (pdfi_type_of(obj) == PDF_INDIRECT)
532
16.6k
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
533
48
    else
534
48
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
535
536
16.7k
    *label = string;
537
16.7k
 exit:
538
16.7k
    return code;
539
16.7k
}
540
541
/*********** BEGIN obj_to_string module ************/
542
543
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
544
545
/* Dispatch to get string representation of an object */
546
typedef struct {
547
    pdf_obj_type type;
548
    str_func func;
549
} obj_str_dispatch_t;
550
551
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
552
68
{
553
68
    int code = 0;
554
68
    int size = 12;
555
68
    byte *buf;
556
557
68
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_default_str(data)");
558
68
    if (buf == NULL)
559
0
        return_error(gs_error_VMerror);
560
68
    memcpy(buf, "/placeholder", size);
561
68
    *data = buf;
562
68
    *len = size;
563
68
    return code;
564
68
}
565
566
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
567
542k
{
568
542k
    int code = 0;
569
542k
    pdf_name *name = (pdf_name *)obj;
570
542k
    int size = name->length + 1;
571
542k
    byte *buf;
572
573
542k
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
574
542k
    if (buf == NULL)
575
0
        return_error(gs_error_VMerror);
576
542k
    buf[0] = '/';
577
542k
    memcpy(buf+1, name->data, name->length);
578
542k
    *data = buf;
579
542k
    *len = size;
580
542k
    return code;
581
542k
}
582
583
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
584
190k
{
585
190k
    int code = 0;
586
190k
    int size = 15;
587
190k
    pdf_num *number = (pdf_num *)obj;
588
190k
    char *buf;
589
590
190k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
591
190k
    if (buf == NULL)
592
0
        return_error(gs_error_VMerror);
593
190k
    snprintf(buf, size, "%.4f", number->value.d);
594
190k
    *data = (byte *)buf;
595
190k
    *len = strlen(buf);
596
190k
    return code;
597
190k
}
598
599
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
600
527k
{
601
527k
    int code = 0;
602
527k
    int size = 15;
603
527k
    pdf_num *number = (pdf_num *)obj;
604
527k
    char *buf;
605
606
527k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
607
527k
    if (buf == NULL)
608
0
        return_error(gs_error_VMerror);
609
527k
    snprintf(buf, size, "%"PRId64"", number->value.i);
610
527k
    *data = (byte *)buf;
611
527k
    *len = strlen(buf);
612
527k
    return code;
613
527k
}
614
615
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
616
14.6k
{
617
14.6k
    int size = 100;
618
14.6k
    char *buf;
619
620
14.6k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
621
14.6k
    if (buf == NULL)
622
0
        return_error(gs_error_VMerror);
623
14.6k
    snprintf(buf, size, "%"PRId64" %d R", object_num, generation);
624
14.6k
    *data = (byte *)buf;
625
14.6k
    *len = strlen(buf);
626
14.6k
    return 0;
627
14.6k
}
628
629
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
630
31.9k
{
631
31.9k
    int code = 0;
632
31.9k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
633
31.9k
    char *buf;
634
31.9k
    pdf_obj *object = NULL;
635
31.9k
    bool use_label = true;
636
637
31.9k
    code = pdfi_loop_detector_mark(ctx);
638
31.9k
    if (code < 0)
639
0
        return code;
640
641
31.9k
    if (ref->is_highlevelform) {
642
13.5k
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
643
13.5k
        ref->is_highlevelform = false;
644
18.3k
    } else {
645
18.3k
        if (!ref->is_marking) {
646
8.45k
            code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &object);
647
8.45k
            if (code == gs_error_undefined) {
648
                /* Do something sensible for undefined reference (this would be a broken file) */
649
                /* TODO: Flag an error? */
650
1.08k
                code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
651
1.08k
                goto exit;
652
1.08k
            }
653
7.37k
            if (code < 0 && code != gs_error_circular_reference)
654
121
                goto exit;
655
7.25k
            if (code == 0) {
656
6.81k
                if (pdfi_type_of(object) == PDF_STREAM) {
657
514
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)object);
658
514
                    if (code < 0) goto exit;
659
6.30k
                } else if (pdfi_type_of(object) == PDF_DICT) {
660
5.94k
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)object);
661
5.94k
                    if (code < 0) goto exit;
662
5.94k
                } else {
663
358
                    code = pdfi_obj_to_string(ctx, object, data, len);
664
358
                    if (code < 0) goto exit;
665
338
                    use_label = false;
666
338
                }
667
6.81k
            }
668
7.25k
        }
669
17.0k
        if (use_label) {
670
16.6k
            code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &buf);
671
16.6k
            if (code < 0) goto exit;
672
16.6k
            *data = (byte *)buf;
673
16.6k
            *len = strlen(buf);
674
16.6k
        }
675
17.0k
    }
676
677
31.9k
 exit:
678
31.9k
    (void)pdfi_loop_detector_cleartomark(ctx);
679
31.9k
    pdfi_countdown(object);
680
31.9k
    return code;
681
31.9k
}
682
683
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
684
21.8k
{
685
21.8k
    int code = 0;
686
21.8k
    int size = 5;
687
21.8k
    char *buf;
688
689
21.8k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_bool_str(data)");
690
21.8k
    if (buf == NULL)
691
0
        return_error(gs_error_VMerror);
692
21.8k
    if (obj == PDF_TRUE_OBJ) {
693
726
        memcpy(buf, (byte *)"true", 4);
694
726
        *len = 4;
695
21.1k
    } else {
696
21.1k
        memcpy(buf, (byte *)"false", 5);
697
21.1k
        *len = 5;
698
21.1k
    }
699
21.8k
    *data = (byte *)buf;
700
21.8k
    return code;
701
21.8k
}
702
703
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
704
799
{
705
799
    int code = 0;
706
799
    int size = 4;
707
799
    char *buf;
708
709
799
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_null_str(data)");
710
799
    if (buf == NULL)
711
0
        return_error(gs_error_VMerror);
712
799
    memcpy(buf, (byte *)"null", 4);
713
799
    *len = 4;
714
799
    *data = (byte *)buf;
715
799
    return code;
716
799
}
717
718
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
719
77.3k
{
720
77.3k
    pdf_string *string = (pdf_string *)obj;
721
77.3k
    char *buf;
722
77.3k
    int i, length = 0, j;
723
724
2.82M
    for (j=0;j<string->length;j++) {
725
2.74M
        if (string->data[j] == 0x0a || string->data[j] == 0x0d || string->data[j] == '(' || string->data[j] == ')' || string->data[j] == '\\')
726
4.26k
                length += 2;
727
2.74M
        else {
728
2.74M
            if (string->data[j] < 0x20 || string->data[j] > 0x7F || string->data[j] == '\\')
729
120k
                length += 4;
730
2.62M
            else
731
2.62M
                length++;
732
2.74M
        }
733
2.74M
    }
734
77.3k
    length += 2;
735
77.3k
    buf = (char *)gs_alloc_bytes(ctx->memory, length, "pdfi_obj_string_str(data)");
736
77.3k
    if (buf == NULL)
737
0
        return_error(gs_error_VMerror);
738
77.3k
    buf[0] = '(';
739
77.3k
    i = 1;
740
2.82M
    for (j=0;j<string->length;j++) {
741
2.74M
        switch(string->data[j]) {
742
633
            case 0x0a:
743
633
                buf[i++] = '\\';
744
633
                buf[i++] = 'n';
745
633
                break;
746
711
            case 0x0d:
747
711
                buf[i++] = '\\';
748
711
                buf[i++] = 'r';
749
711
                break;
750
1.38k
            case '(':
751
2.89k
            case ')':
752
2.92k
            case '\\':
753
2.92k
                buf[i++] = '\\';
754
2.92k
                buf[i++] = string->data[j];
755
2.92k
                break;
756
2.74M
            default:
757
2.74M
                if (string->data[j] < 0x20 || string->data[j] > 0x7F) {
758
120k
                    buf[i++] = '\\';
759
120k
                    buf[i++] = (string->data[j] >> 6) + 0x30;
760
120k
                    buf[i++] = ((string->data[j] & 0x3F) >> 3) + 0x30;
761
120k
                    buf[i++] = (string->data[j] & 0x07) + 0x30;
762
120k
                } else
763
2.62M
                buf[i++] = string->data[j];
764
2.74M
                break;
765
2.74M
        }
766
2.74M
    }
767
77.3k
    buf[i++] = ')';
768
769
77.3k
    *len = i;
770
77.3k
    *data = (byte *)buf;
771
77.3k
    return 0;
772
77.3k
}
773
774
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
775
94.3k
{
776
94.3k
    int code = 0;
777
94.3k
    pdf_array *array = (pdf_array *)obj;
778
94.3k
    pdf_obj *object = NULL;
779
94.3k
    byte *itembuf = NULL;
780
94.3k
    int itemsize;
781
94.3k
    pdfi_bufstream_t bufstream;
782
94.3k
    uint64_t index, arraysize;
783
784
94.3k
    code = pdfi_bufstream_init(ctx, &bufstream);
785
94.3k
    if (code < 0) goto exit;
786
787
94.3k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"[", 1);
788
94.3k
    if (code < 0) goto exit;
789
790
94.3k
    arraysize = pdfi_array_size(array);
791
765k
    for (index = 0; index < arraysize; index++) {
792
671k
        code = pdfi_array_get_no_deref(ctx, array, index, &object);
793
671k
        if (code < 0) goto exit;
794
795
671k
        code = pdfi_obj_to_string(ctx, object, &itembuf, &itemsize);
796
671k
        if (code < 0) goto exit;
797
798
671k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
799
671k
        if (code < 0) goto exit;
800
801
671k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
802
671k
        itembuf = NULL;
803
671k
        itemsize = 0;
804
671k
        pdfi_countdown(object);
805
671k
        object = NULL;
806
807
        /* Put a space between elements unless last item */
808
671k
        if (index+1 != arraysize) {
809
577k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
810
577k
            if (code < 0) goto exit;
811
577k
        }
812
671k
    }
813
814
94.2k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"]", 1);
815
94.2k
    if (code < 0) goto exit;
816
817
    /* Now copy the results out into the string we can keep */
818
94.2k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
819
820
94.3k
 exit:
821
94.3k
    if (itembuf)
822
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
823
94.3k
    pdfi_bufstream_free(ctx, &bufstream);
824
94.3k
    pdfi_countdown(object);
825
94.3k
    return code;
826
94.2k
}
827
828
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
829
533
{
830
533
    int code = 0;
831
533
    byte *buf;
832
533
    pdf_stream *stream = (pdf_stream *)obj;
833
533
    int64_t bufsize = 0;
834
533
    pdf_indirect_ref *streamref = NULL;
835
836
    /* TODO: How to deal with stream dictionaries?
837
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
838
     * See 'pushpin' annotation in annotations-galore_II.ps
839
     *
840
     * This will just literally grab the stream data.
841
     */
842
533
    if (stream->is_marking) {
843
466
        code = pdfi_stream_to_buffer(ctx, stream, &buf, &bufsize);
844
466
        if (code < 0) goto exit;
845
466
        *data = buf;
846
466
        *len = (int)bufsize;
847
466
    } else {
848
        /* Create an indirect ref for the stream */
849
67
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&streamref);
850
67
        if (code < 0) goto exit;
851
67
        pdfi_countup(streamref);
852
67
        streamref->ref_object_num = stream->object_num;
853
67
        streamref->ref_generation_num = stream->generation_num;
854
67
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)streamref, data, len);
855
67
    }
856
857
533
 exit:
858
533
    pdfi_countdown(streamref);
859
533
    return code;
860
533
}
861
862
/* This fetches without dereferencing.  If you want to see the references inline,
863
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
864
 */
865
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
866
39.3k
{
867
39.3k
    int code = 0;
868
39.3k
    pdf_dict *dict = (pdf_dict *)obj;
869
39.3k
    pdf_name *Key = NULL;
870
39.3k
    pdf_obj *Value = NULL;
871
39.3k
    byte *itembuf = NULL;
872
39.3k
    int itemsize;
873
39.3k
    pdfi_bufstream_t bufstream;
874
39.3k
    uint64_t index, dictsize;
875
39.3k
    uint64_t itemnum = 0;
876
877
39.3k
    code = pdfi_loop_detector_mark(ctx);
878
39.3k
    if (code < 0)
879
0
        return code;
880
881
39.3k
    code = pdfi_bufstream_init(ctx, &bufstream);
882
39.3k
    if (code < 0) goto exit;
883
884
39.3k
    dictsize = pdfi_dict_entries(dict);
885
    /* Handle empty dict specially */
886
39.3k
    if (dictsize == 0) {
887
5
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
888
5
        if (code < 0)
889
0
            goto exit;
890
5
        goto exit_copy;
891
5
    }
892
893
39.3k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
894
39.3k
    if (code < 0) goto exit;
895
896
    /* Note: We specifically fetch without dereferencing, so there will be no circular
897
     * references to handle here.
898
     */
899
    /* Wrong.... */
900
901
39.3k
    if (dict->object_num !=0 ) {
902
13.8k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
903
4
            code = gs_note_error(gs_error_circular_reference);
904
4
            goto exit;
905
4
        }
906
13.8k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
907
13.8k
        if (code < 0)
908
0
            goto exit;
909
13.8k
    }
910
911
    /* Get each (key,val) pair from dict and setup param for it */
912
39.3k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
913
48.2k
    while (code >= 0) {
914
48.2k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
915
48.2k
        if (code < 0) goto exit;
916
917
48.2k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
918
48.2k
        if (code < 0) goto exit;
919
920
48.2k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
921
48.2k
        itembuf = NULL;
922
48.2k
        itemsize = 0;
923
924
        /* Put a space between elements */
925
48.2k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
926
48.2k
        if (code < 0) goto exit;
927
928
        /* No dereference */
929
48.2k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
930
48.2k
        if (code < 0) goto exit;
931
48.2k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
932
48.2k
        if (code < 0) goto exit;
933
934
48.1k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
935
48.1k
        if (code < 0) goto exit;
936
937
48.1k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
938
48.1k
        itembuf = NULL;
939
48.1k
        itemsize = 0;
940
941
48.1k
        pdfi_countdown(Value);
942
48.1k
        Value = NULL;
943
48.1k
        pdfi_countdown(Key);
944
48.1k
        Key = NULL;
945
946
48.1k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
947
48.1k
        if (code == gs_error_undefined) {
948
39.2k
            code = 0;
949
39.2k
            break;
950
39.2k
        }
951
8.88k
        if (code < 0) goto exit;
952
953
        /* Put a space between elements */
954
8.88k
        if (++itemnum != dictsize) {
955
8.88k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
956
8.88k
            if (code < 0) goto exit;
957
8.88k
        }
958
8.88k
    }
959
39.2k
    if (code < 0) goto exit;
960
961
39.2k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
962
39.2k
    if (code < 0) goto exit;
963
964
39.2k
 exit_copy:
965
    /* Now copy the results out into the string we can keep */
966
39.2k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
967
968
39.3k
 exit:
969
39.3k
    if (itembuf)
970
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
971
39.3k
    pdfi_countdown(Key);
972
39.3k
    pdfi_countdown(Value);
973
39.3k
    pdfi_bufstream_free(ctx, &bufstream);
974
39.3k
    if (code < 0)
975
132
        (void)pdfi_loop_detector_cleartomark(ctx);
976
39.2k
    else
977
39.2k
        code = pdfi_loop_detector_cleartomark(ctx);
978
39.3k
    return code;
979
39.2k
}
980
981
#define PARAM1(A) # A,
982
#define PARAM2(A,B) A,
983
static const char pdf_token_strings[][10] = {
984
#include "pdf_tokens.h"
985
};
986
987
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
988
0
{
989
0
    int code = 0;
990
0
    const char *s = pdf_token_strings[(uintptr_t)obj];
991
0
    int size = (int)strlen(s) + 1;
992
0
    byte *buf;
993
994
0
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
995
0
    if (buf == NULL)
996
0
        return_error(gs_error_VMerror);
997
0
    memcpy(buf, s, size);
998
0
    *data = buf;
999
0
    *len = size;
1000
0
    return code;
1001
0
}
1002
1003
obj_str_dispatch_t obj_str_dispatch[] = {
1004
    {PDF_NAME, pdfi_obj_name_str},
1005
    {PDF_ARRAY, pdfi_obj_array_str},
1006
    {PDF_REAL, pdfi_obj_real_str},
1007
    {PDF_INT, pdfi_obj_int_str},
1008
    {PDF_BOOL, pdfi_obj_bool_str},
1009
    {PDF_STRING, pdfi_obj_string_str},
1010
    {PDF_DICT, pdfi_obj_dict_str},
1011
    {PDF_STREAM, pdfi_obj_stream_str},
1012
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1013
    {PDF_NULL, pdfi_obj_null_str},
1014
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1015
    {0, NULL}
1016
};
1017
1018
/* Recursive function to build a string from an object
1019
 */
1020
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1021
1.52M
{
1022
1.52M
    obj_str_dispatch_t *dispatch_ptr;
1023
1.52M
    int code = 0;
1024
1.52M
    pdf_obj_type type;
1025
1026
1.52M
    *data = NULL;
1027
1.52M
    *len = 0;
1028
1.52M
    type = pdfi_type_of(obj);
1029
4.56M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1030
4.56M
        if (type == dispatch_ptr->type) {
1031
1.52M
            code = dispatch_ptr->func(ctx, obj, data, len);
1032
1.52M
            goto exit;
1033
1.52M
        }
1034
4.56M
    }
1035
    /* Not implemented, use default */
1036
68
    code = pdfi_obj_default_str(ctx, obj, data, len);
1037
1.52M
 exit:
1038
1.52M
    return code;
1039
68
}
1040
1041
/*********** END obj_to_string module ************/