Coverage Report

Created: 2025-11-16 07:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct numbr of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
38
1.09G
{
39
1.09G
    int bytes = 0;
40
1.09G
    int code = 0;
41
42
1.09G
    switch(type) {
43
20.6M
        case PDF_ARRAY_MARK:
44
38.4M
        case PDF_DICT_MARK:
45
39.1M
        case PDF_PROC_MARK:
46
39.1M
            bytes = sizeof(pdf_obj);
47
39.1M
            break;
48
290M
        case PDF_INT:
49
496M
        case PDF_REAL:
50
496M
            bytes = sizeof(pdf_num);
51
496M
            break;
52
96.2M
        case PDF_STRING:
53
476M
        case PDF_NAME:
54
476M
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
55
476M
            break;
56
57.3k
        case PDF_BUFFER:
57
57.3k
            bytes = sizeof(pdf_buffer);
58
57.3k
            break;
59
20.5M
        case PDF_ARRAY:
60
20.5M
            bytes = sizeof(pdf_array);
61
20.5M
            break;
62
18.6M
        case PDF_DICT:
63
18.6M
            bytes = sizeof(pdf_dict);
64
18.6M
            break;
65
25.0M
        case PDF_INDIRECT:
66
25.0M
            bytes = sizeof(pdf_indirect_ref);
67
25.0M
            break;
68
21.1M
        case PDF_KEYWORD:
69
21.1M
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
70
21.1M
            break;
71
        /* The following aren't PDF object types, but are objects we either want to
72
         * reference count, or store on the stack.
73
         */
74
0
        case PDF_XREF_TABLE:
75
0
            bytes = sizeof(xref_table_t);
76
0
            break;
77
1.49M
        case PDF_STREAM:
78
1.49M
            bytes = sizeof(pdf_stream);
79
1.49M
            break;
80
0
        case PDF_NULL:
81
0
        case PDF_BOOL:
82
0
        default:
83
0
            code = gs_note_error(gs_error_typecheck);
84
0
            goto error_out;
85
1.09G
    }
86
1.09G
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
87
1.09G
    if (*obj == NULL) {
88
0
        code = gs_note_error(gs_error_VMerror);
89
0
        goto error_out;
90
0
    }
91
92
1.09G
    memset(*obj, 0x00, bytes);
93
1.09G
    (*obj)->ctx = ctx;
94
1.09G
    (*obj)->type = type;
95
96
1.09G
    switch(type) {
97
/*      PDF_NULL and PDF_BOOL are now handled as special (not allocated) data types
98
        and we will return an error in the switch above if we get a call to allocate
99
        one of these. Having the cases isn't harmful but Coverity complains of dead
100
        code, so commenting these out to silence Coverity while preserving the old
101
        semantics to indicate what's happening.
102
        case PDF_NULL:
103
        case PDF_BOOL: */
104
105
290M
        case PDF_INT:
106
496M
        case PDF_REAL:
107
522M
        case PDF_INDIRECT:
108
542M
        case PDF_ARRAY_MARK:
109
560M
        case PDF_DICT_MARK:
110
561M
        case PDF_PROC_MARK:
111
561M
            break;
112
21.1M
        case PDF_KEYWORD:
113
117M
        case PDF_STRING:
114
497M
        case PDF_NAME:
115
497M
            ((pdf_string *)*obj)->length = size;
116
497M
            break;
117
57.3k
        case PDF_BUFFER:
118
57.3k
            {
119
57.3k
                pdf_buffer *b = (pdf_buffer *)*obj;
120
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
121
                */
122
57.3k
                if (size > 0) {
123
0
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
124
0
                    if (b->data == NULL) {
125
0
                        code = gs_note_error(gs_error_VMerror);
126
0
                        goto error_out;
127
0
                    }
128
0
                }
129
57.3k
                else {
130
57.3k
                    b->data = NULL;
131
57.3k
                }
132
57.3k
                b->length = size;
133
57.3k
            }
134
0
            break;
135
20.5M
        case PDF_ARRAY:
136
20.5M
            {
137
20.5M
                pdf_obj **values = NULL;
138
139
20.5M
                ((pdf_array *)*obj)->size = size;
140
20.5M
                if (size > 0) {
141
19.3M
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_obj *), "pdfi_object_alloc");
142
19.3M
                    if (values == NULL) {
143
0
                        code = gs_note_error(gs_error_VMerror);
144
0
                        goto error_out;
145
0
                    }
146
19.3M
                    ((pdf_array *)*obj)->values = values;
147
19.3M
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
148
19.3M
                }
149
20.5M
            }
150
20.5M
            break;
151
20.5M
        case PDF_DICT:
152
18.6M
            {
153
18.6M
                pdf_dict_entry *entries = NULL;
154
155
18.6M
                ((pdf_dict *)*obj)->size = size;
156
18.6M
                if (size > 0) {
157
17.8M
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
158
17.8M
                    if (entries == NULL) {
159
0
                        code = gs_note_error(gs_error_VMerror);
160
0
                        goto error_out;
161
0
                    }
162
17.8M
                    ((pdf_dict *)*obj)->list = entries;
163
17.8M
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
164
17.8M
                }
165
18.6M
            }
166
18.6M
            break;
167
        /* The following aren't PDF object types, but are objects we either want to
168
         * reference count, or store on the stack.
169
         */
170
18.6M
        case PDF_XREF_TABLE:
171
0
            break;
172
1.49M
        default:
173
1.49M
            break;
174
1.09G
    }
175
#if REFCNT_DEBUG
176
    (*obj)->UID = ctx->ref_UID++;
177
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
178
#endif
179
1.09G
    return 0;
180
0
error_out:
181
0
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
182
0
    *obj = NULL;
183
0
    return code;
184
1.09G
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
38.8k
{
192
38.8k
    uint64_t test = 0;
193
38.8k
    int code = 0;
194
195
38.8k
    test = (uint64_t)floor(d);
196
38.8k
    if (d == test) {
197
31.1k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
31.1k
        if (code < 0)
199
0
            return code;
200
31.1k
        (*num)->value.i = test;
201
31.1k
    }
202
7.70k
    else {
203
7.70k
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
7.70k
        if (code < 0)
205
0
            return code;
206
7.70k
        (*num)->value.d = d;
207
7.70k
    }
208
209
38.8k
    return 0;
210
38.8k
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
static void pdfi_free_namestring(pdf_obj *o)
221
476M
{
222
    /* Currently names and strings are the same, so a single cast is OK */
223
476M
    pdf_name *n = (pdf_name *)o;
224
225
476M
    gs_free_object(OBJ_MEMORY(n), n, "pdf interpreter free name or string");
226
476M
}
227
228
static void pdfi_free_keyword(pdf_obj *o)
229
21.1M
{
230
21.1M
    pdf_keyword *k = (pdf_keyword *)o;
231
232
21.1M
    gs_free_object(OBJ_MEMORY(k), k, "pdf interpreter free keyword");
233
21.1M
}
234
235
static void pdfi_free_xref_table(pdf_obj *o)
236
106k
{
237
106k
    xref_table_t *xref = (xref_table_t *)o;
238
239
106k
    gs_free_object(OBJ_MEMORY(xref), xref->xref, "pdfi_free_xref_table");
240
106k
    gs_free_object(OBJ_MEMORY(xref), xref, "pdfi_free_xref_table");
241
106k
}
242
243
static void pdfi_free_stream(pdf_obj *o)
244
1.49M
{
245
1.49M
    pdf_stream *stream = (pdf_stream *)o;
246
247
1.49M
    pdfi_countdown(stream->stream_dict);
248
1.49M
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
249
1.49M
}
250
251
static void pdfi_free_buffer(pdf_obj *o)
252
57.3k
{
253
57.3k
    pdf_buffer *b = (pdf_buffer *)o;
254
255
57.3k
    gs_free_object(OBJ_MEMORY(b), b->data, "pdfi_free_buffer(data)");
256
57.3k
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
257
57.3k
}
258
259
void pdfi_free_object(pdf_obj *o)
260
1.10G
{
261
1.10G
    if (o == NULL)
262
1.01M
        return;
263
1.10G
    if ((intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
264
0
        return;
265
1.10G
    switch(o->type) {
266
20.6M
        case PDF_ARRAY_MARK:
267
38.4M
        case PDF_DICT_MARK:
268
39.1M
        case PDF_PROC_MARK:
269
329M
        case PDF_INT:
270
536M
        case PDF_REAL:
271
561M
        case PDF_INDIRECT:
272
561M
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
273
561M
            break;
274
96.2M
        case PDF_STRING:
275
476M
        case PDF_NAME:
276
476M
            pdfi_free_namestring(o);
277
476M
            break;
278
57.3k
        case PDF_BUFFER:
279
57.3k
            pdfi_free_buffer(o);
280
57.3k
            break;
281
20.5M
        case PDF_ARRAY:
282
20.5M
            pdfi_free_array(o);
283
20.5M
            break;
284
18.6M
        case PDF_DICT:
285
18.6M
            pdfi_free_dict(o);
286
18.6M
            break;
287
1.49M
        case PDF_STREAM:
288
1.49M
            pdfi_free_stream(o);
289
1.49M
            break;
290
21.1M
        case PDF_KEYWORD:
291
21.1M
            pdfi_free_keyword(o);
292
21.1M
            break;
293
106k
        case PDF_XREF_TABLE:
294
106k
            pdfi_free_xref_table(o);
295
106k
            break;
296
1.21M
        case PDF_FONT:
297
1.21M
            pdfi_free_font(o);
298
1.21M
            break;
299
102k
        case PDF_CMAP:
300
102k
            pdfi_free_cmap(o);
301
102k
            break;
302
0
        case PDF_BOOL:
303
0
        case PDF_NULL:
304
0
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
305
0
            break;
306
98
        default:
307
98
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
308
98
            break;
309
1.10G
    }
310
1.10G
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
1.49M
{
320
1.49M
    int code = 0;
321
1.49M
    pdf_stream *new_stream = NULL;
322
323
1.49M
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
1.49M
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
1.49M
    if (code < 0)
328
0
        goto error_exit;
329
330
1.49M
    new_stream->ctx = ctx;
331
1.49M
    pdfi_countup(new_stream);
332
333
1.49M
    new_stream->stream_dict = dict;
334
1.49M
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
1.49M
    if (do_convert) {
340
1.41M
        new_stream->object_num = dict->object_num;
341
1.41M
        new_stream->generation_num = dict->generation_num;
342
1.41M
        dict->object_num = 0;
343
1.41M
        dict->generation_num = 0;
344
1.41M
    }
345
1.49M
    *stream = new_stream;
346
1.49M
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
1.49M
}
352
353
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
354
306
{
355
306
    *dict = stream->stream_dict;
356
357
    /* Make sure the dictionary won't go away */
358
306
    pdfi_countup(*dict);
359
306
    if ((*dict)->object_num == 0) {
360
0
        (*dict)->object_num = stream->object_num;
361
0
        (*dict)->generation_num = stream->generation_num;
362
0
    }
363
364
306
    return 0;
365
306
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
675
{
370
675
    int code;
371
675
    int length = strlen(charstr);
372
675
    pdf_string *newstr = NULL;
373
374
675
    *string = NULL;
375
376
675
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
675
    if (code < 0) goto exit;
378
379
675
    memcpy(newstr->data, (byte *)charstr, length);
380
381
675
    *string = newstr;
382
675
    pdfi_countup(newstr);
383
675
 exit:
384
675
    return code;
385
675
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
986k
{
390
986k
    int code;
391
986k
    int length = strlen(charstr);
392
986k
    pdf_name *newname = NULL;
393
394
986k
    *name = NULL;
395
396
986k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
986k
    if (code < 0) goto exit;
398
399
986k
    memcpy(newname->data, (byte *)charstr, length);
400
401
986k
    *name = newname;
402
986k
    pdfi_countup(newname);
403
986k
 exit:
404
986k
    return code;
405
986k
}
406
407
/************ bufstream module BEGIN **************/
408
178k
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
418
178k
{
419
178k
    stream->len = INIT_BUF_SIZE;
420
178k
    stream->cur = 0;
421
178k
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");
422
423
178k
    if (!stream->data)
424
0
        return_error(gs_error_VMerror);
425
178k
    return 0;
426
178k
}
427
428
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
429
178k
{
430
178k
    if (stream->data)
431
353
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");
432
178k
    stream->len = 0;
433
178k
    stream->cur = 0;
434
178k
    stream->data = NULL;
435
178k
    return 0;
436
178k
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
178k
{
441
178k
    *buf = stream->data;
442
178k
    *len = stream->cur;
443
178k
    stream->len = 0;
444
178k
    stream->cur = 0;
445
178k
    stream->data = NULL;
446
178k
    return 0;
447
178k
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
7.34k
{
452
7.34k
    byte *data = NULL;
453
7.34k
    uint64_t newsize;
454
455
7.34k
    newsize = stream->len * 2 + needed;
456
7.34k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
457
7.34k
    if (!data)
458
0
        return_error(gs_error_VMerror);
459
460
7.34k
    memcpy(data, stream->data, stream->len);
461
7.34k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
462
7.34k
    stream->data = data;
463
7.34k
    stream->len = newsize;
464
465
7.34k
    return 0;
466
7.34k
}
467
468
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
469
2.10M
{
470
2.10M
    int code = 0;
471
472
2.10M
    if (stream->cur + len > stream->len) {
473
7.34k
        code = pdfi_bufstream_increase(ctx, stream, len);
474
7.34k
        if (code < 0)
475
0
            goto exit;
476
7.34k
    }
477
2.10M
    memcpy(stream->data + stream->cur, data, len);
478
2.10M
    stream->cur += len;
479
480
2.10M
 exit:
481
2.10M
    return code;
482
2.10M
}
483
484
/************ bufstream module END **************/
485
486
487
/* Create a c-string to use as object label
488
 * Uses the object_num to make it unique.
489
 * (don't call this for objects with object_num=0, though I am not going to check that here)
490
 *
491
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
492
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
493
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
494
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
495
 * object number and generation.
496
 */
497
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
498
47.3k
{
499
47.3k
    int code = 0, i;
500
47.3k
    int length;
501
47.3k
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
502
47.3k
    char *string = NULL;
503
47.3k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
504
47.3k
    uint32_t hash = 5381;
505
506
47.3k
    if (ctx->main_stream->s->file_name.data != NULL) {
507
0
        string = (char *)ctx->main_stream->s->file_name.data;
508
0
        length = ctx->main_stream->s->file_name.size;
509
510
0
        for (i=0;i < length;i++) {
511
#if ARCH_IS_BIG_ENDIAN
512
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
513
#else
514
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
515
0
#endif
516
0
        }
517
0
    }
518
519
47.3k
    *label = NULL;
520
47.3k
    length = strlen(template)+30;
521
522
47.3k
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
523
47.3k
    if (string == NULL) {
524
0
        code = gs_note_error(gs_error_VMerror);
525
0
        goto exit;
526
0
    }
527
528
47.3k
    if (pdfi_type_of(obj) == PDF_INDIRECT)
529
47.3k
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
530
77
    else
531
77
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
532
533
47.3k
    *label = string;
534
47.3k
 exit:
535
47.3k
    return code;
536
47.3k
}
537
538
/*********** BEGIN obj_to_string module ************/
539
540
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
541
542
/* Dispatch to get string representation of an object */
543
typedef struct {
544
    pdf_obj_type type;
545
    str_func func;
546
} obj_str_dispatch_t;
547
548
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
549
141
{
550
141
    int code = 0;
551
141
    int size = 12;
552
141
    byte *buf;
553
554
141
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_default_str(data)");
555
141
    if (buf == NULL)
556
0
        return_error(gs_error_VMerror);
557
141
    memcpy(buf, "/placeholder", size);
558
141
    *data = buf;
559
141
    *len = size;
560
141
    return code;
561
141
}
562
563
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
564
712k
{
565
712k
    int code = 0;
566
712k
    pdf_name *name = (pdf_name *)obj;
567
712k
    int size = name->length + 1;
568
712k
    byte *buf;
569
570
712k
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
571
712k
    if (buf == NULL)
572
0
        return_error(gs_error_VMerror);
573
712k
    buf[0] = '/';
574
712k
    memcpy(buf+1, name->data, name->length);
575
712k
    *data = buf;
576
712k
    *len = size;
577
712k
    return code;
578
712k
}
579
580
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
581
231k
{
582
231k
    int code = 0;
583
231k
    int size = 15;
584
231k
    pdf_num *number = (pdf_num *)obj;
585
231k
    char *buf;
586
587
231k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
588
231k
    if (buf == NULL)
589
0
        return_error(gs_error_VMerror);
590
231k
    snprintf(buf, size, "%.4f", number->value.d);
591
231k
    *data = (byte *)buf;
592
231k
    *len = strlen(buf);
593
231k
    return code;
594
231k
}
595
596
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
597
637k
{
598
637k
    int code = 0;
599
637k
    int size = 15;
600
637k
    pdf_num *number = (pdf_num *)obj;
601
637k
    char *buf;
602
603
637k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
604
637k
    if (buf == NULL)
605
0
        return_error(gs_error_VMerror);
606
637k
    snprintf(buf, size, "%"PRId64"", number->value.i);
607
637k
    *data = (byte *)buf;
608
637k
    *len = strlen(buf);
609
637k
    return code;
610
637k
}
611
612
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
613
17.3k
{
614
17.3k
    int size = 100;
615
17.3k
    char *buf;
616
617
17.3k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
618
17.3k
    if (buf == NULL)
619
0
        return_error(gs_error_VMerror);
620
17.3k
    snprintf(buf, size, "%"PRId64" %d R", object_num, generation);
621
17.3k
    *data = (byte *)buf;
622
17.3k
    *len = strlen(buf);
623
17.3k
    return 0;
624
17.3k
}
625
626
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
627
65.6k
{
628
65.6k
    int code = 0;
629
65.6k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
630
65.6k
    char *buf;
631
65.6k
    pdf_obj *object = NULL;
632
65.6k
    bool use_label = true;
633
634
65.6k
    code = pdfi_loop_detector_mark(ctx);
635
65.6k
    if (code < 0)
636
0
        return code;
637
638
65.6k
    if (ref->is_highlevelform) {
639
16.1k
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
640
16.1k
        ref->is_highlevelform = false;
641
49.4k
    } else {
642
49.4k
        if (!ref->is_marking) {
643
21.9k
            code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &object);
644
21.9k
            if (code == gs_error_undefined) {
645
                /* Do something sensible for undefined reference (this would be a broken file) */
646
                /* TODO: Flag an error? */
647
1.17k
                code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
648
1.17k
                goto exit;
649
1.17k
            }
650
20.7k
            if (code < 0 && code != gs_error_circular_reference)
651
196
                goto exit;
652
20.5k
            if (code == 0) {
653
20.0k
                if (pdfi_type_of(object) == PDF_STREAM) {
654
642
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)object);
655
642
                    if (code < 0) goto exit;
656
19.4k
                } else if (pdfi_type_of(object) == PDF_DICT) {
657
18.7k
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)object);
658
18.7k
                    if (code < 0) goto exit;
659
18.7k
                } else {
660
674
                    code = pdfi_obj_to_string(ctx, object, data, len);
661
674
                    if (code < 0) goto exit;
662
649
                    use_label = false;
663
649
                }
664
20.0k
            }
665
20.5k
        }
666
47.9k
        if (use_label) {
667
47.3k
            code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &buf);
668
47.3k
            if (code < 0) goto exit;
669
47.3k
            *data = (byte *)buf;
670
47.3k
            *len = strlen(buf);
671
47.3k
        }
672
47.9k
    }
673
674
65.6k
 exit:
675
65.6k
    (void)pdfi_loop_detector_cleartomark(ctx);
676
65.6k
    pdfi_countdown(object);
677
65.6k
    return code;
678
65.6k
}
679
680
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
681
25.2k
{
682
25.2k
    int code = 0;
683
25.2k
    int size = 5;
684
25.2k
    char *buf;
685
686
25.2k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_bool_str(data)");
687
25.2k
    if (buf == NULL)
688
0
        return_error(gs_error_VMerror);
689
25.2k
    if (obj == PDF_TRUE_OBJ) {
690
963
        memcpy(buf, (byte *)"true", 4);
691
963
        *len = 4;
692
24.2k
    } else {
693
24.2k
        memcpy(buf, (byte *)"false", 5);
694
24.2k
        *len = 5;
695
24.2k
    }
696
25.2k
    *data = (byte *)buf;
697
25.2k
    return code;
698
25.2k
}
699
700
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
701
1.06k
{
702
1.06k
    int code = 0;
703
1.06k
    int size = 4;
704
1.06k
    char *buf;
705
706
1.06k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_null_str(data)");
707
1.06k
    if (buf == NULL)
708
0
        return_error(gs_error_VMerror);
709
1.06k
    memcpy(buf, (byte *)"null", 4);
710
1.06k
    *len = 4;
711
1.06k
    *data = (byte *)buf;
712
1.06k
    return code;
713
1.06k
}
714
715
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
716
98.3k
{
717
98.3k
    pdf_string *string = (pdf_string *)obj;
718
98.3k
    char *buf;
719
98.3k
    int i, length = 0, j;
720
721
3.64M
    for (j=0;j<string->length;j++) {
722
3.54M
        if (string->data[j] == 0x0a || string->data[j] == 0x0d || string->data[j] == '(' || string->data[j] == ')' || string->data[j] == '\\')
723
5.98k
                length += 2;
724
3.54M
        else {
725
3.54M
            if (string->data[j] < 0x20 || string->data[j] > 0x7F || string->data[j] == '\\')
726
142k
                length += 4;
727
3.40M
            else
728
3.40M
                length++;
729
3.54M
        }
730
3.54M
    }
731
98.3k
    length += 2;
732
98.3k
    buf = (char *)gs_alloc_bytes(ctx->memory, length, "pdfi_obj_string_str(data)");
733
98.3k
    if (buf == NULL)
734
0
        return_error(gs_error_VMerror);
735
98.3k
    buf[0] = '(';
736
98.3k
    i = 1;
737
3.64M
    for (j=0;j<string->length;j++) {
738
3.54M
        switch(string->data[j]) {
739
956
            case 0x0a:
740
956
                buf[i++] = '\\';
741
956
                buf[i++] = 'n';
742
956
                break;
743
821
            case 0x0d:
744
821
                buf[i++] = '\\';
745
821
                buf[i++] = 'r';
746
821
                break;
747
2.00k
            case '(':
748
4.16k
            case ')':
749
4.20k
            case '\\':
750
4.20k
                buf[i++] = '\\';
751
4.20k
                buf[i++] = string->data[j];
752
4.20k
                break;
753
3.54M
            default:
754
3.54M
                if (string->data[j] < 0x20 || string->data[j] > 0x7F) {
755
142k
                    buf[i++] = '\\';
756
142k
                    buf[i++] = (string->data[j] >> 6) + 0x30;
757
142k
                    buf[i++] = ((string->data[j] & 0x3F) >> 3) + 0x30;
758
142k
                    buf[i++] = (string->data[j] & 0x07) + 0x30;
759
142k
                } else
760
3.40M
                buf[i++] = string->data[j];
761
3.54M
                break;
762
3.54M
        }
763
3.54M
    }
764
98.3k
    buf[i++] = ')';
765
766
98.3k
    *len = i;
767
98.3k
    *data = (byte *)buf;
768
98.3k
    return 0;
769
98.3k
}
770
771
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
772
114k
{
773
114k
    int code = 0;
774
114k
    pdf_array *array = (pdf_array *)obj;
775
114k
    pdf_obj *object = NULL;
776
114k
    byte *itembuf = NULL;
777
114k
    int itemsize;
778
114k
    pdfi_bufstream_t bufstream;
779
114k
    uint64_t index, arraysize;
780
781
114k
    code = pdfi_bufstream_init(ctx, &bufstream);
782
114k
    if (code < 0) goto exit;
783
784
114k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"[", 1);
785
114k
    if (code < 0) goto exit;
786
787
114k
    arraysize = pdfi_array_size(array);
788
923k
    for (index = 0; index < arraysize; index++) {
789
809k
        code = pdfi_array_get_no_deref(ctx, array, index, &object);
790
809k
        if (code < 0) goto exit;
791
792
809k
        code = pdfi_obj_to_string(ctx, object, &itembuf, &itemsize);
793
809k
        if (code < 0) goto exit;
794
795
808k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
796
808k
        if (code < 0) goto exit;
797
798
808k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
799
808k
        itembuf = NULL;
800
808k
        itemsize = 0;
801
808k
        pdfi_countdown(object);
802
808k
        object = NULL;
803
804
        /* Put a space between elements unless last item */
805
808k
        if (index+1 != arraysize) {
806
694k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
807
694k
            if (code < 0) goto exit;
808
694k
        }
809
808k
    }
810
811
114k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"]", 1);
812
114k
    if (code < 0) goto exit;
813
814
    /* Now copy the results out into the string we can keep */
815
114k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
816
817
114k
 exit:
818
114k
    if (itembuf)
819
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
820
114k
    pdfi_bufstream_free(ctx, &bufstream);
821
114k
    pdfi_countdown(object);
822
114k
    return code;
823
114k
}
824
825
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
826
644
{
827
644
    int code = 0;
828
644
    byte *buf;
829
644
    pdf_stream *stream = (pdf_stream *)obj;
830
644
    int64_t bufsize = 0;
831
644
    pdf_indirect_ref *streamref = NULL;
832
833
    /* TODO: How to deal with stream dictionaries?
834
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
835
     * See 'pushpin' annotation in annotations-galore_II.ps
836
     *
837
     * This will just literally grab the stream data.
838
     */
839
644
    if (stream->is_marking) {
840
568
        code = pdfi_stream_to_buffer(ctx, stream, &buf, &bufsize);
841
568
        if (code < 0) goto exit;
842
568
        *data = buf;
843
568
        *len = (int)bufsize;
844
568
    } else {
845
        /* Create an indirect ref for the stream */
846
76
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&streamref);
847
76
        if (code < 0) goto exit;
848
76
        pdfi_countup(streamref);
849
76
        streamref->ref_object_num = stream->object_num;
850
76
        streamref->ref_generation_num = stream->generation_num;
851
76
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)streamref, data, len);
852
76
    }
853
854
644
 exit:
855
644
    pdfi_countdown(streamref);
856
644
    return code;
857
644
}
858
859
/* This fetches without dereferencing.  If you want to see the references inline,
860
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
861
 */
862
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
863
63.9k
{
864
63.9k
    int code = 0;
865
63.9k
    pdf_dict *dict = (pdf_dict *)obj;
866
63.9k
    pdf_name *Key = NULL;
867
63.9k
    pdf_obj *Value = NULL;
868
63.9k
    byte *itembuf = NULL;
869
63.9k
    int itemsize;
870
63.9k
    pdfi_bufstream_t bufstream;
871
63.9k
    uint64_t index, dictsize;
872
63.9k
    uint64_t itemnum = 0;
873
874
63.9k
    code = pdfi_loop_detector_mark(ctx);
875
63.9k
    if (code < 0)
876
0
        return code;
877
878
63.9k
    code = pdfi_bufstream_init(ctx, &bufstream);
879
63.9k
    if (code < 0) goto exit;
880
881
63.9k
    dictsize = pdfi_dict_entries(dict);
882
    /* Handle empty dict specially */
883
63.9k
    if (dictsize == 0) {
884
10
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
885
10
        if (code < 0)
886
0
            goto exit;
887
10
        goto exit_copy;
888
10
    }
889
890
63.9k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
891
63.9k
    if (code < 0) goto exit;
892
893
    /* Note: We specifically fetch without dereferencing, so there will be no circular
894
     * references to handle here.
895
     */
896
    /* Wrong.... */
897
898
63.9k
    if (dict->object_num !=0 ) {
899
16.3k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
900
4
            code = gs_note_error(gs_error_circular_reference);
901
4
            goto exit;
902
4
        }
903
16.3k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
904
16.3k
        if (code < 0)
905
0
            goto exit;
906
16.3k
    }
907
908
    /* Get each (key,val) pair from dict and setup param for it */
909
63.9k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
910
77.7k
    while (code >= 0) {
911
77.7k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
912
77.7k
        if (code < 0) goto exit;
913
914
77.7k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
915
77.7k
        if (code < 0) goto exit;
916
917
77.7k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
918
77.7k
        itembuf = NULL;
919
77.7k
        itemsize = 0;
920
921
        /* Put a space between elements */
922
77.7k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
923
77.7k
        if (code < 0) goto exit;
924
925
        /* No dereference */
926
77.7k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
927
77.7k
        if (code < 0) goto exit;
928
77.7k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
929
77.7k
        if (code < 0) goto exit;
930
931
77.5k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
932
77.5k
        if (code < 0) goto exit;
933
934
77.5k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
935
77.5k
        itembuf = NULL;
936
77.5k
        itemsize = 0;
937
938
77.5k
        pdfi_countdown(Value);
939
77.5k
        Value = NULL;
940
77.5k
        pdfi_countdown(Key);
941
77.5k
        Key = NULL;
942
943
77.5k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
944
77.5k
        if (code == gs_error_undefined) {
945
63.6k
            code = 0;
946
63.6k
            break;
947
63.6k
        }
948
13.8k
        if (code < 0) goto exit;
949
950
        /* Put a space between elements */
951
13.8k
        if (++itemnum != dictsize) {
952
13.8k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
953
13.8k
            if (code < 0) goto exit;
954
13.8k
        }
955
13.8k
    }
956
63.6k
    if (code < 0) goto exit;
957
958
63.6k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
959
63.6k
    if (code < 0) goto exit;
960
961
63.7k
 exit_copy:
962
    /* Now copy the results out into the string we can keep */
963
63.7k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
964
965
63.9k
 exit:
966
63.9k
    if (itembuf)
967
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
968
63.9k
    pdfi_countdown(Key);
969
63.9k
    pdfi_countdown(Value);
970
63.9k
    pdfi_bufstream_free(ctx, &bufstream);
971
63.9k
    if (code < 0)
972
206
        (void)pdfi_loop_detector_cleartomark(ctx);
973
63.7k
    else
974
63.7k
        code = pdfi_loop_detector_cleartomark(ctx);
975
63.9k
    return code;
976
63.7k
}
977
978
#define PARAM1(A) # A,
979
#define PARAM2(A,B) A,
980
static const char pdf_token_strings[][10] = {
981
#include "pdf_tokens.h"
982
};
983
984
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
985
3
{
986
3
    int code = 0;
987
3
    const char *s = pdf_token_strings[(uintptr_t)obj];
988
3
    int size = (int)strlen(s) + 1;
989
3
    byte *buf;
990
991
3
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
992
3
    if (buf == NULL)
993
0
        return_error(gs_error_VMerror);
994
3
    memcpy(buf, s, size);
995
3
    *data = buf;
996
3
    *len = size;
997
3
    return code;
998
3
}
999
1000
obj_str_dispatch_t obj_str_dispatch[] = {
1001
    {PDF_NAME, pdfi_obj_name_str},
1002
    {PDF_ARRAY, pdfi_obj_array_str},
1003
    {PDF_REAL, pdfi_obj_real_str},
1004
    {PDF_INT, pdfi_obj_int_str},
1005
    {PDF_BOOL, pdfi_obj_bool_str},
1006
    {PDF_STRING, pdfi_obj_string_str},
1007
    {PDF_DICT, pdfi_obj_dict_str},
1008
    {PDF_STREAM, pdfi_obj_stream_str},
1009
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1010
    {PDF_NULL, pdfi_obj_null_str},
1011
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1012
    {0, NULL}
1013
};
1014
1015
/* Recursive function to build a string from an object
1016
 */
1017
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1018
1.95M
{
1019
1.95M
    obj_str_dispatch_t *dispatch_ptr;
1020
1.95M
    int code = 0;
1021
1.95M
    pdf_obj_type type;
1022
1023
1.95M
    *data = NULL;
1024
1.95M
    *len = 0;
1025
1.95M
    type = pdfi_type_of(obj);
1026
5.95M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1027
5.95M
        if (type == dispatch_ptr->type) {
1028
1.95M
            code = dispatch_ptr->func(ctx, obj, data, len);
1029
1.95M
            goto exit;
1030
1.95M
        }
1031
5.95M
    }
1032
    /* Not implemented, use default */
1033
141
    code = pdfi_obj_default_str(ctx, obj, data, len);
1034
1.95M
 exit:
1035
1.95M
    return code;
1036
141
}
1037
1038
/*********** END obj_to_string module ************/