Coverage Report

Created: 2025-08-28 07:06

/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct number of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
/* Allocate and zero-initialise a pdfi object of the requested type.
 *
 * ctx  - interpreter context; storage comes from ctx->memory and the context
 *        pointer is recorded in the new object.
 * type - kind of object to create. PDF_NULL and PDF_BOOL are not allocated
 *        objects, so they (and any unrecognised type) yield gs_error_typecheck.
 * size - for strings, names and keywords: number of data bytes;
 *        for arrays and dictionaries: number of entries;
 *        for buffers: size of the data area (0 means the caller will supply
 *        the data area itself);
 *        ignored for every other type.
 * obj  - receives the new object on success, NULL on any failure.
 *
 * Returns 0 on success, gs_error_typecheck or gs_error_VMerror on failure.
 * The object is zero-filled, so its reference count starts at 0.
 */
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
{
    /* size_t rather than int: the string/name/keyword sizes are derived from
     * the caller-supplied unsigned 'size' and must not wrap to a negative value.
     */
    size_t bytes = 0;
    int code = 0;

    /* The error path frees *obj; make sure it never frees whatever stale
     * pointer the caller happened to pass in (e.g. on the typecheck path,
     * which is reached before anything has been allocated).
     */
    *obj = NULL;

    switch(type) {
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            bytes = sizeof(pdf_obj);
            break;
        case PDF_INT:
        case PDF_REAL:
            bytes = sizeof(pdf_num);
            break;
        case PDF_STRING:
        case PDF_NAME:
            /* The struct already declares PDF_NAME_DECLARED_LENGTH data bytes */
            bytes = sizeof(pdf_string) + (size_t)size - PDF_NAME_DECLARED_LENGTH;
            break;
        case PDF_BUFFER:
            bytes = sizeof(pdf_buffer);
            break;
        case PDF_ARRAY:
            bytes = sizeof(pdf_array);
            break;
        case PDF_DICT:
            bytes = sizeof(pdf_dict);
            break;
        case PDF_INDIRECT:
            bytes = sizeof(pdf_indirect_ref);
            break;
        case PDF_KEYWORD:
            bytes = sizeof(pdf_keyword) + (size_t)size - PDF_NAME_DECLARED_LENGTH;
            break;
        /* The following aren't PDF object types, but are objects we either want to
         * reference count, or store on the stack.
         */
        case PDF_XREF_TABLE:
            bytes = sizeof(xref_table_t);
            break;
        case PDF_STREAM:
            bytes = sizeof(pdf_stream);
            break;
        case PDF_NULL:
        case PDF_BOOL:
        default:
            code = gs_note_error(gs_error_typecheck);
            goto error_out;
    }
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
    if (*obj == NULL) {
        code = gs_note_error(gs_error_VMerror);
        goto error_out;
    }

    memset(*obj, 0x00, bytes);
    (*obj)->ctx = ctx;
    (*obj)->type = type;

    switch(type) {
/*      PDF_NULL and PDF_BOOL are now handled as special (not allocated) data types
        and we will return an error in the switch above if we get a call to allocate
        one of these. Having the cases isn't harmful but Coverity complains of dead
        code, so commenting these out to silence Coverity while preserving the old
        semantics to indicate what's happening.
        case PDF_NULL:
        case PDF_BOOL: */

        case PDF_INT:
        case PDF_REAL:
        case PDF_INDIRECT:
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            /* Nothing beyond the zeroed header is required */
            break;
        case PDF_KEYWORD:
        case PDF_STRING:
        case PDF_NAME:
            /* Data is filled in by the caller; we only record its length */
            ((pdf_string *)*obj)->length = size;
            break;
        case PDF_BUFFER:
            {
                pdf_buffer *b = (pdf_buffer *)*obj;
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
                */
                if (size > 0) {
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
                    if (b->data == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                }
                else {
                    b->data = NULL;
                }
                b->length = size;
            }
            break;
        case PDF_ARRAY:
            {
                pdf_obj **values = NULL;

                ((pdf_array *)*obj)->size = size;
                if (size > 0) {
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_obj *), "pdfi_object_alloc");
                    if (values == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_array *)*obj)->values = values;
                    memset(((pdf_array *)*obj)->values, 0x00, (size_t)size * sizeof(pdf_obj *));
                }
            }
            break;
        case PDF_DICT:
            {
                pdf_dict_entry *entries = NULL;

                ((pdf_dict *)*obj)->size = size;
                if (size > 0) {
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
                    if (entries == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_dict *)*obj)->list = entries;
                    memset(((pdf_dict *)*obj)->list, 0x00, (size_t)size * sizeof(pdf_dict_entry));
                }
            }
            break;
        /* The following aren't PDF object types, but are objects we either want to
         * reference count, or store on the stack.
         */
        case PDF_XREF_TABLE:
            break;
        default:
            break;
    }
#if REFCNT_DEBUG
    (*obj)->UID = ctx->ref_UID++;
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
#endif
    return 0;
error_out:
    /* *obj is either NULL or the header we allocated above */
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
    *obj = NULL;
    return code;
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
36.7k
{
192
36.7k
    uint64_t test = 0;
193
36.7k
    int code = 0;
194
195
36.7k
    test = (uint64_t)floor(d);
196
36.7k
    if (d == test) {
197
30.1k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
30.1k
        if (code < 0)
199
0
            return code;
200
30.1k
        (*num)->value.i = test;
201
30.1k
    }
202
6.59k
    else {
203
6.59k
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
6.59k
        if (code < 0)
205
0
            return code;
206
6.59k
        (*num)->value.d = d;
207
6.59k
    }
208
209
36.7k
    return 0;
210
36.7k
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
/* Release a name or string object. Both kinds are freed the same way, so
 * one routine serves for either.
 */
static void pdfi_free_namestring(pdf_obj *o)
{
    gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter free name or string");
}
227
228
/* Release a keyword object; it owns no storage beyond its own allocation. */
static void pdfi_free_keyword(pdf_obj *o)
{
    gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter free keyword");
}
234
235
/* Release an xref table object: first the entry storage it points at
 * (table->xref), then the table object itself.
 */
static void pdfi_free_xref_table(pdf_obj *o)
{
    xref_table_t *table = (xref_table_t *)o;

    gs_free_object(OBJ_MEMORY(o), table->xref, "pdfi_free_xref_table");
    gs_free_object(OBJ_MEMORY(o), table, "pdfi_free_xref_table");
}
242
243
/* Release a stream object. The stream holds a counted reference to its
 * dictionary, so that reference is dropped before the stream is freed.
 */
static void pdfi_free_stream(pdf_obj *o)
{
    pdf_stream *s = (pdf_stream *)o;

    pdfi_countdown(s->stream_dict);
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
}
250
251
/* Release a buffer object: the data area first, then the object header. */
static void pdfi_free_buffer(pdf_obj *o)
{
    pdf_buffer *buffer = (pdf_buffer *)o;

    gs_free_object(OBJ_MEMORY(o), buffer->data, "pdfi_free_buffer(data)");
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
}
258
259
/* Free an object according to its type.
 *
 * NULL is ignored, as is any pointer value below TOKEN__LAST_KEY
 * (presumably the non-allocated token/constant pseudo-objects -- confirm
 * against the definition of TOKEN__LAST_KEY). PDF_BOOL, PDF_NULL and unknown
 * types are reported through dbgmprintf and otherwise left alone.
 */
void pdfi_free_object(pdf_obj *o)
{
    if (o == NULL || (intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
        return;

    switch(o->type) {
        /* Composite / specialised objects each have their own destructor */
        case PDF_STRING:
        case PDF_NAME:
            pdfi_free_namestring(o);
            return;
        case PDF_KEYWORD:
            pdfi_free_keyword(o);
            return;
        case PDF_BUFFER:
            pdfi_free_buffer(o);
            return;
        case PDF_ARRAY:
            pdfi_free_array(o);
            return;
        case PDF_DICT:
            pdfi_free_dict(o);
            return;
        case PDF_STREAM:
            pdfi_free_stream(o);
            return;
        case PDF_XREF_TABLE:
            pdfi_free_xref_table(o);
            return;
        case PDF_FONT:
            pdfi_free_font(o);
            return;
        case PDF_CMAP:
            pdfi_free_cmap(o);
            return;

        /* Simple objects: a single allocation with nothing hanging off it */
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
        case PDF_INT:
        case PDF_REAL:
        case PDF_INDIRECT:
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
            return;

        /* These are never allocated, so there is nothing to free */
        case PDF_BOOL:
        case PDF_NULL:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
            return;

        default:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
            return;
    }
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
1.27M
{
320
1.27M
    int code = 0;
321
1.27M
    pdf_stream *new_stream = NULL;
322
323
1.27M
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
1.27M
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
1.27M
    if (code < 0)
328
0
        goto error_exit;
329
330
1.27M
    new_stream->ctx = ctx;
331
1.27M
    pdfi_countup(new_stream);
332
333
1.27M
    new_stream->stream_dict = dict;
334
1.27M
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
1.27M
    if (do_convert) {
340
1.19M
        new_stream->object_num = dict->object_num;
341
1.19M
        new_stream->generation_num = dict->generation_num;
342
1.19M
        dict->object_num = 0;
343
1.19M
        dict->generation_num = 0;
344
1.19M
    }
345
1.27M
    *stream = new_stream;
346
1.27M
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
1.27M
}
352
353
/* Return a stream's dictionary in *dict with its reference count
 * incremented, so it cannot go away while the caller holds it. If the
 * dictionary has no object number (0) it is given the stream's object and
 * generation numbers. Always returns 0.
 */
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
{
    pdf_dict *d = stream->stream_dict;

    *dict = d;
    pdfi_countup(d);

    if (d->object_num == 0) {
        d->object_num = stream->object_num;
        d->generation_num = stream->generation_num;
    }

    return 0;
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
420
{
370
420
    int code;
371
420
    int length = strlen(charstr);
372
420
    pdf_string *newstr = NULL;
373
374
420
    *string = NULL;
375
376
420
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
420
    if (code < 0) goto exit;
378
379
420
    memcpy(newstr->data, (byte *)charstr, length);
380
381
420
    *string = newstr;
382
420
    pdfi_countup(newstr);
383
420
 exit:
384
420
    return code;
385
420
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
902k
{
390
902k
    int code;
391
902k
    int length = strlen(charstr);
392
902k
    pdf_name *newname = NULL;
393
394
902k
    *name = NULL;
395
396
902k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
902k
    if (code < 0) goto exit;
398
399
902k
    memcpy(newname->data, (byte *)charstr, length);
400
401
902k
    *name = newname;
402
902k
    pdfi_countup(newname);
403
902k
 exit:
404
902k
    return code;
405
902k
}
406
407
/************ bufstream module BEGIN **************/
408
176k
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
/* Prepare an empty bufstream with INIT_BUF_SIZE bytes of storage.
 * Returns 0, or gs_error_VMerror if the storage cannot be allocated (in
 * which case stream->data is NULL).
 */
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    stream->cur = 0;
    stream->len = INIT_BUF_SIZE;
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");

    if (stream->data == NULL)
        return_error(gs_error_VMerror);
    return 0;
}
427
428
/* Release any storage still owned by the bufstream and reset it to the
 * empty state. Always returns 0.
 */
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    if (stream->data != NULL)
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");

    stream->data = NULL;
    stream->cur = 0;
    stream->len = 0;
    return 0;
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
176k
{
441
176k
    *buf = stream->data;
442
176k
    *len = stream->cur;
443
176k
    stream->len = 0;
444
176k
    stream->cur = 0;
445
176k
    stream->data = NULL;
446
176k
    return 0;
447
176k
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
7.56k
{
452
7.56k
    byte *data = NULL;
453
7.56k
    uint64_t newsize;
454
455
7.56k
    newsize = stream->len * 2 + needed;
456
7.56k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
457
7.56k
    if (!data)
458
0
        return_error(gs_error_VMerror);
459
460
7.56k
    memcpy(data, stream->data, stream->len);
461
7.56k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
462
7.56k
    stream->data = data;
463
7.56k
    stream->len = newsize;
464
465
7.56k
    return 0;
466
7.56k
}
467
468
/* Append 'len' bytes from 'data' to the bufstream, growing the storage
 * first if they would not fit. Returns 0, or the error from
 * pdfi_bufstream_increase() (in which case nothing is written).
 */
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
{
    if (stream->cur + len > stream->len) {
        int code = pdfi_bufstream_increase(ctx, stream, len);

        if (code < 0)
            return code;
    }

    memcpy(stream->data + stream->cur, data, len);
    stream->cur += len;
    return 0;
}
483
484
/************ bufstream module END **************/
485
486
487
/* Create a c-string to use as object label
488
 * Uses the object_num to make it unique.
489
 * (don't call this for objects with object_num=0, though I am not going to check that here)
490
 *
491
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
492
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
493
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
494
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
495
 * object number and generation.
496
 */
497
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
498
40.8k
{
499
40.8k
    int code = 0, i;
500
40.8k
    int length;
501
40.8k
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
502
40.8k
    char *string = NULL;
503
40.8k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
504
40.8k
    uint32_t hash = 5381;
505
506
40.8k
    if (ctx->main_stream->s->file_name.data != NULL) {
507
0
        string = (char *)ctx->main_stream->s->file_name.data;
508
0
        length = ctx->main_stream->s->file_name.size;
509
510
0
        for (i=0;i < length;i++) {
511
#if ARCH_IS_BIG_ENDIAN
512
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
513
#else
514
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
515
0
#endif
516
0
        }
517
0
    }
518
519
40.8k
    *label = NULL;
520
40.8k
    length = strlen(template)+30;
521
522
40.8k
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
523
40.8k
    if (string == NULL) {
524
0
        code = gs_note_error(gs_error_VMerror);
525
0
        goto exit;
526
0
    }
527
528
40.8k
    if (pdfi_type_of(obj) == PDF_INDIRECT)
529
40.7k
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
530
81
    else
531
81
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
532
533
40.8k
    *label = string;
534
40.8k
 exit:
535
40.8k
    return code;
536
40.8k
}
537
538
/*********** BEGIN obj_to_string module ************/
539
540
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
541
542
/* Dispatch to get string representation of an object */
543
typedef struct {
544
    pdf_obj_type type;
545
    str_func func;
546
} obj_str_dispatch_t;
547
548
/* Fallback converter: emits the 12 bytes "/placeholder" (no terminating NUL)
 * in a newly allocated buffer. Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int size = 12;
    byte *out = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_default_str(data)");

    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, "/placeholder", size);
    *len = size;
    *data = out;
    return 0;
}
562
563
/* Convert a name object to text: a '/' followed by the name's bytes, copied
 * verbatim, in a newly allocated buffer of length+1 bytes (no terminating
 * NUL). Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_name *name = (pdf_name *)obj;
    int total = name->length + 1;    /* room for the leading '/' */
    byte *out;

    out = gs_alloc_bytes(ctx->memory, total, "pdfi_obj_name_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    out[0] = '/';
    memcpy(out + 1, name->data, name->length);

    *len = total;
    *data = out;
    return 0;
}
579
580
/* Convert a real number object to text using the "%.4f" format.
 *
 * On success *data receives a newly allocated, NUL-terminated buffer and
 * *len the number of characters in it (excluding the NUL).
 * Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    int size = 15;
    int needed;
    pdf_num *number = (pdf_num *)obj;
    char *buf;

    /* A double printed with %.4f can be far longer than 14 characters (large
     * magnitudes have hundreds of digits). Measure first so the output is
     * never silently truncated into a different, wrong, number. If the
     * measurement itself fails we fall back to the default size.
     */
    needed = snprintf(NULL, 0, "%.4f", number->value.d);
    if (needed >= size)
        size = needed + 1;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%.4f", number->value.d);
    *data = (byte *)buf;
    *len = strlen(buf);
    return code;
}
595
596
/* Convert an integer object to decimal text.
 *
 * On success *data receives a newly allocated, NUL-terminated buffer and
 * *len the number of characters in it (excluding the NUL).
 * Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    /* A 64-bit signed value needs up to 19 digits plus a sign and the
     * terminating NUL; anything smaller truncates large values into a
     * different number.
     */
    int size = 21;
    pdf_num *number = (pdf_num *)obj;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%"PRId64"", number->value.i);
    *data = (byte *)buf;
    *len = strlen(buf);
    return code;
}
611
612
/* Format an indirect reference as "<object_num> <generation> R".
 *
 * On success *data receives a newly allocated, NUL-terminated buffer and
 * *len the number of characters in it (excluding the NUL).
 * Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
{
    int size = 100;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    /* Both values are unsigned; use the matching unsigned conversions so
     * that large values are not printed as negative numbers.
     */
    snprintf(buf, size, "%"PRIu64" %"PRIu32" R", object_num, generation);
    *data = (byte *)buf;
    *len = strlen(buf);
    return 0;
}
625
626
/* Convert an indirect reference to text.
 *
 * - A reference flagged is_highlevelform is written as
 *   "<highlevel_object_num> 0 R" and the flag is then cleared.
 * - Otherwise, unless the reference is flagged is_marking, the target is
 *   dereferenced:
 *     . an undefined target is written as a plain "<num> <gen> R";
 *     . a stream or dictionary target is passed to pdfi_pdfmark_stream() /
 *       pdfi_pdfmark_dict() and the reference is written as a label;
 *     . any other target is written inline via pdfi_obj_to_string();
 *     . a circular reference is tolerated and written as a label.
 * - Everything else is written as the label from pdfi_obj_get_label().
 *
 * The whole operation runs between a loop detector mark and cleartomark.
 * Returns the resulting code; negative values are errors.
 */
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
    pdf_obj *target = NULL;
    bool want_label = true;
    int code;

    code = pdfi_loop_detector_mark(ctx);
    if (code < 0)
        return code;

    if (ref->is_highlevelform) {
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
        ref->is_highlevelform = false;
        goto exit;
    }

    if (!ref->is_marking) {
        code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &target);
        if (code == gs_error_undefined) {
            /* Do something sensible for undefined reference (this would be a broken file) */
            /* TODO: Flag an error? */
            code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
            goto exit;
        }
        /* A circular reference is not fatal here; we fall back to a label */
        if (code < 0 && code != gs_error_circular_reference)
            goto exit;

        if (code == 0) {
            switch (pdfi_type_of(target)) {
                case PDF_STREAM:
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)target);
                    if (code < 0)
                        goto exit;
                    break;
                case PDF_DICT:
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)target);
                    if (code < 0)
                        goto exit;
                    break;
                default:
                    /* Simple objects are written in place of the reference */
                    code = pdfi_obj_to_string(ctx, target, data, len);
                    if (code < 0)
                        goto exit;
                    want_label = false;
                    break;
            }
        }
    }

    if (want_label) {
        char *text;

        code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &text);
        if (code < 0)
            goto exit;
        *data = (byte *)text;
        *len = strlen(text);
    }

 exit:
    (void)pdfi_loop_detector_cleartomark(ctx);
    pdfi_countdown(target);
    return code;
}
679
680
/* Convert a boolean to text: "true" when obj is PDF_TRUE_OBJ, otherwise
 * "false". The result is a newly allocated 5-byte buffer with no
 * terminating NUL; *len is 4 or 5. Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int capacity = 5;    /* enough for the longer of the two words */
    const char *word;
    int wordlen;
    char *out;

    out = (char *)gs_alloc_bytes(ctx->memory, capacity, "pdfi_obj_bool_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    if (obj == PDF_TRUE_OBJ) {
        word = "true";
        wordlen = 4;
    } else {
        word = "false";
        wordlen = 5;
    }
    memcpy(out, word, wordlen);

    *len = wordlen;
    *data = (byte *)out;
    return 0;
}
699
700
/* Convert the null object to text: the 4 bytes "null" in a newly allocated
 * buffer (no terminating NUL). Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int size = 4;
    char *out = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_null_str(data)");

    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, "null", size);
    *data = (byte *)out;
    *len = size;
    return 0;
}
714
715
/* Convert a string object to a parenthesised literal string.
 *
 * Line feed and carriage return become \n and \r; '(', ')' and '\' are
 * preceded by a backslash; any other byte below 0x20 or above 0x7F becomes a
 * backslash followed by three octal digits; everything else is copied as is.
 * The result is a newly allocated buffer with no terminating NUL.
 * Returns 0 or gs_error_VMerror.
 */
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_string *src = (pdf_string *)obj;
    char *out;
    int in, pos;
    int total = 2;    /* the enclosing '(' and ')' */

    /* Pass 1: work out how many bytes the escaped form occupies */
    for (in = 0; in < src->length; in++) {
        switch (src->data[in]) {
            case 0x0a:
            case 0x0d:
            case '(':
            case ')':
            case '\\':
                total += 2;
                break;
            default:
                if (src->data[in] < 0x20 || src->data[in] > 0x7F)
                    total += 4;
                else
                    total += 1;
                break;
        }
    }

    out = (char *)gs_alloc_bytes(ctx->memory, total, "pdfi_obj_string_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    /* Pass 2: emit the escaped form */
    pos = 0;
    out[pos++] = '(';
    for (in = 0; in < src->length; in++) {
        if (src->data[in] == 0x0a) {
            out[pos++] = '\\';
            out[pos++] = 'n';
        } else if (src->data[in] == 0x0d) {
            out[pos++] = '\\';
            out[pos++] = 'r';
        } else if (src->data[in] == '(' || src->data[in] == ')' || src->data[in] == '\\') {
            out[pos++] = '\\';
            out[pos++] = src->data[in];
        } else if (src->data[in] < 0x20 || src->data[in] > 0x7F) {
            /* Three octal digits, most significant first */
            out[pos++] = '\\';
            out[pos++] = (src->data[in] >> 6) + 0x30;
            out[pos++] = ((src->data[in] & 0x3F) >> 3) + 0x30;
            out[pos++] = (src->data[in] & 0x07) + 0x30;
        } else {
            out[pos++] = src->data[in];
        }
    }
    out[pos++] = ')';

    *data = (byte *)out;
    *len = pos;
    return 0;
}
770
771
/* Convert an array to text: '[', each element converted with
 * pdfi_obj_to_string() (elements are fetched without dereferencing) and
 * separated by single spaces, then ']'.
 *
 * On success *data receives the accumulated buffer and *len its length.
 * Returns 0, or the first error hit while fetching, converting or
 * accumulating an element.
 */
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_array *array = (pdf_array *)obj;
    pdfi_bufstream_t out;
    pdf_obj *item = NULL;
    byte *text = NULL;
    int textlen;
    uint64_t i, count;
    int code;

    code = pdfi_bufstream_init(ctx, &out);
    if (code < 0)
        goto exit;

    code = pdfi_bufstream_write(ctx, &out, (byte *)"[", 1);
    if (code < 0)
        goto exit;

    count = pdfi_array_size(array);
    for (i = 0; i < count; i++) {
        code = pdfi_array_get_no_deref(ctx, array, i, &item);
        if (code < 0)
            goto exit;

        code = pdfi_obj_to_string(ctx, item, &text, &textlen);
        if (code < 0)
            goto exit;

        code = pdfi_bufstream_write(ctx, &out, text, textlen);
        if (code < 0)
            goto exit;

        /* Done with this element's text and our reference to it */
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
        text = NULL;
        textlen = 0;
        pdfi_countdown(item);
        item = NULL;

        /* Separate elements with a space, but not after the last one */
        if (i + 1 != count) {
            code = pdfi_bufstream_write(ctx, &out, (byte *)" ", 1);
            if (code < 0)
                goto exit;
        }
    }

    code = pdfi_bufstream_write(ctx, &out, (byte *)"]", 1);
    if (code < 0)
        goto exit;

    /* Hand the accumulated text to the caller */
    code = pdfi_bufstream_copy(ctx, &out, data, len);

 exit:
    if (text)
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
    pdfi_bufstream_free(ctx, &out);
    pdfi_countdown(item);
    return code;
}
824
825
/* Convert a stream object to a string.
 *
 * If stream->is_marking is set, the stream data is fetched with
 * pdfi_stream_to_buffer() and returned as-is through 'data' and 'len'.
 * Otherwise a temporary indirect reference carrying the stream's object and
 * generation numbers is built and converted with pdfi_obj_indirect_str().
 *
 * Returns 0 or a positive code on success, or a negative error code. A stream
 * whose data does not fit in the 'int' length reported to the caller is
 * rejected with gs_error_limitcheck rather than returned with a truncated
 * length.
 */
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    byte *buf = NULL;
    pdf_stream *stream = (pdf_stream *)obj;
    int64_t bufsize = 0;
    pdf_indirect_ref *streamref = NULL;
    /* Largest value representable in the 'int' used for *len */
    const int64_t len_limit = (int64_t)(~0u >> 1);

    /* TODO: How to deal with stream dictionaries?
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
     * See 'pushpin' annotation in annotations-galore_II.ps
     *
     * This will just literally grab the stream data.
     */
    if (stream->is_marking) {
        code = pdfi_stream_to_buffer(ctx, stream, &buf, &bufsize);
        if (code < 0) goto exit;

        /* The length is reported as an int; refuse anything that would be
         * silently truncated (or turned negative) by the narrowing below.
         */
        if (bufsize < 0 || bufsize > len_limit) {
            gs_free_object(ctx->memory, buf, "pdfi_obj_stream_str(buf)");
            code = gs_note_error(gs_error_limitcheck);
            goto exit;
        }
        *data = buf;
        *len = (int)bufsize;
    } else {
        /* Create an indirect ref for the stream */
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&streamref);
        if (code < 0) goto exit;
        pdfi_countup(streamref);
        streamref->ref_object_num = stream->object_num;
        streamref->ref_generation_num = stream->generation_num;
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)streamref, data, len);
    }

 exit:
    /* streamref is only a temporary; it is still NULL on the marking path */
    pdfi_countdown(streamref);
    return code;
}
858
859
/* This fetches without dereferencing.  If you want to see the references inline,
860
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
861
 */
862
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
863
60.8k
{
864
60.8k
    int code = 0;
865
60.8k
    pdf_dict *dict = (pdf_dict *)obj;
866
60.8k
    pdf_name *Key = NULL;
867
60.8k
    pdf_obj *Value = NULL;
868
60.8k
    byte *itembuf = NULL;
869
60.8k
    int itemsize;
870
60.8k
    pdfi_bufstream_t bufstream;
871
60.8k
    uint64_t index, dictsize;
872
60.8k
    uint64_t itemnum = 0;
873
874
60.8k
    code = pdfi_loop_detector_mark(ctx);
875
60.8k
    if (code < 0)
876
0
        return code;
877
878
60.8k
    code = pdfi_bufstream_init(ctx, &bufstream);
879
60.8k
    if (code < 0) goto exit;
880
881
60.8k
    dictsize = pdfi_dict_entries(dict);
882
    /* Handle empty dict specially */
883
60.8k
    if (dictsize == 0) {
884
8
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
885
8
        if (code < 0)
886
0
            goto exit;
887
8
        goto exit_copy;
888
8
    }
889
890
60.8k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
891
60.8k
    if (code < 0) goto exit;
892
893
    /* Note: We specifically fetch without dereferencing, so there will be no circular
894
     * references to handle here.
895
     */
896
    /* Wrong.... */
897
898
60.8k
    if (dict->object_num !=0 ) {
899
17.0k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
900
4
            code = gs_note_error(gs_error_circular_reference);
901
4
            goto exit;
902
4
        }
903
17.0k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
904
17.0k
        if (code < 0)
905
0
            goto exit;
906
17.0k
    }
907
908
    /* Get each (key,val) pair from dict and setup param for it */
909
60.8k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
910
72.2k
    while (code >= 0) {
911
72.2k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
912
72.2k
        if (code < 0) goto exit;
913
914
72.2k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
915
72.2k
        if (code < 0) goto exit;
916
917
72.2k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
918
72.2k
        itembuf = NULL;
919
72.2k
        itemsize = 0;
920
921
        /* Put a space between elements */
922
72.2k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
923
72.2k
        if (code < 0) goto exit;
924
925
        /* No dereference */
926
72.2k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
927
72.2k
        if (code < 0) goto exit;
928
72.2k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
929
72.2k
        if (code < 0) goto exit;
930
931
72.0k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
932
72.0k
        if (code < 0) goto exit;
933
934
72.0k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
935
72.0k
        itembuf = NULL;
936
72.0k
        itemsize = 0;
937
938
72.0k
        pdfi_countdown(Value);
939
72.0k
        Value = NULL;
940
72.0k
        pdfi_countdown(Key);
941
72.0k
        Key = NULL;
942
943
72.0k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
944
72.0k
        if (code == gs_error_undefined) {
945
60.7k
            code = 0;
946
60.7k
            break;
947
60.7k
        }
948
11.3k
        if (code < 0) goto exit;
949
950
        /* Put a space between elements */
951
11.3k
        if (++itemnum != dictsize) {
952
11.3k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
953
11.3k
            if (code < 0) goto exit;
954
11.3k
        }
955
11.3k
    }
956
60.7k
    if (code < 0) goto exit;
957
958
60.7k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
959
60.7k
    if (code < 0) goto exit;
960
961
60.7k
 exit_copy:
962
    /* Now copy the results out into the string we can keep */
963
60.7k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
964
965
60.8k
 exit:
966
60.8k
    if (itembuf)
967
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
968
60.8k
    pdfi_countdown(Key);
969
60.8k
    pdfi_countdown(Value);
970
60.8k
    pdfi_bufstream_free(ctx, &bufstream);
971
60.8k
    if (code < 0)
972
179
        (void)pdfi_loop_detector_cleartomark(ctx);
973
60.7k
    else
974
60.7k
        code = pdfi_loop_detector_cleartomark(ctx);
975
60.8k
    return code;
976
60.7k
}
977
978
/* Table of token spellings, generated by expanding the PARAM1()/PARAM2()
 * entries of pdf_tokens.h: PARAM1(A) contributes the stringified A, PARAM2(A,B)
 * contributes its first argument unchanged.
 *
 * Each row is 10 bytes, so an entry must be at most 9 characters long to leave
 * room for its terminating NUL.
 */
#define PARAM1(A) #A,
#define PARAM2(A, B) A,
static const char pdf_token_strings[][10] = {
#include "pdf_tokens.h"
};
983
984
/* Convert a fast keyword token to a string.
 *
 * The value of 'obj' is not followed as a pointer here; it is used as an index
 * into pdf_token_strings[].  On success *data receives a newly allocated copy
 * of the keyword text (allocated from ctx->memory) and *len its length in
 * bytes.
 *
 * The copy keeps its trailing NUL, but the NUL is deliberately not counted in
 * *len: pdfi_obj_array_str() and pdfi_obj_dict_str() append exactly *len bytes
 * to their output, so counting the terminator would embed a NUL in the middle
 * of the serialised text.
 *
 * Returns 0 on success, gs_error_rangecheck if the token value lies outside
 * the table, or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const uintptr_t token = (uintptr_t)obj;
    const size_t ntokens = sizeof(pdf_token_strings) / sizeof(pdf_token_strings[0]);
    const char *s;
    int size;
    byte *buf;

    /* Never index past the end of the table */
    if (token >= ntokens)
        return_error(gs_error_rangecheck);

    s = pdf_token_strings[token];
    size = (int)strlen(s);

    /* One extra byte so the returned buffer stays NUL terminated */
    buf = gs_alloc_bytes(ctx->memory, size + 1, "pdfi_obj_fast_keyword_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    memcpy(buf, s, size + 1);
    *data = buf;
    *len = size;
    return 0;
}
999
1000
obj_str_dispatch_t obj_str_dispatch[] = {
1001
    {PDF_NAME, pdfi_obj_name_str},
1002
    {PDF_ARRAY, pdfi_obj_array_str},
1003
    {PDF_REAL, pdfi_obj_real_str},
1004
    {PDF_INT, pdfi_obj_int_str},
1005
    {PDF_BOOL, pdfi_obj_bool_str},
1006
    {PDF_STRING, pdfi_obj_string_str},
1007
    {PDF_DICT, pdfi_obj_dict_str},
1008
    {PDF_STREAM, pdfi_obj_stream_str},
1009
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1010
    {PDF_NULL, pdfi_obj_null_str},
1011
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1012
    {0, NULL}
1013
};
1014
1015
/* Recursive function to build a string from an object
1016
 */
1017
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1018
1.94M
{
1019
1.94M
    obj_str_dispatch_t *dispatch_ptr;
1020
1.94M
    int code = 0;
1021
1.94M
    pdf_obj_type type;
1022
1023
1.94M
    *data = NULL;
1024
1.94M
    *len = 0;
1025
1.94M
    type = pdfi_type_of(obj);
1026
5.93M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1027
5.93M
        if (type == dispatch_ptr->type) {
1028
1.94M
            code = dispatch_ptr->func(ctx, obj, data, len);
1029
1.94M
            goto exit;
1030
1.94M
        }
1031
5.93M
    }
1032
    /* Not implemented, use default */
1033
141
    code = pdfi_obj_default_str(ctx, obj, data, len);
1034
1.94M
 exit:
1035
1.94M
    return code;
1036
141
}
1037
1038
/*********** END obj_to_string module ************/