Coverage Report

Created: 2025-12-31 07:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct numbr of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
38
1.09G
{
39
1.09G
    int bytes = 0;
40
1.09G
    int code = 0;
41
42
1.09G
    switch(type) {
43
20.7M
        case PDF_ARRAY_MARK:
44
38.5M
        case PDF_DICT_MARK:
45
39.3M
        case PDF_PROC_MARK:
46
39.3M
            bytes = sizeof(pdf_obj);
47
39.3M
            break;
48
293M
        case PDF_INT:
49
499M
        case PDF_REAL:
50
499M
            bytes = sizeof(pdf_num);
51
499M
            break;
52
95.0M
        case PDF_STRING:
53
472M
        case PDF_NAME:
54
472M
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
55
472M
            break;
56
56.7k
        case PDF_BUFFER:
57
56.7k
            bytes = sizeof(pdf_buffer);
58
56.7k
            break;
59
20.7M
        case PDF_ARRAY:
60
20.7M
            bytes = sizeof(pdf_array);
61
20.7M
            break;
62
18.6M
        case PDF_DICT:
63
18.6M
            bytes = sizeof(pdf_dict);
64
18.6M
            break;
65
24.8M
        case PDF_INDIRECT:
66
24.8M
            bytes = sizeof(pdf_indirect_ref);
67
24.8M
            break;
68
21.3M
        case PDF_KEYWORD:
69
21.3M
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
70
21.3M
            break;
71
        /* The following aren't PDF object types, but are objects we either want to
72
         * reference count, or store on the stack.
73
         */
74
0
        case PDF_XREF_TABLE:
75
0
            bytes = sizeof(xref_table_t);
76
0
            break;
77
1.47M
        case PDF_STREAM:
78
1.47M
            bytes = sizeof(pdf_stream);
79
1.47M
            break;
80
0
        case PDF_NULL:
81
0
        case PDF_BOOL:
82
0
        default:
83
0
            code = gs_note_error(gs_error_typecheck);
84
0
            goto error_out;
85
1.09G
    }
86
1.09G
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
87
1.09G
    if (*obj == NULL) {
88
0
        code = gs_note_error(gs_error_VMerror);
89
0
        goto error_out;
90
0
    }
91
92
1.09G
    memset(*obj, 0x00, bytes);
93
1.09G
    (*obj)->ctx = ctx;
94
1.09G
    (*obj)->type = type;
95
96
1.09G
    switch(type) {
97
/*      PDF_NULL and PDF_BOOL are now handled as special (not allocated) data types
98
        and we will return an error in the switch above if we get a call to allocate
99
        one of these. Having the cases isn't harmful but Coverity complains of dead
100
        code, so commenting these out to silence Coverity while preserving the old
101
        semantics to indicate what's happening.
102
        case PDF_NULL:
103
        case PDF_BOOL: */
104
105
293M
        case PDF_INT:
106
499M
        case PDF_REAL:
107
524M
        case PDF_INDIRECT:
108
545M
        case PDF_ARRAY_MARK:
109
562M
        case PDF_DICT_MARK:
110
563M
        case PDF_PROC_MARK:
111
563M
            break;
112
21.3M
        case PDF_KEYWORD:
113
116M
        case PDF_STRING:
114
494M
        case PDF_NAME:
115
494M
            ((pdf_string *)*obj)->length = size;
116
494M
            break;
117
56.7k
        case PDF_BUFFER:
118
56.7k
            {
119
56.7k
                pdf_buffer *b = (pdf_buffer *)*obj;
120
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
121
                */
122
56.7k
                if (size > 0) {
123
0
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
124
0
                    if (b->data == NULL) {
125
0
                        code = gs_note_error(gs_error_VMerror);
126
0
                        goto error_out;
127
0
                    }
128
0
                }
129
56.7k
                else {
130
56.7k
                    b->data = NULL;
131
56.7k
                }
132
56.7k
                b->length = size;
133
56.7k
            }
134
0
            break;
135
20.7M
        case PDF_ARRAY:
136
20.7M
            {
137
20.7M
                pdf_obj **values = NULL;
138
139
20.7M
                ((pdf_array *)*obj)->size = size;
140
20.7M
                if (size > 0) {
141
19.4M
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_obj *), "pdfi_object_alloc");
142
19.4M
                    if (values == NULL) {
143
0
                        code = gs_note_error(gs_error_VMerror);
144
0
                        goto error_out;
145
0
                    }
146
19.4M
                    ((pdf_array *)*obj)->values = values;
147
19.4M
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
148
19.4M
                }
149
20.7M
            }
150
20.7M
            break;
151
20.7M
        case PDF_DICT:
152
18.6M
            {
153
18.6M
                pdf_dict_entry *entries = NULL;
154
155
18.6M
                ((pdf_dict *)*obj)->size = size;
156
18.6M
                if (size > 0) {
157
17.7M
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
158
17.7M
                    if (entries == NULL) {
159
0
                        code = gs_note_error(gs_error_VMerror);
160
0
                        goto error_out;
161
0
                    }
162
17.7M
                    ((pdf_dict *)*obj)->list = entries;
163
17.7M
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
164
17.7M
                }
165
18.6M
            }
166
18.6M
            break;
167
        /* The following aren't PDF object types, but are objects we either want to
168
         * reference count, or store on the stack.
169
         */
170
18.6M
        case PDF_XREF_TABLE:
171
0
            break;
172
1.47M
        default:
173
1.47M
            break;
174
1.09G
    }
175
#if REFCNT_DEBUG
176
    (*obj)->UID = ctx->ref_UID++;
177
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
178
#endif
179
1.09G
    return 0;
180
0
error_out:
181
0
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
182
0
    *obj = NULL;
183
0
    return code;
184
1.09G
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
38.7k
{
192
38.7k
    uint64_t test = 0;
193
38.7k
    int code = 0;
194
195
38.7k
    test = (uint64_t)floor(d);
196
38.7k
    if (d == test) {
197
31.0k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
31.0k
        if (code < 0)
199
0
            return code;
200
31.0k
        (*num)->value.i = test;
201
31.0k
    }
202
7.67k
    else {
203
7.67k
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
7.67k
        if (code < 0)
205
0
            return code;
206
7.67k
        (*num)->value.d = d;
207
7.67k
    }
208
209
38.7k
    return 0;
210
38.7k
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
static void pdfi_free_namestring(pdf_obj *o)
221
472M
{
222
    /* Currently names and strings are the same, so a single cast is OK */
223
472M
    pdf_name *n = (pdf_name *)o;
224
225
472M
    gs_free_object(OBJ_MEMORY(n), n, "pdf interpreter free name or string");
226
472M
}
227
228
static void pdfi_free_keyword(pdf_obj *o)
229
21.3M
{
230
21.3M
    pdf_keyword *k = (pdf_keyword *)o;
231
232
21.3M
    gs_free_object(OBJ_MEMORY(k), k, "pdf interpreter free keyword");
233
21.3M
}
234
235
static void pdfi_free_xref_table(pdf_obj *o)
236
102k
{
237
102k
    xref_table_t *xref = (xref_table_t *)o;
238
239
102k
    gs_free_object(OBJ_MEMORY(xref), xref->xref, "pdfi_free_xref_table");
240
102k
    gs_free_object(OBJ_MEMORY(xref), xref, "pdfi_free_xref_table");
241
102k
}
242
243
static void pdfi_free_stream(pdf_obj *o)
244
1.47M
{
245
1.47M
    pdf_stream *stream = (pdf_stream *)o;
246
247
1.47M
    pdfi_countdown(stream->stream_dict);
248
1.47M
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
249
1.47M
}
250
251
static void pdfi_free_buffer(pdf_obj *o)
252
56.7k
{
253
56.7k
    pdf_buffer *b = (pdf_buffer *)o;
254
255
56.7k
    gs_free_object(OBJ_MEMORY(b), b->data, "pdfi_free_buffer(data)");
256
56.7k
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
257
56.7k
}
258
259
void pdfi_free_object(pdf_obj *o)
260
1.10G
{
261
1.10G
    if (o == NULL)
262
1.05M
        return;
263
1.10G
    if ((intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
264
0
        return;
265
1.10G
    switch(o->type) {
266
20.7M
        case PDF_ARRAY_MARK:
267
38.5M
        case PDF_DICT_MARK:
268
39.3M
        case PDF_PROC_MARK:
269
332M
        case PDF_INT:
270
538M
        case PDF_REAL:
271
563M
        case PDF_INDIRECT:
272
563M
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
273
563M
            break;
274
95.0M
        case PDF_STRING:
275
472M
        case PDF_NAME:
276
472M
            pdfi_free_namestring(o);
277
472M
            break;
278
56.7k
        case PDF_BUFFER:
279
56.7k
            pdfi_free_buffer(o);
280
56.7k
            break;
281
20.7M
        case PDF_ARRAY:
282
20.7M
            pdfi_free_array(o);
283
20.7M
            break;
284
18.5M
        case PDF_DICT:
285
18.5M
            pdfi_free_dict(o);
286
18.5M
            break;
287
1.47M
        case PDF_STREAM:
288
1.47M
            pdfi_free_stream(o);
289
1.47M
            break;
290
21.3M
        case PDF_KEYWORD:
291
21.3M
            pdfi_free_keyword(o);
292
21.3M
            break;
293
102k
        case PDF_XREF_TABLE:
294
102k
            pdfi_free_xref_table(o);
295
102k
            break;
296
1.18M
        case PDF_FONT:
297
1.18M
            pdfi_free_font(o);
298
1.18M
            break;
299
101k
        case PDF_CMAP:
300
101k
            pdfi_free_cmap(o);
301
101k
            break;
302
0
        case PDF_BOOL:
303
0
        case PDF_NULL:
304
0
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
305
0
            break;
306
37
        default:
307
37
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
308
37
            break;
309
1.10G
    }
310
1.10G
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
1.47M
{
320
1.47M
    int code = 0;
321
1.47M
    pdf_stream *new_stream = NULL;
322
323
1.47M
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
1.47M
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
1.47M
    if (code < 0)
328
0
        goto error_exit;
329
330
1.47M
    new_stream->ctx = ctx;
331
1.47M
    pdfi_countup(new_stream);
332
333
1.47M
    new_stream->stream_dict = dict;
334
1.47M
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
1.47M
    if (do_convert) {
340
1.39M
        new_stream->object_num = dict->object_num;
341
1.39M
        new_stream->generation_num = dict->generation_num;
342
1.39M
        dict->object_num = 0;
343
1.39M
        dict->generation_num = 0;
344
1.39M
    }
345
1.47M
    *stream = new_stream;
346
1.47M
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
1.47M
}
352
353
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
354
321
{
355
321
    *dict = stream->stream_dict;
356
357
    /* Make sure the dictionary won't go away */
358
321
    pdfi_countup(*dict);
359
321
    if ((*dict)->object_num == 0) {
360
0
        (*dict)->object_num = stream->object_num;
361
0
        (*dict)->generation_num = stream->generation_num;
362
0
    }
363
364
321
    return 0;
365
321
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
729
{
370
729
    int code;
371
729
    int length = strlen(charstr);
372
729
    pdf_string *newstr = NULL;
373
374
729
    *string = NULL;
375
376
729
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
729
    if (code < 0) goto exit;
378
379
729
    memcpy(newstr->data, (byte *)charstr, length);
380
381
729
    *string = newstr;
382
729
    pdfi_countup(newstr);
383
729
 exit:
384
729
    return code;
385
729
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
960k
{
390
960k
    int code;
391
960k
    int length = strlen(charstr);
392
960k
    pdf_name *newname = NULL;
393
394
960k
    *name = NULL;
395
396
960k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
960k
    if (code < 0) goto exit;
398
399
960k
    memcpy(newname->data, (byte *)charstr, length);
400
401
960k
    *name = newname;
402
960k
    pdfi_countup(newname);
403
960k
 exit:
404
960k
    return code;
405
960k
}
406
407
/************ bufstream module BEGIN **************/
408
177k
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
418
177k
{
419
177k
    stream->len = INIT_BUF_SIZE;
420
177k
    stream->cur = 0;
421
177k
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");
422
423
177k
    if (!stream->data)
424
0
        return_error(gs_error_VMerror);
425
177k
    return 0;
426
177k
}
427
428
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
429
177k
{
430
177k
    if (stream->data)
431
353
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");
432
177k
    stream->len = 0;
433
177k
    stream->cur = 0;
434
177k
    stream->data = NULL;
435
177k
    return 0;
436
177k
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
177k
{
441
177k
    *buf = stream->data;
442
177k
    *len = stream->cur;
443
177k
    stream->len = 0;
444
177k
    stream->cur = 0;
445
177k
    stream->data = NULL;
446
177k
    return 0;
447
177k
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
7.32k
{
452
7.32k
    byte *data = NULL;
453
7.32k
    uint64_t newsize;
454
455
7.32k
    newsize = stream->len * 2 + needed;
456
7.32k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
457
7.32k
    if (!data)
458
0
        return_error(gs_error_VMerror);
459
460
7.32k
    memcpy(data, stream->data, stream->len);
461
7.32k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
462
7.32k
    stream->data = data;
463
7.32k
    stream->len = newsize;
464
465
7.32k
    return 0;
466
7.32k
}
467
468
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
469
2.09M
{
470
2.09M
    int code = 0;
471
472
2.09M
    if (stream->cur + len > stream->len) {
473
7.32k
        code = pdfi_bufstream_increase(ctx, stream, len);
474
7.32k
        if (code < 0)
475
0
            goto exit;
476
7.32k
    }
477
2.09M
    memcpy(stream->data + stream->cur, data, len);
478
2.09M
    stream->cur += len;
479
480
2.09M
 exit:
481
2.09M
    return code;
482
2.09M
}
483
484
/************ bufstream module END **************/
485
486
487
/* Create a c-string to use as object label
488
 * Uses the object_num to make it unique.
489
 * (don't call this for objects with object_num=0, though I am not going to check that here)
490
 *
491
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
492
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
493
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
494
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
495
 * object number and generation.
496
 */
497
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
498
47.4k
{
499
47.4k
    int code = 0, i;
500
47.4k
    int length;
501
47.4k
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
502
47.4k
    char *string = NULL;
503
47.4k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
504
47.4k
    uint32_t hash = 5381;
505
506
47.4k
    if (ctx->main_stream->s->file_name.data != NULL) {
507
0
        string = (char *)ctx->main_stream->s->file_name.data;
508
0
        length = ctx->main_stream->s->file_name.size;
509
510
0
        for (i=0;i < length;i++) {
511
#if ARCH_IS_BIG_ENDIAN
512
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
513
#else
514
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
515
0
#endif
516
0
        }
517
0
    }
518
519
47.4k
    *label = NULL;
520
47.4k
    length = strlen(template)+30;
521
522
47.4k
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
523
47.4k
    if (string == NULL) {
524
0
        code = gs_note_error(gs_error_VMerror);
525
0
        goto exit;
526
0
    }
527
528
47.4k
    if (pdfi_type_of(obj) == PDF_INDIRECT)
529
47.3k
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
530
77
    else
531
77
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
532
533
47.4k
    *label = string;
534
47.4k
 exit:
535
47.4k
    return code;
536
47.4k
}
537
538
/*********** BEGIN obj_to_string module ************/
539
540
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
541
542
/* Dispatch to get string representation of an object */
543
typedef struct {
544
    pdf_obj_type type;
545
    str_func func;
546
} obj_str_dispatch_t;
547
548
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
549
141
{
550
141
    int code = 0;
551
141
    int size = 12;
552
141
    byte *buf;
553
554
141
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_default_str(data)");
555
141
    if (buf == NULL)
556
0
        return_error(gs_error_VMerror);
557
141
    memcpy(buf, "/placeholder", size);
558
141
    *data = buf;
559
141
    *len = size;
560
141
    return code;
561
141
}
562
563
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
564
709k
{
565
709k
    int code = 0;
566
709k
    pdf_name *name = (pdf_name *)obj;
567
709k
    int size = name->length + 1;
568
709k
    byte *buf;
569
570
709k
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
571
709k
    if (buf == NULL)
572
0
        return_error(gs_error_VMerror);
573
709k
    buf[0] = '/';
574
709k
    memcpy(buf+1, name->data, name->length);
575
709k
    *data = buf;
576
709k
    *len = size;
577
709k
    return code;
578
709k
}
579
580
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
581
230k
{
582
230k
    int code = 0;
583
230k
    int size = 15;
584
230k
    pdf_num *number = (pdf_num *)obj;
585
230k
    char *buf;
586
587
230k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
588
230k
    if (buf == NULL)
589
0
        return_error(gs_error_VMerror);
590
230k
    snprintf(buf, size, "%.4f", number->value.d);
591
230k
    *data = (byte *)buf;
592
230k
    *len = strlen(buf);
593
230k
    return code;
594
230k
}
595
596
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
597
635k
{
598
635k
    int code = 0;
599
635k
    int size = 15;
600
635k
    pdf_num *number = (pdf_num *)obj;
601
635k
    char *buf;
602
603
635k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
604
635k
    if (buf == NULL)
605
0
        return_error(gs_error_VMerror);
606
635k
    snprintf(buf, size, "%"PRId64"", number->value.i);
607
635k
    *data = (byte *)buf;
608
635k
    *len = strlen(buf);
609
635k
    return code;
610
635k
}
611
612
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
613
17.3k
{
614
17.3k
    int size = 100;
615
17.3k
    char *buf;
616
617
17.3k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
618
17.3k
    if (buf == NULL)
619
0
        return_error(gs_error_VMerror);
620
17.3k
    snprintf(buf, size, "%"PRId64" %d R", object_num, generation);
621
17.3k
    *data = (byte *)buf;
622
17.3k
    *len = strlen(buf);
623
17.3k
    return 0;
624
17.3k
}
625
626
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
627
65.6k
{
628
65.6k
    int code = 0;
629
65.6k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
630
65.6k
    char *buf;
631
65.6k
    pdf_obj *object = NULL;
632
65.6k
    bool use_label = true;
633
634
65.6k
    code = pdfi_loop_detector_mark(ctx);
635
65.6k
    if (code < 0)
636
0
        return code;
637
638
65.6k
    if (ref->is_highlevelform) {
639
16.1k
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
640
16.1k
        ref->is_highlevelform = false;
641
49.5k
    } else {
642
49.5k
        if (!ref->is_marking) {
643
21.9k
            code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &object);
644
21.9k
            if (code == gs_error_undefined) {
645
                /* Do something sensible for undefined reference (this would be a broken file) */
646
                /* TODO: Flag an error? */
647
1.20k
                code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
648
1.20k
                goto exit;
649
1.20k
            }
650
20.7k
            if (code < 0 && code != gs_error_circular_reference)
651
196
                goto exit;
652
20.5k
            if (code == 0) {
653
20.1k
                if (pdfi_type_of(object) == PDF_STREAM) {
654
644
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)object);
655
644
                    if (code < 0) goto exit;
656
19.4k
                } else if (pdfi_type_of(object) == PDF_DICT) {
657
18.7k
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)object);
658
18.7k
                    if (code < 0) goto exit;
659
18.7k
                } else {
660
674
                    code = pdfi_obj_to_string(ctx, object, data, len);
661
674
                    if (code < 0) goto exit;
662
649
                    use_label = false;
663
649
                }
664
20.1k
            }
665
20.5k
        }
666
47.9k
        if (use_label) {
667
47.3k
            code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &buf);
668
47.3k
            if (code < 0) goto exit;
669
47.3k
            *data = (byte *)buf;
670
47.3k
            *len = strlen(buf);
671
47.3k
        }
672
47.9k
    }
673
674
65.6k
 exit:
675
65.6k
    (void)pdfi_loop_detector_cleartomark(ctx);
676
65.6k
    pdfi_countdown(object);
677
65.6k
    return code;
678
65.6k
}
679
680
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
681
25.1k
{
682
25.1k
    int code = 0;
683
25.1k
    int size = 5;
684
25.1k
    char *buf;
685
686
25.1k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_bool_str(data)");
687
25.1k
    if (buf == NULL)
688
0
        return_error(gs_error_VMerror);
689
25.1k
    if (obj == PDF_TRUE_OBJ) {
690
955
        memcpy(buf, (byte *)"true", 4);
691
955
        *len = 4;
692
24.1k
    } else {
693
24.1k
        memcpy(buf, (byte *)"false", 5);
694
24.1k
        *len = 5;
695
24.1k
    }
696
25.1k
    *data = (byte *)buf;
697
25.1k
    return code;
698
25.1k
}
699
700
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
701
1.06k
{
702
1.06k
    int code = 0;
703
1.06k
    int size = 4;
704
1.06k
    char *buf;
705
706
1.06k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_null_str(data)");
707
1.06k
    if (buf == NULL)
708
0
        return_error(gs_error_VMerror);
709
1.06k
    memcpy(buf, (byte *)"null", 4);
710
1.06k
    *len = 4;
711
1.06k
    *data = (byte *)buf;
712
1.06k
    return code;
713
1.06k
}
714
715
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
716
97.9k
{
717
97.9k
    pdf_string *string = (pdf_string *)obj;
718
97.9k
    char *buf;
719
97.9k
    int i, length = 0, j;
720
721
3.63M
    for (j=0;j<string->length;j++) {
722
3.53M
        if (string->data[j] == 0x0a || string->data[j] == 0x0d || string->data[j] == '(' || string->data[j] == ')' || string->data[j] == '\\')
723
5.96k
                length += 2;
724
3.52M
        else {
725
3.52M
            if (string->data[j] < 0x20 || string->data[j] > 0x7F || string->data[j] == '\\')
726
142k
                length += 4;
727
3.38M
            else
728
3.38M
                length++;
729
3.52M
        }
730
3.53M
    }
731
97.9k
    length += 2;
732
97.9k
    buf = (char *)gs_alloc_bytes(ctx->memory, length, "pdfi_obj_string_str(data)");
733
97.9k
    if (buf == NULL)
734
0
        return_error(gs_error_VMerror);
735
97.9k
    buf[0] = '(';
736
97.9k
    i = 1;
737
3.63M
    for (j=0;j<string->length;j++) {
738
3.53M
        switch(string->data[j]) {
739
956
            case 0x0a:
740
956
                buf[i++] = '\\';
741
956
                buf[i++] = 'n';
742
956
                break;
743
819
            case 0x0d:
744
819
                buf[i++] = '\\';
745
819
                buf[i++] = 'r';
746
819
                break;
747
1.99k
            case '(':
748
4.14k
            case ')':
749
4.18k
            case '\\':
750
4.18k
                buf[i++] = '\\';
751
4.18k
                buf[i++] = string->data[j];
752
4.18k
                break;
753
3.52M
            default:
754
3.52M
                if (string->data[j] < 0x20 || string->data[j] > 0x7F) {
755
142k
                    buf[i++] = '\\';
756
142k
                    buf[i++] = (string->data[j] >> 6) + 0x30;
757
142k
                    buf[i++] = ((string->data[j] & 0x3F) >> 3) + 0x30;
758
142k
                    buf[i++] = (string->data[j] & 0x07) + 0x30;
759
142k
                } else
760
3.38M
                buf[i++] = string->data[j];
761
3.52M
                break;
762
3.53M
        }
763
3.53M
    }
764
97.9k
    buf[i++] = ')';
765
766
97.9k
    *len = i;
767
97.9k
    *data = (byte *)buf;
768
97.9k
    return 0;
769
97.9k
}
770
771
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
772
114k
{
773
114k
    int code = 0;
774
114k
    pdf_array *array = (pdf_array *)obj;
775
114k
    pdf_obj *object = NULL;
776
114k
    byte *itembuf = NULL;
777
114k
    int itemsize;
778
114k
    pdfi_bufstream_t bufstream;
779
114k
    uint64_t index, arraysize;
780
781
114k
    code = pdfi_bufstream_init(ctx, &bufstream);
782
114k
    if (code < 0) goto exit;
783
784
114k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"[", 1);
785
114k
    if (code < 0) goto exit;
786
787
114k
    arraysize = pdfi_array_size(array);
788
920k
    for (index = 0; index < arraysize; index++) {
789
806k
        code = pdfi_array_get_no_deref(ctx, array, index, &object);
790
806k
        if (code < 0) goto exit;
791
792
806k
        code = pdfi_obj_to_string(ctx, object, &itembuf, &itemsize);
793
806k
        if (code < 0) goto exit;
794
795
805k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
796
805k
        if (code < 0) goto exit;
797
798
805k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
799
805k
        itembuf = NULL;
800
805k
        itemsize = 0;
801
805k
        pdfi_countdown(object);
802
805k
        object = NULL;
803
804
        /* Put a space between elements unless last item */
805
805k
        if (index+1 != arraysize) {
806
692k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
807
692k
            if (code < 0) goto exit;
808
692k
        }
809
805k
    }
810
811
114k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"]", 1);
812
114k
    if (code < 0) goto exit;
813
814
    /* Now copy the results out into the string we can keep */
815
114k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
816
817
114k
 exit:
818
114k
    if (itembuf)
819
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
820
114k
    pdfi_bufstream_free(ctx, &bufstream);
821
114k
    pdfi_countdown(object);
822
114k
    return code;
823
114k
}
824
825
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
826
649
{
827
649
    int code = 0;
828
649
    byte *buf;
829
649
    pdf_stream *stream = (pdf_stream *)obj;
830
649
    int64_t bufsize = 0;
831
649
    pdf_indirect_ref *streamref = NULL;
832
833
    /* TODO: How to deal with stream dictionaries?
834
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
835
     * See 'pushpin' annotation in annotations-galore_II.ps
836
     *
837
     * This will just literally grab the stream data.
838
     */
839
649
    if (stream->is_marking) {
840
570
        code = pdfi_stream_to_buffer(ctx, stream, &buf, &bufsize);
841
570
        if (code < 0) goto exit;
842
570
        *data = buf;
843
570
        *len = (int)bufsize;
844
570
    } else {
845
        /* Create an indirect ref for the stream */
846
79
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&streamref);
847
79
        if (code < 0) goto exit;
848
79
        pdfi_countup(streamref);
849
79
        streamref->ref_object_num = stream->object_num;
850
79
        streamref->ref_generation_num = stream->generation_num;
851
79
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)streamref, data, len);
852
79
    }
853
854
649
 exit:
855
649
    pdfi_countdown(streamref);
856
649
    return code;
857
649
}
858
859
/* This fetches without dereferencing.  If you want to see the references inline,
860
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
861
 */
862
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
863
63.6k
{
864
63.6k
    int code = 0;
865
63.6k
    pdf_dict *dict = (pdf_dict *)obj;
866
63.6k
    pdf_name *Key = NULL;
867
63.6k
    pdf_obj *Value = NULL;
868
63.6k
    byte *itembuf = NULL;
869
63.6k
    int itemsize;
870
63.6k
    pdfi_bufstream_t bufstream;
871
63.6k
    uint64_t index, dictsize;
872
63.6k
    uint64_t itemnum = 0;
873
874
63.6k
    code = pdfi_loop_detector_mark(ctx);
875
63.6k
    if (code < 0)
876
0
        return code;
877
878
63.6k
    code = pdfi_bufstream_init(ctx, &bufstream);
879
63.6k
    if (code < 0) goto exit;
880
881
63.6k
    dictsize = pdfi_dict_entries(dict);
882
    /* Handle empty dict specially */
883
63.6k
    if (dictsize == 0) {
884
10
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
885
10
        if (code < 0)
886
0
            goto exit;
887
10
        goto exit_copy;
888
10
    }
889
890
63.6k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
891
63.6k
    if (code < 0) goto exit;
892
893
    /* Note: We specifically fetch without dereferencing, so there will be no circular
894
     * references to handle here.
895
     */
896
    /* Wrong.... */
897
898
63.6k
    if (dict->object_num !=0 ) {
899
16.3k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
900
4
            code = gs_note_error(gs_error_circular_reference);
901
4
            goto exit;
902
4
        }
903
16.3k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
904
16.3k
        if (code < 0)
905
0
            goto exit;
906
16.3k
    }
907
908
    /* Get each (key,val) pair from dict and setup param for it */
909
63.6k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
910
77.3k
    while (code >= 0) {
911
77.3k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
912
77.3k
        if (code < 0) goto exit;
913
914
77.3k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
915
77.3k
        if (code < 0) goto exit;
916
917
77.3k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
918
77.3k
        itembuf = NULL;
919
77.3k
        itemsize = 0;
920
921
        /* Put a space between elements */
922
77.3k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
923
77.3k
        if (code < 0) goto exit;
924
925
        /* No dereference */
926
77.3k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
927
77.3k
        if (code < 0) goto exit;
928
77.3k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
929
77.3k
        if (code < 0) goto exit;
930
931
77.1k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
932
77.1k
        if (code < 0) goto exit;
933
934
77.1k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
935
77.1k
        itembuf = NULL;
936
77.1k
        itemsize = 0;
937
938
77.1k
        pdfi_countdown(Value);
939
77.1k
        Value = NULL;
940
77.1k
        pdfi_countdown(Key);
941
77.1k
        Key = NULL;
942
943
77.1k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
944
77.1k
        if (code == gs_error_undefined) {
945
63.4k
            code = 0;
946
63.4k
            break;
947
63.4k
        }
948
13.7k
        if (code < 0) goto exit;
949
950
        /* Put a space between elements */
951
13.7k
        if (++itemnum != dictsize) {
952
13.7k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
953
13.7k
            if (code < 0) goto exit;
954
13.7k
        }
955
13.7k
    }
956
63.4k
    if (code < 0) goto exit;
957
958
63.4k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
959
63.4k
    if (code < 0) goto exit;
960
961
63.4k
 exit_copy:
962
    /* Now copy the results out into the string we can keep */
963
63.4k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
964
965
63.6k
 exit:
966
63.6k
    if (itembuf)
967
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
968
63.6k
    pdfi_countdown(Key);
969
63.6k
    pdfi_countdown(Value);
970
63.6k
    pdfi_bufstream_free(ctx, &bufstream);
971
63.6k
    if (code < 0)
972
206
        (void)pdfi_loop_detector_cleartomark(ctx);
973
63.4k
    else
974
63.4k
        code = pdfi_loop_detector_cleartomark(ctx);
975
63.6k
    return code;
976
63.4k
}
977
978
#define PARAM1(A) # A,
979
#define PARAM2(A,B) A,
980
static const char pdf_token_strings[][10] = {
981
#include "pdf_tokens.h"
982
};
983
984
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
985
3
{
986
3
    int code = 0;
987
3
    const char *s = pdf_token_strings[(uintptr_t)obj];
988
3
    int size = (int)strlen(s) + 1;
989
3
    byte *buf;
990
991
3
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
992
3
    if (buf == NULL)
993
0
        return_error(gs_error_VMerror);
994
3
    memcpy(buf, s, size);
995
3
    *data = buf;
996
3
    *len = size;
997
3
    return code;
998
3
}
999
1000
obj_str_dispatch_t obj_str_dispatch[] = {
1001
    {PDF_NAME, pdfi_obj_name_str},
1002
    {PDF_ARRAY, pdfi_obj_array_str},
1003
    {PDF_REAL, pdfi_obj_real_str},
1004
    {PDF_INT, pdfi_obj_int_str},
1005
    {PDF_BOOL, pdfi_obj_bool_str},
1006
    {PDF_STRING, pdfi_obj_string_str},
1007
    {PDF_DICT, pdfi_obj_dict_str},
1008
    {PDF_STREAM, pdfi_obj_stream_str},
1009
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1010
    {PDF_NULL, pdfi_obj_null_str},
1011
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1012
    {0, NULL}
1013
};
1014
1015
/* Recursive function to build a string from an object
1016
 */
1017
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1018
1.94M
{
1019
1.94M
    obj_str_dispatch_t *dispatch_ptr;
1020
1.94M
    int code = 0;
1021
1.94M
    pdf_obj_type type;
1022
1023
1.94M
    *data = NULL;
1024
1.94M
    *len = 0;
1025
1.94M
    type = pdfi_type_of(obj);
1026
5.93M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1027
5.93M
        if (type == dispatch_ptr->type) {
1028
1.94M
            code = dispatch_ptr->func(ctx, obj, data, len);
1029
1.94M
            goto exit;
1030
1.94M
        }
1031
5.93M
    }
1032
    /* Not implemented, use default */
1033
141
    code = pdfi_obj_default_str(ctx, obj, data, len);
1034
1.94M
 exit:
1035
1.94M
    return code;
1036
141
}
1037
1038
/*********** END obj_to_string module ************/