Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct number of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
/* Allocate and zero-initialise a single object of the given type.
 *
 * ctx  - interpreter context; the object is allocated from ctx->memory and
 *        ctx is recorded in the object header.
 * type - kind of object to create. PDF_NULL, PDF_BOOL and any type not
 *        handled below are rejected with gs_error_typecheck.
 * size - strings, names, keywords: number of data bytes;
 *        arrays, dictionaries: number of entries;
 *        buffers: size of the data area (0 lets the caller attach its own);
 *        ignored for every other type.
 * obj  - receives the new object, or NULL on any failure.
 *
 * Returns 0 on success, gs_error_typecheck or gs_error_VMerror otherwise.
 */
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
{
    /* size_t rather than int: 'size' is unsigned, and header + size must not
     * be truncated to a small value before it reaches the allocator.
     */
    size_t bytes = 0;
    int code = 0;

    /* error_out frees *obj, so it must never see a stale caller value. */
    *obj = NULL;

    switch(type) {
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            bytes = sizeof(pdf_obj);
            break;
        case PDF_INT:
        case PDF_REAL:
            bytes = sizeof(pdf_num);
            break;
        case PDF_STRING:
        case PDF_NAME:
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
            break;
        case PDF_BUFFER:
            bytes = sizeof(pdf_buffer);
            break;
        case PDF_ARRAY:
            bytes = sizeof(pdf_array);
            break;
        case PDF_DICT:
            bytes = sizeof(pdf_dict);
            break;
        case PDF_INDIRECT:
            bytes = sizeof(pdf_indirect_ref);
            break;
        case PDF_KEYWORD:
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
            break;
        /* The following aren't PDF object types, but are objects we either want to
         * reference count, or store on the stack.
         */
        case PDF_XREF_TABLE:
            bytes = sizeof(xref_table_t);
            break;
        case PDF_STREAM:
            bytes = sizeof(pdf_stream);
            break;
        case PDF_NULL:
        case PDF_BOOL:
        default:
            code = gs_note_error(gs_error_typecheck);
            goto error_out;
    }
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
    if (*obj == NULL) {
        code = gs_note_error(gs_error_VMerror);
        goto error_out;
    }

    memset(*obj, 0x00, bytes);
    (*obj)->ctx = ctx;
    (*obj)->type = type;

    switch(type) {
/*      PDF_NULL and PDF_BOOL are handled as special (not allocated) data types
        and are rejected by the switch above, so they have no case here.
        case PDF_NULL:
        case PDF_BOOL: */

        case PDF_INT:
        case PDF_REAL:
        case PDF_INDIRECT:
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            break;
        case PDF_KEYWORD:
        case PDF_STRING:
        case PDF_NAME:
            ((pdf_string *)*obj)->length = size;
            break;
        case PDF_BUFFER:
            {
                pdf_buffer *b = (pdf_buffer *)*obj;
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
                */
                if (size > 0) {
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
                    if (b->data == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                }
                else {
                    b->data = NULL;
                }
                b->length = size;
            }
            break;
        case PDF_ARRAY:
            {
                pdf_obj **values = NULL;

                ((pdf_array *)*obj)->size = size;
                if (size > 0) {
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_obj *), "pdfi_object_alloc");
                    if (values == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_array *)*obj)->values = values;
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
                }
            }
            break;
        case PDF_DICT:
            {
                pdf_dict_entry *entries = NULL;

                ((pdf_dict *)*obj)->size = size;
                if (size > 0) {
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
                    if (entries == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_dict *)*obj)->list = entries;
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
                }
            }
            break;
        /* The following aren't PDF object types, but are objects we either want to
         * reference count, or store on the stack.
         */
        case PDF_XREF_TABLE:
            break;
        default:
            break;
    }
#if REFCNT_DEBUG
    (*obj)->UID = ctx->ref_UID++;
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
#endif
    return 0;
error_out:
    /* *obj is either NULL or the header allocated above; never a caller value. */
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
    *obj = NULL;
    return code;
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
0
{
192
0
    uint64_t test = 0;
193
0
    int code = 0;
194
195
0
    test = (uint64_t)floor(d);
196
0
    if (d == test) {
197
0
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
0
        if (code < 0)
199
0
            return code;
200
0
        (*num)->value.i = test;
201
0
    }
202
0
    else {
203
0
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
0
        if (code < 0)
205
0
            return code;
206
0
        (*num)->value.d = d;
207
0
    }
208
209
0
    return 0;
210
0
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
/* Release a name or string object.
 * Currently names and strings are the same, so one routine serves both.
 */
static void pdfi_free_namestring(pdf_obj *o)
{
    pdf_name *name_or_string = (pdf_name *)o;

    gs_free_object(OBJ_MEMORY(name_or_string), name_or_string,
                   "pdf interpreter free name or string");
}
227
228
/* Release a keyword object. */
static void pdfi_free_keyword(pdf_obj *o)
{
    pdf_keyword *keyword = (pdf_keyword *)o;

    gs_free_object(OBJ_MEMORY(keyword), keyword,
                   "pdf interpreter free keyword");
}
234
235
/* Release an xref table object: first the entry storage it points at,
 * then the table structure itself.
 */
static void pdfi_free_xref_table(pdf_obj *o)
{
    xref_table_t *table = (xref_table_t *)o;

    gs_free_object(OBJ_MEMORY(table), table->xref, "pdfi_free_xref_table");
    gs_free_object(OBJ_MEMORY(table), table, "pdfi_free_xref_table");
}
242
243
/* Release a stream object: drop its reference on the stream dictionary,
 * then free the stream structure.
 */
static void pdfi_free_stream(pdf_obj *o)
{
    pdf_stream *s = (pdf_stream *)o;

    pdfi_countdown(s->stream_dict);
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
}
250
251
/* Release a buffer object: its data area first, then the buffer header. */
static void pdfi_free_buffer(pdf_obj *o)
{
    pdf_buffer *buffer = (pdf_buffer *)o;

    gs_free_object(OBJ_MEMORY(buffer), buffer->data, "pdfi_free_buffer(data)");
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
}
258
259
/* Free an object according to its type.
 *
 * NULL pointers, and pointer values below TOKEN__LAST_KEY (which are not
 * allocated objects), are ignored. Simple objects are freed directly;
 * composite objects are handed to their type-specific free routine.
 */
void pdfi_free_object(pdf_obj *o)
{
    if (o == NULL || (intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
        return;

    switch(o->type) {
        /* Objects that own no additional storage */
        case PDF_INT:
        case PDF_REAL:
        case PDF_INDIRECT:
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
            break;

        /* Objects with a dedicated free routine */
        case PDF_NAME:
        case PDF_STRING:
            pdfi_free_namestring(o);
            break;
        case PDF_KEYWORD:
            pdfi_free_keyword(o);
            break;
        case PDF_BUFFER:
            pdfi_free_buffer(o);
            break;
        case PDF_ARRAY:
            pdfi_free_array(o);
            break;
        case PDF_DICT:
            pdfi_free_dict(o);
            break;
        case PDF_STREAM:
            pdfi_free_stream(o);
            break;
        case PDF_XREF_TABLE:
            pdfi_free_xref_table(o);
            break;
        case PDF_FONT:
            pdfi_free_font(o);
            break;
        case PDF_CMAP:
            pdfi_free_cmap(o);
            break;

        /* Anything else is only reported; nothing is freed */
        case PDF_NULL:
        case PDF_BOOL:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
            break;
        default:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
            break;
    }
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
58.1k
{
320
58.1k
    int code = 0;
321
58.1k
    pdf_stream *new_stream = NULL;
322
323
58.1k
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
58.1k
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
58.1k
    if (code < 0)
328
0
        goto error_exit;
329
330
58.1k
    new_stream->ctx = ctx;
331
58.1k
    pdfi_countup(new_stream);
332
333
58.1k
    new_stream->stream_dict = dict;
334
58.1k
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
58.1k
    if (do_convert) {
340
54.5k
        new_stream->object_num = dict->object_num;
341
54.5k
        new_stream->generation_num = dict->generation_num;
342
54.5k
        dict->object_num = 0;
343
54.5k
        dict->generation_num = 0;
344
54.5k
    }
345
58.1k
    *stream = new_stream;
346
58.1k
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
58.1k
}
352
353
/* Fetch the dictionary attached to a stream.
 *
 * The dictionary's reference count is incremented so it won't go away while
 * the caller holds it. If the dictionary has no object number, it is given
 * the stream's object and generation numbers. Always returns 0.
 */
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
{
    pdf_dict *sdict = stream->stream_dict;

    *dict = sdict;
    pdfi_countup(sdict);

    if (sdict->object_num == 0) {
        sdict->object_num = stream->object_num;
        sdict->generation_num = stream->generation_num;
    }

    return 0;
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
31
{
370
31
    int code;
371
31
    int length = strlen(charstr);
372
31
    pdf_string *newstr = NULL;
373
374
31
    *string = NULL;
375
376
31
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
31
    if (code < 0) goto exit;
378
379
31
    memcpy(newstr->data, (byte *)charstr, length);
380
381
31
    *string = newstr;
382
31
    pdfi_countup(newstr);
383
31
 exit:
384
31
    return code;
385
31
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
38.7k
{
390
38.7k
    int code;
391
38.7k
    int length = strlen(charstr);
392
38.7k
    pdf_name *newname = NULL;
393
394
38.7k
    *name = NULL;
395
396
38.7k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
38.7k
    if (code < 0) goto exit;
398
399
38.7k
    memcpy(newname->data, (byte *)charstr, length);
400
401
38.7k
    *name = newname;
402
38.7k
    pdfi_countup(newname);
403
38.7k
 exit:
404
38.7k
    return code;
405
38.7k
}
406
407
/************ bufstream module BEGIN **************/
408
0
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
/* Prepare an empty bufstream with INIT_BUF_SIZE bytes of storage.
 * Returns 0, or gs_error_VMerror if the storage cannot be allocated.
 */
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    stream->cur = 0;
    stream->len = INIT_BUF_SIZE;
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");

    if (stream->data == NULL)
        return_error(gs_error_VMerror);

    return 0;
}
427
428
/* Release a bufstream's storage (if it still has any) and reset it to the
 * empty state. Always returns 0.
 */
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    if (stream->data != NULL)
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");

    stream->data = NULL;
    stream->cur = 0;
    stream->len = 0;

    return 0;
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
0
{
441
0
    *buf = stream->data;
442
0
    *len = stream->cur;
443
0
    stream->len = 0;
444
0
    stream->cur = 0;
445
0
    stream->data = NULL;
446
0
    return 0;
447
0
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
0
{
452
0
    byte *data = NULL;
453
0
    uint64_t newsize;
454
455
0
    newsize = stream->len * 2 + needed;
456
0
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
457
0
    if (!data)
458
0
        return_error(gs_error_VMerror);
459
460
0
    memcpy(data, stream->data, stream->len);
461
0
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
462
0
    stream->data = data;
463
0
    stream->len = newsize;
464
465
0
    return 0;
466
0
}
467
468
/* Append 'len' bytes from 'data' to the stream, growing the buffer first
 * when the bytes would not fit. Returns 0 on success or the error from
 * pdfi_bufstream_increase(), in which case nothing is written.
 */
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
{
    int code = 0;
    bool fits = !(stream->cur + len > stream->len);

    if (!fits) {
        code = pdfi_bufstream_increase(ctx, stream, len);
        if (code < 0)
            return code;
    }

    memcpy(stream->data + stream->cur, data, len);
    stream->cur += len;

    return code;
}
483
484
/************ bufstream module END **************/
485
486
487
/* Create a c-string to use as object label
488
 * Uses the object_num to make it unique.
489
 * (don't call this for objects with object_num=0, though I am not going to check that here)
490
 *
491
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
492
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
493
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
494
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
495
 * object number and generation.
496
 */
497
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
498
0
{
499
0
    int code = 0, i;
500
0
    int length;
501
0
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
502
0
    char *string = NULL;
503
0
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
504
0
    uint32_t hash = 5381;
505
506
0
    if (ctx->main_stream->s->file_name.data != NULL) {
507
0
        string = (char *)ctx->main_stream->s->file_name.data;
508
0
        length = ctx->main_stream->s->file_name.size;
509
510
0
        for (i=0;i < length;i++) {
511
#if ARCH_IS_BIG_ENDIAN
512
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
513
#else
514
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
515
0
#endif
516
0
        }
517
0
    }
518
519
0
    *label = NULL;
520
0
    length = strlen(template)+30;
521
522
0
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
523
0
    if (string == NULL) {
524
0
        code = gs_note_error(gs_error_VMerror);
525
0
        goto exit;
526
0
    }
527
528
0
    if (pdfi_type_of(obj) == PDF_INDIRECT)
529
0
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
530
0
    else
531
0
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
532
533
0
    *label = string;
534
0
 exit:
535
0
    return code;
536
0
}
537
538
/*********** BEGIN obj_to_string module ************/
539
540
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
541
542
/* Dispatch to get string representation of an object */
543
typedef struct {
544
    pdf_obj_type type;
545
    str_func func;
546
} obj_str_dispatch_t;
547
548
/* Fallback converter: ignores 'obj' and emits the 12 bytes "/placeholder"
 * (no terminating NUL) in a newly allocated buffer.
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int nbytes = 12;
    byte *out = gs_alloc_bytes(ctx->memory, nbytes, "pdfi_obj_default_str(data)");

    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, "/placeholder", nbytes);
    *len = nbytes;
    *data = out;
    return 0;
}
562
563
/* Render a name as '/' followed by the name's bytes (no terminating NUL)
 * in a newly allocated buffer.
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_name *name = (pdf_name *)obj;
    int total = name->length + 1; /* one extra byte for the leading '/' */
    byte *out;

    out = gs_alloc_bytes(ctx->memory, total, "pdfi_obj_name_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    *out = '/';
    memcpy(out + 1, name->data, name->length);

    *len = total;
    *data = out;
    return 0;
}
579
580
/* Render a real number with four decimal places ("%.4f") into a newly
 * allocated buffer. *len is the number of characters produced; the buffer
 * is also NUL-terminated.
 *
 * The buffer is sized for the worst case so the text is never truncated:
 * a finite double needs at most a sign, 309 integer digits, the decimal
 * point and 4 fraction digits (315 characters) plus the terminator. A short
 * fixed buffer would silently cut large values down to a different number.
 *
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    int size = 320;
    pdf_num *number = (pdf_num *)obj;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%.4f", number->value.d);
    *data = (byte *)buf;
    *len = strlen(buf);
    return code;
}
595
596
/* Render an integer in decimal into a newly allocated buffer. *len is the
 * number of characters produced; the buffer is also NUL-terminated.
 *
 * A 64-bit signed value needs up to 20 characters (sign plus 19 digits) and
 * a terminator, so the buffer is sized to hold any value without truncation.
 *
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    int size = 24;
    pdf_num *number = (pdf_num *)obj;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%"PRId64"", number->value.i);
    *data = (byte *)buf;
    *len = strlen(buf);
    return code;
}
611
612
/* Format an indirect reference as "<object_num> <generation> R" into a newly
 * allocated, NUL-terminated buffer. *len is the number of characters
 * produced.
 *
 * Both numbers are unsigned, so unsigned conversion specifiers are used;
 * signed ones do not match the argument types.
 *
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
{
    int size = 100;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%"PRIu64" %"PRIu32" R", object_num, generation);
    *data = (byte *)buf;
    *len = strlen(buf);
    return 0;
}
625
626
/* Produce the text for an indirect reference.
 *
 * - A reference flagged is_highlevelform is written as
 *   "<highlevel_object_num> 0 R" and the flag is cleared.
 * - Otherwise, unless the reference is flagged is_marking, the target is
 *   dereferenced:
 *     . an undefined target is written as a plain "<num> <gen> R";
 *     . a stream or dictionary target is passed to pdfi_pdfmark_stream() /
 *       pdfi_pdfmark_dict() and the reference is written as a label;
 *     . any other target is written inline via pdfi_obj_to_string();
 *     . a circular reference falls through to the label.
 * - In the remaining cases the label from pdfi_obj_get_label() is written.
 *
 * Returns 0 on success or a negative error code.
 */
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
    pdf_obj *target = NULL;
    bool want_label = true;
    char *text;
    int code;

    code = pdfi_loop_detector_mark(ctx);
    if (code < 0)
        return code;

    if (ref->is_highlevelform) {
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
        ref->is_highlevelform = false;
        goto exit;
    }

    if (!ref->is_marking) {
        code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &target);
        if (code == gs_error_undefined) {
            /* Do something sensible for undefined reference (this would be a broken file) */
            /* TODO: Flag an error? */
            code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
            goto exit;
        }
        if (code < 0 && code != gs_error_circular_reference)
            goto exit;

        if (code == 0) {
            switch (pdfi_type_of(target)) {
                case PDF_STREAM:
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)target);
                    break;
                case PDF_DICT:
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)target);
                    break;
                default:
                    /* Simple objects are written in place of the reference */
                    code = pdfi_obj_to_string(ctx, target, data, len);
                    want_label = false;
                    break;
            }
            if (code < 0)
                goto exit;
        }
    }

    if (want_label) {
        code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &text);
        if (code >= 0) {
            *data = (byte *)text;
            *len = strlen(text);
        }
    }

 exit:
    (void)pdfi_loop_detector_cleartomark(ctx);
    pdfi_countdown(target);
    return code;
}
679
680
/* Render a boolean as "true" (when obj is PDF_TRUE_OBJ) or "false"
 * (anything else) in a newly allocated 5-byte buffer, without a
 * terminating NUL.
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int capacity = 5; /* strlen("false"), the longer of the two */
    char *out;

    out = (char *)gs_alloc_bytes(ctx->memory, capacity, "pdfi_obj_bool_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    if (obj != PDF_TRUE_OBJ) {
        memcpy(out, (byte *)"false", 5);
        *len = 5;
    } else {
        memcpy(out, (byte *)"true", 4);
        *len = 4;
    }

    *data = (byte *)out;
    return 0;
}
699
700
/* Render the null object: the 4 bytes "null" (no terminating NUL) in a
 * newly allocated buffer.
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int nbytes = 4;
    char *out = (char *)gs_alloc_bytes(ctx->memory, nbytes, "pdfi_obj_null_str(data)");

    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, (byte *)"null", 4);
    *data = (byte *)out;
    *len = 4;
    return 0;
}
714
715
/* Render a string object as a parenthesised literal string.
 *
 * Escaping rules:
 *   LF -> \n, CR -> \r, '(' ')' '\' -> preceded by a backslash,
 *   other bytes below 0x20 or above 0x7F -> three-digit octal escape,
 *   everything else -> copied unchanged.
 *
 * A first pass computes the exact output size, a second pass emits the
 * text. The result is not NUL-terminated; *len is its byte count.
 * Returns 0, or gs_error_VMerror if the buffer cannot be allocated.
 */
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_string *string = (pdf_string *)obj;
    char *out;
    int needed = 0, pos, j;

    /* Pass 1: how many bytes will the escaped text occupy? */
    for (j = 0; j < string->length; j++) {
        switch (string->data[j]) {
            case 0x0a:
            case 0x0d:
            case '(':
            case ')':
            case '\\':
                needed += 2;
                break;
            default:
                if (string->data[j] < 0x20 || string->data[j] > 0x7F)
                    needed += 4;
                else
                    needed += 1;
                break;
        }
    }
    needed += 2; /* the enclosing parentheses */

    out = (char *)gs_alloc_bytes(ctx->memory, needed, "pdfi_obj_string_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    /* Pass 2: emit */
    pos = 0;
    out[pos++] = '(';
    for (j = 0; j < string->length; j++) {
        switch (string->data[j]) {
            case 0x0a:
                out[pos++] = '\\';
                out[pos++] = 'n';
                break;
            case 0x0d:
                out[pos++] = '\\';
                out[pos++] = 'r';
                break;
            case '(':
            case ')':
            case '\\':
                out[pos++] = '\\';
                out[pos++] = string->data[j];
                break;
            default:
                if (string->data[j] < 0x20 || string->data[j] > 0x7F) {
                    /* Octal escape: 2 + 3 + 3 bits, each as an ASCII digit */
                    out[pos++] = '\\';
                    out[pos++] = (string->data[j] >> 6) + 0x30;
                    out[pos++] = ((string->data[j] & 0x3F) >> 3) + 0x30;
                    out[pos++] = (string->data[j] & 0x07) + 0x30;
                } else {
                    out[pos++] = string->data[j];
                }
                break;
        }
    }
    out[pos++] = ')';

    *data = (byte *)out;
    *len = pos;
    return 0;
}
770
771
/* Render an array as "[item item ...]".
 *
 * Elements are fetched without dereferencing, converted with
 * pdfi_obj_to_string() and separated by single spaces. On success the
 * accumulated buffer is handed to the caller through 'data'/'len'.
 * Returns 0 on success or the first error encountered.
 */
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_array *array = (pdf_array *)obj;
    pdfi_bufstream_t out;
    pdf_obj *element = NULL;
    byte *text = NULL;
    int text_len;
    uint64_t index, count;
    int code;

    code = pdfi_bufstream_init(ctx, &out);
    if (code < 0) goto exit;

    code = pdfi_bufstream_write(ctx, &out, (byte *)"[", 1);
    if (code < 0) goto exit;

    count = pdfi_array_size(array);
    for (index = 0; index < count; index++) {
        /* Put a space between elements (i.e. before every one but the first) */
        if (index != 0) {
            code = pdfi_bufstream_write(ctx, &out, (byte *)" ", 1);
            if (code < 0) goto exit;
        }

        code = pdfi_array_get_no_deref(ctx, array, index, &element);
        if (code < 0) goto exit;

        code = pdfi_obj_to_string(ctx, element, &text, &text_len);
        if (code < 0) goto exit;

        code = pdfi_bufstream_write(ctx, &out, text, text_len);
        if (code < 0) goto exit;

        /* Done with this element's text and reference */
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
        text = NULL;
        text_len = 0;
        pdfi_countdown(element);
        element = NULL;
    }

    code = pdfi_bufstream_write(ctx, &out, (byte *)"]", 1);
    if (code < 0) goto exit;

    /* Now copy the results out into the string we can keep */
    code = pdfi_bufstream_copy(ctx, &out, data, len);

 exit:
    if (text)
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
    pdfi_bufstream_free(ctx, &out);
    pdfi_countdown(element);
    return code;
}
824
825
/* Produce the text for a stream object.
 *
 * A stream flagged is_marking is read into a buffer with
 * pdfi_stream_to_buffer() and that data is returned as-is. Any other stream
 * is represented by a temporary indirect reference to its object/generation
 * number, rendered with pdfi_obj_indirect_str().
 *
 * TODO: How to deal with stream dictionaries?
 * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
 * See 'pushpin' annotation in annotations-galore_II.ps
 *
 * Returns 0 on success or a negative error code.
 */
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_stream *stream = (pdf_stream *)obj;
    pdf_indirect_ref *tmpref = NULL;
    int64_t nbytes = 0;
    byte *contents;
    int code = 0;

    if (!stream->is_marking) {
        /* Create an indirect ref for the stream */
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&tmpref);
        if (code < 0) goto exit;
        pdfi_countup(tmpref);
        tmpref->ref_object_num = stream->object_num;
        tmpref->ref_generation_num = stream->generation_num;
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)tmpref, data, len);
    } else {
        /* This will just literally grab the stream data. */
        code = pdfi_stream_to_buffer(ctx, stream, &contents, &nbytes);
        if (code < 0) goto exit;
        *data = contents;
        *len = (int)nbytes;
    }

 exit:
    pdfi_countdown(tmpref);
    return code;
}
858
859
/* This fetches without dereferencing.  If you want to see the references inline,
860
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
861
 */
862
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
863
0
{
864
0
    int code = 0;
865
0
    pdf_dict *dict = (pdf_dict *)obj;
866
0
    pdf_name *Key = NULL;
867
0
    pdf_obj *Value = NULL;
868
0
    byte *itembuf = NULL;
869
0
    int itemsize;
870
0
    pdfi_bufstream_t bufstream;
871
0
    uint64_t index, dictsize;
872
0
    uint64_t itemnum = 0;
873
874
0
    code = pdfi_loop_detector_mark(ctx);
875
0
    if (code < 0)
876
0
        return code;
877
878
0
    code = pdfi_bufstream_init(ctx, &bufstream);
879
0
    if (code < 0) goto exit;
880
881
0
    dictsize = pdfi_dict_entries(dict);
882
    /* Handle empty dict specially */
883
0
    if (dictsize == 0) {
884
0
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
885
0
        if (code < 0)
886
0
            goto exit;
887
0
        goto exit_copy;
888
0
    }
889
890
0
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
891
0
    if (code < 0) goto exit;
892
893
    /* Note: We specifically fetch without dereferencing, so there will be no circular
894
     * references to handle here.
895
     */
896
    /* Wrong.... */
897
898
0
    if (dict->object_num !=0 ) {
899
0
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
900
0
            code = gs_note_error(gs_error_circular_reference);
901
0
            goto exit;
902
0
        }
903
0
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
904
0
        if (code < 0)
905
0
            goto exit;
906
0
    }
907
908
    /* Get each (key,val) pair from dict and setup param for it */
909
0
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
910
0
    while (code >= 0) {
911
0
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
912
0
        if (code < 0) goto exit;
913
914
0
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
915
0
        if (code < 0) goto exit;
916
917
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
918
0
        itembuf = NULL;
919
0
        itemsize = 0;
920
921
        /* Put a space between elements */
922
0
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
923
0
        if (code < 0) goto exit;
924
925
        /* No dereference */
926
0
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
927
0
        if (code < 0) goto exit;
928
0
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
929
0
        if (code < 0) goto exit;
930
931
0
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
932
0
        if (code < 0) goto exit;
933
934
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
935
0
        itembuf = NULL;
936
0
        itemsize = 0;
937
938
0
        pdfi_countdown(Value);
939
0
        Value = NULL;
940
0
        pdfi_countdown(Key);
941
0
        Key = NULL;
942
943
0
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
944
0
        if (code == gs_error_undefined) {
945
0
            code = 0;
946
0
            break;
947
0
        }
948
0
        if (code < 0) goto exit;
949
950
        /* Put a space between elements */
951
0
        if (++itemnum != dictsize) {
952
0
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
953
0
            if (code < 0) goto exit;
954
0
        }
955
0
    }
956
0
    if (code < 0) goto exit;
957
958
0
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
959
0
    if (code < 0) goto exit;
960
961
0
 exit_copy:
962
    /* Now copy the results out into the string we can keep */
963
0
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
964
965
0
 exit:
966
0
    if (itembuf)
967
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
968
0
    pdfi_countdown(Key);
969
0
    pdfi_countdown(Value);
970
0
    pdfi_bufstream_free(ctx, &bufstream);
971
0
    if (code < 0)
972
0
        (void)pdfi_loop_detector_cleartomark(ctx);
973
0
    else
974
0
        code = pdfi_loop_detector_cleartomark(ctx);
975
0
    return code;
976
0
}
977
978
/* Table of keyword spellings, generated by including pdf_tokens.h.
 * PARAM1 stringifies its argument; PARAM2 emits its first argument as-is.
 * Each entry is stored in a fixed 10 byte slot.
 */
#define PARAM1(A) # A,
#define PARAM2(A,B) A,
static const char pdf_token_strings[][10] = {
#include "pdf_tokens.h"
};

/* Build the string form of a fast keyword.
 *
 * A fast keyword carries no object data: the pointer value itself is used as
 * the index into pdf_token_strings.  The spelling is copied into a newly
 * allocated buffer returned through *data.  The buffer is NUL terminated, but
 * *len counts only the keyword characters so that the text can be
 * concatenated into a larger string without embedding a zero byte.
 *
 * Returns 0 on success, gs_error_typecheck if the index lies outside the
 * table, or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const uintptr_t token = (uintptr_t)obj;
    const char *s;
    int size;
    byte *buf;

    /* Never index past the end of the generated table */
    if (token >= sizeof(pdf_token_strings) / sizeof(pdf_token_strings[0]))
        return_error(gs_error_typecheck);

    s = pdf_token_strings[token];
    size = (int)strlen(s);

    /* One extra byte for the terminator, which is not reported in *len */
    buf = gs_alloc_bytes(ctx->memory, size + 1, "pdfi_obj_fast_keyword_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    memcpy(buf, s, size + 1);
    *data = buf;
    *len = size;
    return 0;
}
999
1000
obj_str_dispatch_t obj_str_dispatch[] = {
1001
    {PDF_NAME, pdfi_obj_name_str},
1002
    {PDF_ARRAY, pdfi_obj_array_str},
1003
    {PDF_REAL, pdfi_obj_real_str},
1004
    {PDF_INT, pdfi_obj_int_str},
1005
    {PDF_BOOL, pdfi_obj_bool_str},
1006
    {PDF_STRING, pdfi_obj_string_str},
1007
    {PDF_DICT, pdfi_obj_dict_str},
1008
    {PDF_STREAM, pdfi_obj_stream_str},
1009
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1010
    {PDF_NULL, pdfi_obj_null_str},
1011
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1012
    {0, NULL}
1013
};
1014
1015
/* Recursive function to build a string from an object
1016
 */
1017
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1018
0
{
1019
0
    obj_str_dispatch_t *dispatch_ptr;
1020
0
    int code = 0;
1021
0
    pdf_obj_type type;
1022
1023
0
    *data = NULL;
1024
0
    *len = 0;
1025
0
    type = pdfi_type_of(obj);
1026
0
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1027
0
        if (type == dispatch_ptr->type) {
1028
0
            code = dispatch_ptr->func(ctx, obj, data, len);
1029
0
            goto exit;
1030
0
        }
1031
0
    }
1032
    /* Not implemented, use default */
1033
0
    code = pdfi_obj_default_str(ctx, obj, data, len);
1034
0
 exit:
1035
0
    return code;
1036
0
}
1037
1038
/*********** END obj_to_string module ************/