Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
#include "stream.h"
28
29
/***********************************************************************************/
30
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
31
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
32
/* 'size' argument to create an object with the correct numbr of entries or of the */
33
/* requested size. Simple objects (integers etc) ignore this parameter.            */
34
/* Objects do not get their data assigned, that's up to the caller, but we do      */
35
/* set the length or size fields for composite objects.                             */
36
37
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
38
804M
{
39
804M
    int bytes = 0;
40
804M
    int code = 0;
41
42
804M
    switch(type) {
43
14.9M
        case PDF_ARRAY_MARK:
44
27.8M
        case PDF_DICT_MARK:
45
28.4M
        case PDF_PROC_MARK:
46
28.4M
            bytes = sizeof(pdf_obj);
47
28.4M
            break;
48
212M
        case PDF_INT:
49
339M
        case PDF_REAL:
50
339M
            bytes = sizeof(pdf_num);
51
339M
            break;
52
72.6M
        case PDF_STRING:
53
373M
        case PDF_NAME:
54
373M
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
55
373M
            break;
56
41.3k
        case PDF_BUFFER:
57
41.3k
            bytes = sizeof(pdf_buffer);
58
41.3k
            break;
59
14.8M
        case PDF_ARRAY:
60
14.8M
            bytes = sizeof(pdf_array);
61
14.8M
            break;
62
13.4M
        case PDF_DICT:
63
13.4M
            bytes = sizeof(pdf_dict);
64
13.4M
            break;
65
18.4M
        case PDF_INDIRECT:
66
18.4M
            bytes = sizeof(pdf_indirect_ref);
67
18.4M
            break;
68
15.7M
        case PDF_KEYWORD:
69
15.7M
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
70
15.7M
            break;
71
        /* The following aren't PDF object types, but are objects we either want to
72
         * reference count, or store on the stack.
73
         */
74
0
        case PDF_XREF_TABLE:
75
0
            bytes = sizeof(xref_table_t);
76
0
            break;
77
1.03M
        case PDF_STREAM:
78
1.03M
            bytes = sizeof(pdf_stream);
79
1.03M
            break;
80
0
        case PDF_NULL:
81
0
        case PDF_BOOL:
82
0
        default:
83
0
            code = gs_note_error(gs_error_typecheck);
84
0
            goto error_out;
85
804M
    }
86
804M
    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
87
804M
    if (*obj == NULL) {
88
0
        code = gs_note_error(gs_error_VMerror);
89
0
        goto error_out;
90
0
    }
91
92
804M
    memset(*obj, 0x00, bytes);
93
804M
    (*obj)->ctx = ctx;
94
804M
    (*obj)->type = type;
95
96
804M
    switch(type) {
97
/*      PDF_NULL and PDF_BOOL are now handled as special (not allocated) data types
98
        and we will return an error in the switch above if we get a call to allocate
99
        one of these. Having the cases isn't harmful but Coverity complains of dead
100
        code, so commenting these out to silence Coverity while preserving the old
101
        semantics to indicate what's happening.
102
        case PDF_NULL:
103
        case PDF_BOOL: */
104
105
212M
        case PDF_INT:
106
339M
        case PDF_REAL:
107
358M
        case PDF_INDIRECT:
108
373M
        case PDF_ARRAY_MARK:
109
386M
        case PDF_DICT_MARK:
110
386M
        case PDF_PROC_MARK:
111
386M
            break;
112
15.7M
        case PDF_KEYWORD:
113
88.3M
        case PDF_STRING:
114
388M
        case PDF_NAME:
115
388M
            ((pdf_string *)*obj)->length = size;
116
388M
            break;
117
41.3k
        case PDF_BUFFER:
118
41.3k
            {
119
41.3k
                pdf_buffer *b = (pdf_buffer *)*obj;
120
               /* NOTE: size can be 0 if the caller wants to allocate the data area itself
121
                */
122
41.3k
                if (size > 0) {
123
0
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
124
0
                    if (b->data == NULL) {
125
0
                        code = gs_note_error(gs_error_VMerror);
126
0
                        goto error_out;
127
0
                    }
128
0
                }
129
41.3k
                else {
130
41.3k
                    b->data = NULL;
131
41.3k
                }
132
41.3k
                b->length = size;
133
41.3k
            }
134
0
            break;
135
14.8M
        case PDF_ARRAY:
136
14.8M
            {
137
14.8M
                pdf_obj **values = NULL;
138
139
14.8M
                ((pdf_array *)*obj)->size = size;
140
14.8M
                if (size > 0) {
141
13.9M
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_obj *), "pdfi_object_alloc");
142
13.9M
                    if (values == NULL) {
143
0
                        code = gs_note_error(gs_error_VMerror);
144
0
                        goto error_out;
145
0
                    }
146
13.9M
                    ((pdf_array *)*obj)->values = values;
147
13.9M
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
148
13.9M
                }
149
14.8M
            }
150
14.8M
            break;
151
14.8M
        case PDF_DICT:
152
13.4M
            {
153
13.4M
                pdf_dict_entry *entries = NULL;
154
155
13.4M
                ((pdf_dict *)*obj)->size = size;
156
13.4M
                if (size > 0) {
157
12.8M
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
158
12.8M
                    if (entries == NULL) {
159
0
                        code = gs_note_error(gs_error_VMerror);
160
0
                        goto error_out;
161
0
                    }
162
12.8M
                    ((pdf_dict *)*obj)->list = entries;
163
12.8M
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
164
12.8M
                }
165
13.4M
            }
166
13.4M
            break;
167
        /* The following aren't PDF object types, but are objects we either want to
168
         * reference count, or store on the stack.
169
         */
170
13.4M
        case PDF_XREF_TABLE:
171
0
            break;
172
1.03M
        default:
173
1.03M
            break;
174
804M
    }
175
#if REFCNT_DEBUG
176
    (*obj)->UID = ctx->ref_UID++;
177
    outprintf(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
178
#endif
179
804M
    return 0;
180
0
error_out:
181
0
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
182
0
    *obj = NULL;
183
0
    return code;
184
804M
}
185
186
/* Create a PDF number object from a numeric value. Attempts to create
187
 * either a REAL or INT as appropriate. As usual for the alloc functions
188
 * this returns an object with a reference count of 0.
189
 */
190
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
191
29.0k
{
192
29.0k
    uint64_t test = 0;
193
29.0k
    int code = 0;
194
195
29.0k
    test = (uint64_t)floor(d);
196
29.0k
    if (d == test) {
197
22.8k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
198
22.8k
        if (code < 0)
199
0
            return code;
200
22.8k
        (*num)->value.i = test;
201
22.8k
    }
202
6.15k
    else {
203
6.15k
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
204
6.15k
        if (code < 0)
205
0
            return code;
206
6.15k
        (*num)->value.d = d;
207
6.15k
    }
208
209
29.0k
    return 0;
210
29.0k
}
211
212
/***********************************************************************************/
213
/* Functions to free the various kinds of 'PDF objects'.                           */
214
/* All objects are reference counted, newly allocated objects, as noted above have */
215
/* a reference count of 0. Pushing an object onto the stack increments             */
216
/* its reference count, popping it from the stack decrements its reference count.  */
217
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
218
/* pdfi_free_object() to free it.                                                  */
219
220
static void pdfi_free_namestring(pdf_obj *o)
221
373M
{
222
    /* Currently names and strings are the same, so a single cast is OK */
223
373M
    pdf_name *n = (pdf_name *)o;
224
225
373M
    gs_free_object(OBJ_MEMORY(n), n, "pdf interpreter free name or string");
226
373M
}
227
228
static void pdfi_free_keyword(pdf_obj *o)
229
15.7M
{
230
15.7M
    pdf_keyword *k = (pdf_keyword *)o;
231
232
15.7M
    gs_free_object(OBJ_MEMORY(k), k, "pdf interpreter free keyword");
233
15.7M
}
234
235
static void pdfi_free_xref_table(pdf_obj *o)
236
84.7k
{
237
84.7k
    xref_table_t *xref = (xref_table_t *)o;
238
239
84.7k
    gs_free_object(OBJ_MEMORY(xref), xref->xref, "pdfi_free_xref_table");
240
84.7k
    gs_free_object(OBJ_MEMORY(xref), xref, "pdfi_free_xref_table");
241
84.7k
}
242
243
static void pdfi_free_stream(pdf_obj *o)
244
1.03M
{
245
1.03M
    pdf_stream *stream = (pdf_stream *)o;
246
247
1.03M
    pdfi_countdown(stream->stream_dict);
248
1.03M
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
249
1.03M
}
250
251
static void pdfi_free_buffer(pdf_obj *o)
252
41.3k
{
253
41.3k
    pdf_buffer *b = (pdf_buffer *)o;
254
255
41.3k
    gs_free_object(OBJ_MEMORY(b), b->data, "pdfi_free_buffer(data)");
256
41.3k
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
257
41.3k
}
258
259
void pdfi_free_object(pdf_obj *o)
260
806M
{
261
806M
    if (o == NULL)
262
680k
        return;
263
805M
    if ((intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
264
0
        return;
265
805M
    switch(o->type) {
266
14.9M
        case PDF_ARRAY_MARK:
267
27.8M
        case PDF_DICT_MARK:
268
28.4M
        case PDF_PROC_MARK:
269
240M
        case PDF_INT:
270
368M
        case PDF_REAL:
271
386M
        case PDF_INDIRECT:
272
386M
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
273
386M
            break;
274
72.6M
        case PDF_STRING:
275
373M
        case PDF_NAME:
276
373M
            pdfi_free_namestring(o);
277
373M
            break;
278
41.3k
        case PDF_BUFFER:
279
41.3k
            pdfi_free_buffer(o);
280
41.3k
            break;
281
14.8M
        case PDF_ARRAY:
282
14.8M
            pdfi_free_array(o);
283
14.8M
            break;
284
13.4M
        case PDF_DICT:
285
13.4M
            pdfi_free_dict(o);
286
13.4M
            break;
287
1.03M
        case PDF_STREAM:
288
1.03M
            pdfi_free_stream(o);
289
1.03M
            break;
290
15.7M
        case PDF_KEYWORD:
291
15.7M
            pdfi_free_keyword(o);
292
15.7M
            break;
293
84.7k
        case PDF_XREF_TABLE:
294
84.7k
            pdfi_free_xref_table(o);
295
84.7k
            break;
296
984k
        case PDF_FONT:
297
984k
            pdfi_free_font(o);
298
984k
            break;
299
78.5k
        case PDF_CMAP:
300
78.5k
            pdfi_free_cmap(o);
301
78.5k
            break;
302
0
        case PDF_BOOL:
303
0
        case PDF_NULL:
304
0
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
305
0
            break;
306
21
        default:
307
21
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
308
21
            break;
309
805M
    }
310
805M
}
311
312
313
/* Convert a pdf_dict to a pdf_stream.
314
 * do_convert -- convert the stream to use same object num as dict
315
 *               (This assumes the dict has not been cached.)
316
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
317
 */
318
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
319
1.03M
{
320
1.03M
    int code = 0;
321
1.03M
    pdf_stream *new_stream = NULL;
322
323
1.03M
    if (pdfi_type_of(dict) != PDF_DICT)
324
0
        return_error(gs_error_typecheck);
325
326
1.03M
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
327
1.03M
    if (code < 0)
328
0
        goto error_exit;
329
330
1.03M
    new_stream->ctx = ctx;
331
1.03M
    pdfi_countup(new_stream);
332
333
1.03M
    new_stream->stream_dict = dict;
334
1.03M
    pdfi_countup(dict);
335
336
    /* this replaces the dict with the stream.
337
     * assumes it's not cached
338
     */
339
1.03M
    if (do_convert) {
340
966k
        new_stream->object_num = dict->object_num;
341
966k
        new_stream->generation_num = dict->generation_num;
342
966k
        dict->object_num = 0;
343
966k
        dict->generation_num = 0;
344
966k
    }
345
1.03M
    *stream = new_stream;
346
1.03M
    return 0;
347
348
0
 error_exit:
349
0
    pdfi_countdown(new_stream);
350
0
    return code;
351
1.03M
}
352
353
int pdfi_get_stream_dict(pdf_context *ctx, pdf_stream *stream, pdf_dict **dict)
354
187
{
355
187
    *dict = stream->stream_dict;
356
357
    /* Make sure the dictionary won't go away */
358
187
    pdfi_countup(*dict);
359
187
    if ((*dict)->object_num == 0) {
360
0
        (*dict)->object_num = stream->object_num;
361
0
        (*dict)->generation_num = stream->generation_num;
362
0
    }
363
364
187
    return 0;
365
187
}
366
367
/* Create a pdf_string from a c char * */
368
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
369
404
{
370
404
    int code;
371
404
    int length = strlen(charstr);
372
404
    pdf_string *newstr = NULL;
373
374
404
    *string = NULL;
375
376
404
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
377
404
    if (code < 0) goto exit;
378
379
404
    memcpy(newstr->data, (byte *)charstr, length);
380
381
404
    *string = newstr;
382
404
    pdfi_countup(newstr);
383
404
 exit:
384
404
    return code;
385
404
}
386
387
/* Create a pdf_name from a c char * */
388
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
389
811k
{
390
811k
    int code;
391
811k
    int length = strlen(charstr);
392
811k
    pdf_name *newname = NULL;
393
394
811k
    *name = NULL;
395
396
811k
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
397
811k
    if (code < 0) goto exit;
398
399
811k
    memcpy(newname->data, (byte *)charstr, length);
400
401
811k
    *name = newname;
402
811k
    pdfi_countup(newname);
403
811k
 exit:
404
811k
    return code;
405
811k
}
406
407
/************ bufstream module BEGIN **************/
408
129k
#define INIT_BUF_SIZE 256
409
410
typedef struct {
411
    int len;  /* Length of buffer */
412
    int cur;  /* Current position */
413
    byte *data;
414
} pdfi_bufstream_t;
415
416
417
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
418
129k
{
419
129k
    stream->len = INIT_BUF_SIZE;
420
129k
    stream->cur = 0;
421
129k
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");
422
423
129k
    if (!stream->data)
424
0
        return_error(gs_error_VMerror);
425
129k
    return 0;
426
129k
}
427
428
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
429
129k
{
430
129k
    if (stream->data)
431
241
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");
432
129k
    stream->len = 0;
433
129k
    stream->cur = 0;
434
129k
    stream->data = NULL;
435
129k
    return 0;
436
129k
}
437
438
/* Grab a copy of the stream's buffer */
439
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
440
129k
{
441
129k
    *buf = stream->data;
442
129k
    *len = stream->cur;
443
129k
    stream->len = 0;
444
129k
    stream->cur = 0;
445
129k
    stream->data = NULL;
446
129k
    return 0;
447
129k
}
448
449
/* Increase the size of the buffer by doubling and added the known needed amount */
450
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
451
5.28k
{
452
5.28k
    byte *data = NULL;
453
5.28k
    uint64_t newsize;
454
455
5.28k
    newsize = stream->len * 2 + needed;
456
5.28k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
457
5.28k
    if (!data)
458
0
        return_error(gs_error_VMerror);
459
460
5.28k
    memcpy(data, stream->data, stream->len);
461
5.28k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
462
5.28k
    stream->data = data;
463
5.28k
    stream->len = newsize;
464
465
5.28k
    return 0;
466
5.28k
}
467
468
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
469
1.52M
{
470
1.52M
    int code = 0;
471
472
1.52M
    if (stream->cur + len > stream->len) {
473
5.28k
        code = pdfi_bufstream_increase(ctx, stream, len);
474
5.28k
        if (code < 0)
475
0
            goto exit;
476
5.28k
    }
477
1.52M
    memcpy(stream->data + stream->cur, data, len);
478
1.52M
    stream->cur += len;
479
480
1.52M
 exit:
481
1.52M
    return code;
482
1.52M
}
483
484
/************ bufstream module END **************/
485
486
487
/* Create a c-string to use as object label
488
 * Uses the object_num to make it unique.
489
 * (don't call this for objects with object_num=0, though I am not going to check that here)
490
 *
491
 * Bug #708127; just the object number alone is insufficient. Two consecutive input files might use the
492
 * same object number for a pdfmark, but with different content, we need to differntiate between the two.
493
 * Add a simple hash of the input filename (uses the same dumb but fast hash as pattern ID generation), this gives
494
 * the last bytes in the filename more say in the final result so is 'probably' sufficiently unique with the
495
 * object number and generation.
496
 */
497
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
498
33.7k
{
499
33.7k
    int code = 0, i;
500
33.7k
    int length;
501
33.7k
    const char *template = "{Obj%dG%dF%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
502
33.7k
    char *string = NULL;
503
33.7k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
504
33.7k
    uint32_t hash = 5381;
505
506
33.7k
    if (ctx->main_stream->s->file_name.data != NULL) {
507
0
        string = (char *)ctx->main_stream->s->file_name.data;
508
0
        length = ctx->main_stream->s->file_name.size;
509
510
0
        for (i=0;i < length;i++) {
511
#if ARCH_IS_BIG_ENDIAN
512
            hash = ((hash << 5) + hash) + string[length - 1 - i]; /* hash * 33 + c */
513
#else
514
0
            hash = ((hash << 5) + hash) + string[i]; /* hash * 33 + c */
515
0
#endif
516
0
        }
517
0
    }
518
519
33.7k
    *label = NULL;
520
33.7k
    length = strlen(template)+30;
521
522
33.7k
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
523
33.7k
    if (string == NULL) {
524
0
        code = gs_note_error(gs_error_VMerror);
525
0
        goto exit;
526
0
    }
527
528
33.7k
    if (pdfi_type_of(obj) == PDF_INDIRECT)
529
33.6k
        gs_snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num, hash);
530
52
    else
531
52
        gs_snprintf(string, length, template, obj->object_num, obj->generation_num, hash);
532
533
33.7k
    *label = string;
534
33.7k
 exit:
535
33.7k
    return code;
536
33.7k
}
537
538
/*********** BEGIN obj_to_string module ************/
539
540
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
541
542
/* Dispatch to get string representation of an object */
543
typedef struct {
544
    pdf_obj_type type;
545
    str_func func;
546
} obj_str_dispatch_t;
547
548
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
549
64
{
550
64
    int code = 0;
551
64
    int size = 12;
552
64
    byte *buf;
553
554
64
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_default_str(data)");
555
64
    if (buf == NULL)
556
0
        return_error(gs_error_VMerror);
557
64
    memcpy(buf, "/placeholder", size);
558
64
    *data = buf;
559
64
    *len = size;
560
64
    return code;
561
64
}
562
563
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
564
513k
{
565
513k
    int code = 0;
566
513k
    pdf_name *name = (pdf_name *)obj;
567
513k
    int size = name->length + 1;
568
513k
    byte *buf;
569
570
513k
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
571
513k
    if (buf == NULL)
572
0
        return_error(gs_error_VMerror);
573
513k
    buf[0] = '/';
574
513k
    memcpy(buf+1, name->data, name->length);
575
513k
    *data = buf;
576
513k
    *len = size;
577
513k
    return code;
578
513k
}
579
580
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
581
168k
{
582
168k
    int code = 0;
583
168k
    int size = 15;
584
168k
    pdf_num *number = (pdf_num *)obj;
585
168k
    char *buf;
586
587
168k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_real_str(data)");
588
168k
    if (buf == NULL)
589
0
        return_error(gs_error_VMerror);
590
168k
    snprintf(buf, size, "%.4f", number->value.d);
591
168k
    *data = (byte *)buf;
592
168k
    *len = strlen(buf);
593
168k
    return code;
594
168k
}
595
596
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
597
456k
{
598
456k
    int code = 0;
599
456k
    int size = 15;
600
456k
    pdf_num *number = (pdf_num *)obj;
601
456k
    char *buf;
602
603
456k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
604
456k
    if (buf == NULL)
605
0
        return_error(gs_error_VMerror);
606
456k
    snprintf(buf, size, "%"PRId64"", number->value.i);
607
456k
    *data = (byte *)buf;
608
456k
    *len = strlen(buf);
609
456k
    return code;
610
456k
}
611
612
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
613
12.8k
{
614
12.8k
    int size = 100;
615
12.8k
    char *buf;
616
617
12.8k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
618
12.8k
    if (buf == NULL)
619
0
        return_error(gs_error_VMerror);
620
12.8k
    snprintf(buf, size, "%"PRId64" %d R", object_num, generation);
621
12.8k
    *data = (byte *)buf;
622
12.8k
    *len = strlen(buf);
623
12.8k
    return 0;
624
12.8k
}
625
626
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
627
47.1k
{
628
47.1k
    int code = 0;
629
47.1k
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
630
47.1k
    char *buf;
631
47.1k
    pdf_obj *object = NULL;
632
47.1k
    bool use_label = true;
633
634
47.1k
    code = pdfi_loop_detector_mark(ctx);
635
47.1k
    if (code < 0)
636
0
        return code;
637
638
47.1k
    if (ref->is_highlevelform) {
639
11.6k
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
640
11.6k
        ref->is_highlevelform = false;
641
35.5k
    } else {
642
35.5k
        if (!ref->is_marking) {
643
17.1k
            code = pdfi_dereference(ctx, ref->ref_object_num, ref->ref_generation_num, &object);
644
17.1k
            if (code == gs_error_undefined) {
645
                /* Do something sensible for undefined reference (this would be a broken file) */
646
                /* TODO: Flag an error? */
647
1.26k
                code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
648
1.26k
                goto exit;
649
1.26k
            }
650
15.8k
            if (code < 0 && code != gs_error_circular_reference)
651
138
                goto exit;
652
15.7k
            if (code == 0) {
653
15.2k
                if (pdfi_type_of(object) == PDF_STREAM) {
654
478
                    code = pdfi_pdfmark_stream(ctx, (pdf_stream *)object);
655
478
                    if (code < 0) goto exit;
656
14.8k
                } else if (pdfi_type_of(object) == PDF_DICT) {
657
14.4k
                    code = pdfi_pdfmark_dict(ctx, (pdf_dict *)object);
658
14.4k
                    if (code < 0) goto exit;
659
14.4k
                } else {
660
399
                    code = pdfi_obj_to_string(ctx, object, data, len);
661
399
                    if (code < 0) goto exit;
662
376
                    use_label = false;
663
376
                }
664
15.2k
            }
665
15.7k
        }
666
34.0k
        if (use_label) {
667
33.6k
            code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &buf);
668
33.6k
            if (code < 0) goto exit;
669
33.6k
            *data = (byte *)buf;
670
33.6k
            *len = strlen(buf);
671
33.6k
        }
672
34.0k
    }
673
674
47.1k
 exit:
675
47.1k
    (void)pdfi_loop_detector_cleartomark(ctx);
676
47.1k
    pdfi_countdown(object);
677
47.1k
    return code;
678
47.1k
}
679
680
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
681
18.2k
{
682
18.2k
    int code = 0;
683
18.2k
    int size = 5;
684
18.2k
    char *buf;
685
686
18.2k
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_bool_str(data)");
687
18.2k
    if (buf == NULL)
688
0
        return_error(gs_error_VMerror);
689
18.2k
    if (obj == PDF_TRUE_OBJ) {
690
559
        memcpy(buf, (byte *)"true", 4);
691
559
        *len = 4;
692
17.7k
    } else {
693
17.7k
        memcpy(buf, (byte *)"false", 5);
694
17.7k
        *len = 5;
695
17.7k
    }
696
18.2k
    *data = (byte *)buf;
697
18.2k
    return code;
698
18.2k
}
699
700
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
701
795
{
702
795
    int code = 0;
703
795
    int size = 4;
704
795
    char *buf;
705
706
795
    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_null_str(data)");
707
795
    if (buf == NULL)
708
0
        return_error(gs_error_VMerror);
709
795
    memcpy(buf, (byte *)"null", 4);
710
795
    *len = 4;
711
795
    *data = (byte *)buf;
712
795
    return code;
713
795
}
714
715
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
716
70.1k
{
717
70.1k
    pdf_string *string = (pdf_string *)obj;
718
70.1k
    char *buf;
719
70.1k
    int i, length = 0, j;
720
721
2.63M
    for (j=0;j<string->length;j++) {
722
2.56M
        if (string->data[j] == 0x0a || string->data[j] == 0x0d || string->data[j] == '(' || string->data[j] == ')' || string->data[j] == '\\')
723
4.27k
                length += 2;
724
2.55M
        else {
725
2.55M
            if (string->data[j] < 0x20 || string->data[j] > 0x7F || string->data[j] == '\\')
726
123k
                length += 4;
727
2.43M
            else
728
2.43M
                length++;
729
2.55M
        }
730
2.56M
    }
731
70.1k
    length += 2;
732
70.1k
    buf = (char *)gs_alloc_bytes(ctx->memory, length, "pdfi_obj_string_str(data)");
733
70.1k
    if (buf == NULL)
734
0
        return_error(gs_error_VMerror);
735
70.1k
    buf[0] = '(';
736
70.1k
    i = 1;
737
2.63M
    for (j=0;j<string->length;j++) {
738
2.56M
        switch(string->data[j]) {
739
652
            case 0x0a:
740
652
                buf[i++] = '\\';
741
652
                buf[i++] = 'n';
742
652
                break;
743
659
            case 0x0d:
744
659
                buf[i++] = '\\';
745
659
                buf[i++] = 'r';
746
659
                break;
747
1.39k
            case '(':
748
2.94k
            case ')':
749
2.96k
            case '\\':
750
2.96k
                buf[i++] = '\\';
751
2.96k
                buf[i++] = string->data[j];
752
2.96k
                break;
753
2.55M
            default:
754
2.55M
                if (string->data[j] < 0x20 || string->data[j] > 0x7F) {
755
123k
                    buf[i++] = '\\';
756
123k
                    buf[i++] = (string->data[j] >> 6) + 0x30;
757
123k
                    buf[i++] = ((string->data[j] & 0x3F) >> 3) + 0x30;
758
123k
                    buf[i++] = (string->data[j] & 0x07) + 0x30;
759
123k
                } else
760
2.43M
                buf[i++] = string->data[j];
761
2.55M
                break;
762
2.56M
        }
763
2.56M
    }
764
70.1k
    buf[i++] = ')';
765
766
70.1k
    *len = i;
767
70.1k
    *data = (byte *)buf;
768
70.1k
    return 0;
769
70.1k
}
770
771
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
772
83.2k
{
773
83.2k
    int code = 0;
774
83.2k
    pdf_array *array = (pdf_array *)obj;
775
83.2k
    pdf_obj *object = NULL;
776
83.2k
    byte *itembuf = NULL;
777
83.2k
    int itemsize;
778
83.2k
    pdfi_bufstream_t bufstream;
779
83.2k
    uint64_t index, arraysize;
780
781
83.2k
    code = pdfi_bufstream_init(ctx, &bufstream);
782
83.2k
    if (code < 0) goto exit;
783
784
83.2k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"[", 1);
785
83.2k
    if (code < 0) goto exit;
786
787
83.2k
    arraysize = pdfi_array_size(array);
788
666k
    for (index = 0; index < arraysize; index++) {
789
583k
        code = pdfi_array_get_no_deref(ctx, array, index, &object);
790
583k
        if (code < 0) goto exit;
791
792
583k
        code = pdfi_obj_to_string(ctx, object, &itembuf, &itemsize);
793
583k
        if (code < 0) goto exit;
794
795
582k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
796
582k
        if (code < 0) goto exit;
797
798
582k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
799
582k
        itembuf = NULL;
800
582k
        itemsize = 0;
801
582k
        pdfi_countdown(object);
802
582k
        object = NULL;
803
804
        /* Put a space between elements unless last item */
805
582k
        if (index+1 != arraysize) {
806
499k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
807
499k
            if (code < 0) goto exit;
808
499k
        }
809
582k
    }
810
811
83.1k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"]", 1);
812
83.1k
    if (code < 0) goto exit;
813
814
    /* Now copy the results out into the string we can keep */
815
83.1k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
816
817
83.2k
 exit:
818
83.2k
    if (itembuf)
819
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_array_str(itembuf)");
820
83.2k
    pdfi_bufstream_free(ctx, &bufstream);
821
83.2k
    pdfi_countdown(object);
822
83.2k
    return code;
823
83.1k
}
824
825
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
826
485
{
827
485
    int code = 0;
828
485
    byte *buf;
829
485
    pdf_stream *stream = (pdf_stream *)obj;
830
485
    int64_t bufsize = 0;
831
485
    pdf_indirect_ref *streamref = NULL;
832
833
    /* TODO: How to deal with stream dictionaries?
834
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
835
     * See 'pushpin' annotation in annotations-galore_II.ps
836
     *
837
     * This will just literally grab the stream data.
838
     */
839
485
    if (stream->is_marking) {
840
446
        code = pdfi_stream_to_buffer(ctx, stream, &buf, &bufsize);
841
446
        if (code < 0) goto exit;
842
446
        *data = buf;
843
446
        *len = (int)bufsize;
844
446
    } else {
845
        /* Create an indirect ref for the stream */
846
39
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&streamref);
847
39
        if (code < 0) goto exit;
848
39
        pdfi_countup(streamref);
849
39
        streamref->ref_object_num = stream->object_num;
850
39
        streamref->ref_generation_num = stream->generation_num;
851
39
        code = pdfi_obj_indirect_str(ctx, (pdf_obj *)streamref, data, len);
852
39
    }
853
854
485
 exit:
855
485
    pdfi_countdown(streamref);
856
485
    return code;
857
485
}
858
859
/* This fetches without dereferencing.  If you want to see the references inline,
860
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
861
 */
862
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
863
46.6k
{
864
46.6k
    int code = 0;
865
46.6k
    pdf_dict *dict = (pdf_dict *)obj;
866
46.6k
    pdf_name *Key = NULL;
867
46.6k
    pdf_obj *Value = NULL;
868
46.6k
    byte *itembuf = NULL;
869
46.6k
    int itemsize;
870
46.6k
    pdfi_bufstream_t bufstream;
871
46.6k
    uint64_t index, dictsize;
872
46.6k
    uint64_t itemnum = 0;
873
874
46.6k
    code = pdfi_loop_detector_mark(ctx);
875
46.6k
    if (code < 0)
876
0
        return code;
877
878
46.6k
    code = pdfi_bufstream_init(ctx, &bufstream);
879
46.6k
    if (code < 0) goto exit;
880
881
46.6k
    dictsize = pdfi_dict_entries(dict);
882
    /* Handle empty dict specially */
883
46.6k
    if (dictsize == 0) {
884
5
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
885
5
        if (code < 0)
886
0
            goto exit;
887
5
        goto exit_copy;
888
5
    }
889
890
46.6k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
891
46.6k
    if (code < 0) goto exit;
892
893
    /* Note: We specifically fetch without dereferencing, so there will be no circular
894
     * references to handle here.
895
     */
896
    /* Wrong.... */
897
898
46.6k
    if (dict->object_num !=0 ) {
899
11.9k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
900
4
            code = gs_note_error(gs_error_circular_reference);
901
4
            goto exit;
902
4
        }
903
11.8k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
904
11.8k
        if (code < 0)
905
0
            goto exit;
906
11.8k
    }
907
908
    /* Get each (key,val) pair from dict and setup param for it */
909
46.6k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
910
56.1k
    while (code >= 0) {
911
56.1k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
912
56.1k
        if (code < 0) goto exit;
913
914
56.1k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
915
56.1k
        if (code < 0) goto exit;
916
917
56.1k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
918
56.1k
        itembuf = NULL;
919
56.1k
        itemsize = 0;
920
921
        /* Put a space between elements */
922
56.1k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
923
56.1k
        if (code < 0) goto exit;
924
925
        /* No dereference */
926
56.1k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
927
56.1k
        if (code < 0) goto exit;
928
56.1k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
929
56.1k
        if (code < 0) goto exit;
930
931
56.0k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
932
56.0k
        if (code < 0) goto exit;
933
934
56.0k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
935
56.0k
        itembuf = NULL;
936
56.0k
        itemsize = 0;
937
938
56.0k
        pdfi_countdown(Value);
939
56.0k
        Value = NULL;
940
56.0k
        pdfi_countdown(Key);
941
56.0k
        Key = NULL;
942
943
56.0k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
944
56.0k
        if (code == gs_error_undefined) {
945
46.4k
            code = 0;
946
46.4k
            break;
947
46.4k
        }
948
9.52k
        if (code < 0) goto exit;
949
950
        /* Put a space between elements */
951
9.52k
        if (++itemnum != dictsize) {
952
9.52k
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
953
9.52k
            if (code < 0) goto exit;
954
9.52k
        }
955
9.52k
    }
956
46.4k
    if (code < 0) goto exit;
957
958
46.4k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
959
46.4k
    if (code < 0) goto exit;
960
961
46.5k
 exit_copy:
962
    /* Now copy the results out into the string we can keep */
963
46.5k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
964
965
46.6k
 exit:
966
46.6k
    if (itembuf)
967
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
968
46.6k
    pdfi_countdown(Key);
969
46.6k
    pdfi_countdown(Value);
970
46.6k
    pdfi_bufstream_free(ctx, &bufstream);
971
46.6k
    if (code < 0)
972
149
        (void)pdfi_loop_detector_cleartomark(ctx);
973
46.5k
    else
974
46.5k
        code = pdfi_loop_detector_cleartomark(ctx);
975
46.6k
    return code;
976
46.5k
}
977
978
#define PARAM1(A) # A,
979
#define PARAM2(A,B) A,
980
static const char pdf_token_strings[][10] = {
981
#include "pdf_tokens.h"
982
};
983
984
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
985
2
{
986
2
    int code = 0;
987
2
    const char *s = pdf_token_strings[(uintptr_t)obj];
988
2
    int size = (int)strlen(s) + 1;
989
2
    byte *buf;
990
991
2
    buf = gs_alloc_bytes(ctx->memory, size, "pdfi_obj_name_str(data)");
992
2
    if (buf == NULL)
993
0
        return_error(gs_error_VMerror);
994
2
    memcpy(buf, s, size);
995
2
    *data = buf;
996
2
    *len = size;
997
2
    return code;
998
2
}
999
1000
obj_str_dispatch_t obj_str_dispatch[] = {
1001
    {PDF_NAME, pdfi_obj_name_str},
1002
    {PDF_ARRAY, pdfi_obj_array_str},
1003
    {PDF_REAL, pdfi_obj_real_str},
1004
    {PDF_INT, pdfi_obj_int_str},
1005
    {PDF_BOOL, pdfi_obj_bool_str},
1006
    {PDF_STRING, pdfi_obj_string_str},
1007
    {PDF_DICT, pdfi_obj_dict_str},
1008
    {PDF_STREAM, pdfi_obj_stream_str},
1009
    {PDF_INDIRECT, pdfi_obj_indirect_str},
1010
    {PDF_NULL, pdfi_obj_null_str},
1011
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
1012
    {0, NULL}
1013
};
1014
1015
/* Recursive function to build a string from an object
1016
 */
1017
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1018
1.40M
{
1019
1.40M
    obj_str_dispatch_t *dispatch_ptr;
1020
1.40M
    int code = 0;
1021
1.40M
    pdf_obj_type type;
1022
1023
1.40M
    *data = NULL;
1024
1.40M
    *len = 0;
1025
1.40M
    type = pdfi_type_of(obj);
1026
4.28M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1027
4.28M
        if (type == dispatch_ptr->type) {
1028
1.40M
            code = dispatch_ptr->func(ctx, obj, data, len);
1029
1.40M
            goto exit;
1030
1.40M
        }
1031
4.28M
    }
1032
    /* Not implemented, use default */
1033
64
    code = pdfi_obj_default_str(ctx, obj, data, len);
1034
1.40M
 exit:
1035
1.40M
    return code;
1036
64
}
1037
1038
/*********** END obj_to_string module ************/