Coverage Report

Created: 2022-10-31 07:00

/src/ghostpdl/pdf/pdf_obj.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2022 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
#include "ghostpdf.h"
17
#include "pdf_stack.h"
18
#include "pdf_array.h"
19
#include "pdf_dict.h"
20
#include "pdf_obj.h"
21
#include "pdf_cmap.h"
22
#include "pdf_font.h"
23
#include "pdf_deref.h" /* for replace_cache_entry() */
24
#include "pdf_mark.h"
25
#include "pdf_file.h" /* for pdfi_stream_to_buffer() */
26
#include "pdf_loop_detect.h"
27
28
/***********************************************************************************/
29
/* Functions to create the various kinds of 'PDF objects', Created objects have a  */
30
/* reference count of 0. Composite objects (dictionaries, arrays, strings) use the */
31
/* 'size' argument to create an object with the correct number of entries or of the */
32
/* requested size. Simple objects (integers etc) ignore this parameter.            */
33
/* Objects do not get their data assigned, that's up to the caller, but we do      */
34
/* set the length or size fields for composite objects.                             */
35
36
/* Allocate a zero-filled pdfi object of the requested type.
 *
 * ctx  - interpreter context; the object is allocated from ctx->memory.
 * type - kind of object to create. PDF_NULL, PDF_BOOL and any type not
 *        listed in the first switch below are rejected.
 * size - strings, names, keywords: number of data bytes;
 *        arrays, dictionaries: number of slots to allocate;
 *        buffers: size of the data area (0 leaves data NULL for the caller);
 *        ignored for every other type.
 * obj  - receives the new object; always set to NULL on failure.
 *
 * Returns 0 on success, gs_error_typecheck for an unsupported type and
 * gs_error_VMerror when an allocation fails.
 */
int pdfi_object_alloc(pdf_context *ctx, pdf_obj_type type, unsigned int size, pdf_obj **obj)
{
    size_t bytes = 0;
    int code = 0;

    /* error_out frees *obj. Clear it first so that bailing out before the
     * allocation never hands the caller's uninitialised pointer to the
     * allocator.
     */
    *obj = NULL;

    /* Work out how big the object header (plus any inline data) is. */
    switch(type) {
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
            bytes = sizeof(pdf_obj);
            break;
        case PDF_INT:
        case PDF_REAL:
            bytes = sizeof(pdf_num);
            break;
        case PDF_STRING:
        case PDF_NAME:
            /* The data lives inline after the header. */
            bytes = sizeof(pdf_string) + size - PDF_NAME_DECLARED_LENGTH;
            break;
        case PDF_BUFFER:
            bytes = sizeof(pdf_buffer);
            break;
        case PDF_ARRAY:
            bytes = sizeof(pdf_array);
            break;
        case PDF_DICT:
            bytes = sizeof(pdf_dict);
            break;
        case PDF_INDIRECT:
            bytes = sizeof(pdf_indirect_ref);
            break;
        case PDF_KEYWORD:
            bytes = sizeof(pdf_keyword) + size - PDF_NAME_DECLARED_LENGTH;
            break;
        /* The following aren't PDF object types, but are objects we either want to
         * reference count, or store on the stack.
         */
        case PDF_XREF_TABLE:
            bytes = sizeof(xref_table_t);
            break;
        case PDF_STREAM:
            bytes = sizeof(pdf_stream);
            break;
        case PDF_NULL:
        case PDF_BOOL:
        default:
            /* PDF_NULL and PDF_BOOL are special (not allocated) data types. */
            code = gs_note_error(gs_error_typecheck);
            goto error_out;
    }

    *obj = (pdf_obj *)gs_alloc_bytes(ctx->memory, bytes, "pdfi_object_alloc");
    if (*obj == NULL) {
        code = gs_note_error(gs_error_VMerror);
        goto error_out;
    }

    memset(*obj, 0x00, bytes);
    (*obj)->ctx = ctx;
    (*obj)->type = type;

    /* Composite types record their size and may need a second allocation;
     * everything else is complete once zeroed.
     */
    switch(type) {
        case PDF_KEYWORD:
        case PDF_STRING:
        case PDF_NAME:
            ((pdf_string *)*obj)->length = size;
            break;
        case PDF_BUFFER:
            {
                pdf_buffer *b = (pdf_buffer *)*obj;

                /* NOTE: size can be 0 if the caller wants to allocate the data area itself */
                if (size > 0) {
                    b->data = gs_alloc_bytes(ctx->memory, size, "pdfi_object_alloc");
                    if (b->data == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                }
                else {
                    b->data = NULL;
                }
                b->length = size;
            }
            break;
        case PDF_ARRAY:
            {
                pdf_obj **values = NULL;

                ((pdf_array *)*obj)->size = size;
                if (size > 0) {
                    values = (pdf_obj **)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_obj *), "pdfi_object_alloc");
                    if (values == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_array *)*obj)->values = values;
                    memset(((pdf_array *)*obj)->values, 0x00, size * sizeof(pdf_obj *));
                }
            }
            break;
        case PDF_DICT:
            {
                pdf_dict_entry *entries = NULL;

                ((pdf_dict *)*obj)->size = size;
                if (size > 0) {
                    entries = (pdf_dict_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(pdf_dict_entry), "pdfi_object_alloc");
                    if (entries == NULL) {
                        code = gs_note_error(gs_error_VMerror);
                        goto error_out;
                    }
                    ((pdf_dict *)*obj)->list = entries;
                    memset(((pdf_dict *)*obj)->list, 0x00, size * sizeof(pdf_dict_entry));
                }
            }
            break;
        default:
            /* Numbers, indirect refs, marks, xref tables and streams need nothing more. */
            break;
    }
#if REFCNT_DEBUG
    (*obj)->UID = ctx->ref_UID++;
    dmprintf2(ctx->memory, "Allocated object of type %c with UID %"PRIi64"\n", (*obj)->type, (*obj)->UID);
#endif
    return 0;

error_out:
    /* *obj is either NULL or the partially built object header. */
    gs_free_object(ctx->memory, *obj, "pdfi_object_alloc");
    *obj = NULL;
    return code;
}
184
185
/* Create a PDF number object from a numeric value. Attempts to create
186
 * either a REAL or INT as appropriate. As usual for the alloc functions
187
 * this returns an object with a reference count of 0.
188
 */
189
int pdfi_num_alloc(pdf_context *ctx, double d, pdf_num **num)
190
18.2k
{
191
18.2k
    uint64_t test = 0;
192
18.2k
    int code = 0;
193
194
18.2k
    test = (uint64_t)floor(d);
195
18.2k
    if (d == test) {
196
17.5k
        code = pdfi_object_alloc(ctx, PDF_INT, 0, (pdf_obj **)num);
197
17.5k
        if (code < 0)
198
0
            return code;
199
17.5k
        (*num)->value.i = test;
200
17.5k
    }
201
669
    else {
202
669
        code = pdfi_object_alloc(ctx, PDF_REAL, 0, (pdf_obj **)num);
203
669
        if (code < 0)
204
0
            return code;
205
669
        (*num)->value.d = d;
206
669
    }
207
208
18.2k
    return 0;
209
18.2k
}
210
211
/***********************************************************************************/
212
/* Functions to free the various kinds of 'PDF objects'.                           */
213
/* All objects are reference counted, newly allocated objects, as noted above have */
214
/* a reference count of 0. Pushing an object onto the stack increments             */
215
/* its reference count, popping it from the stack decrements its reference count.  */
216
/* When an object's reference count is decremented to 0, pdfi_countdown calls      */
217
/* pdfi_free_object() to free it.                                                  */
218
219
/* Release a name or string object. Both are a single allocation with the
 * data stored inline, so one free from the owning allocator is enough.
 */
static void pdfi_free_namestring(pdf_obj *o)
{
    gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter free name or string");
}
226
227
/* Release a keyword object; it is a single allocation. */
static void pdfi_free_keyword(pdf_obj *o)
{
    gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter free keyword");
}
233
234
/* Release an xref table object: first the entry storage it points at,
 * then the table header itself.
 */
static void pdfi_free_xref_table(pdf_obj *o)
{
    xref_table_t *table = (xref_table_t *)o;

    gs_free_object(OBJ_MEMORY(table), table->xref, "pdfi_free_xref_table");
    gs_free_object(OBJ_MEMORY(table), table, "pdfi_free_xref_table");
}
241
242
/* Release a stream object. The stream holds a counted reference on its
 * dictionary, which is dropped before the stream itself is freed.
 */
static void pdfi_free_stream(pdf_obj *o)
{
    pdfi_countdown(((pdf_stream *)o)->stream_dict);
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_stream");
}
249
250
/* Release a buffer object: the separately allocated data area first,
 * then the buffer header.
 */
static void pdfi_free_buffer(pdf_obj *o)
{
    pdf_buffer *buffer = (pdf_buffer *)o;

    gs_free_object(OBJ_MEMORY(buffer), buffer->data, "pdfi_free_buffer(data)");
    gs_free_object(OBJ_MEMORY(o), o, "pdfi_free_buffer");
}
257
258
/* Free an object according to its type.
 *
 * NULL is ignored, as is any pointer value below TOKEN__LAST_KEY
 * (presumably the non-allocated token/constant objects -- confirm against
 * the token definitions). Unknown and non-allocated types only produce a
 * debug message; nothing is freed for them.
 */
void pdfi_free_object(pdf_obj *o)
{
    if (o == NULL || (intptr_t)o < (intptr_t)TOKEN__LAST_KEY)
        return;

    switch(o->type) {
        /* Types with their own teardown routine. */
        case PDF_STRING:
        case PDF_NAME:
            pdfi_free_namestring(o);
            break;
        case PDF_KEYWORD:
            pdfi_free_keyword(o);
            break;
        case PDF_BUFFER:
            pdfi_free_buffer(o);
            break;
        case PDF_ARRAY:
            pdfi_free_array(o);
            break;
        case PDF_DICT:
            pdfi_free_dict(o);
            break;
        case PDF_STREAM:
            pdfi_free_stream(o);
            break;
        case PDF_XREF_TABLE:
            pdfi_free_xref_table(o);
            break;
        case PDF_FONT:
            pdfi_free_font(o);
            break;
        case PDF_CMAP:
            pdfi_free_cmap(o);
            break;

        /* Simple objects: a single allocation with nothing hanging off it. */
        case PDF_ARRAY_MARK:
        case PDF_DICT_MARK:
        case PDF_PROC_MARK:
        case PDF_INT:
        case PDF_REAL:
        case PDF_INDIRECT:
            gs_free_object(OBJ_MEMORY(o), o, "pdf interpreter object refcount to 0");
            break;

        /* These are never allocated, so reaching here indicates a caller bug. */
        case PDF_BOOL:
        case PDF_NULL:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free non-allocated object type !!!\n");
            break;
        default:
            dbgmprintf(OBJ_MEMORY(o), "!!! Attempting to free unknown object type !!!\n");
            break;
    }
}
310
311
312
/* Convert a pdf_dict to a pdf_stream.
313
 * do_convert -- convert the stream to use same object num as dict
314
 *               (This assumes the dict has not been cached.)
315
 * The stream will come with 1 refcnt, dict refcnt will be incremented by 1.
316
 */
317
int pdfi_obj_dict_to_stream(pdf_context *ctx, pdf_dict *dict, pdf_stream **stream, bool do_convert)
318
447k
{
319
447k
    int code = 0;
320
447k
    pdf_stream *new_stream = NULL;
321
322
447k
    if (pdfi_type_of(dict) != PDF_DICT)
323
0
        return_error(gs_error_typecheck);
324
325
447k
    code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&new_stream);
326
447k
    if (code < 0)
327
0
        goto error_exit;
328
329
447k
    new_stream->ctx = ctx;
330
447k
    pdfi_countup(new_stream);
331
332
447k
    new_stream->stream_dict = dict;
333
447k
    pdfi_countup(dict);
334
335
    /* this replaces the dict with the stream.
336
     * assumes it's not cached
337
     */
338
447k
    if (do_convert) {
339
431k
        new_stream->object_num = dict->object_num;
340
431k
        new_stream->generation_num = dict->generation_num;
341
431k
        dict->object_num = 0;
342
431k
        dict->generation_num = 0;
343
431k
    }
344
447k
    *stream = new_stream;
345
447k
    return 0;
346
347
0
 error_exit:
348
0
    pdfi_countdown(new_stream);
349
0
    return code;
350
447k
}
351
352
/* Create a pdf_string from a c char * */
353
int pdfi_obj_charstr_to_string(pdf_context *ctx, const char *charstr, pdf_string **string)
354
73
{
355
73
    int code;
356
73
    int length = strlen(charstr);
357
73
    pdf_string *newstr = NULL;
358
359
73
    *string = NULL;
360
361
73
    code = pdfi_object_alloc(ctx, PDF_STRING, length, (pdf_obj **)&newstr);
362
73
    if (code < 0) goto exit;
363
364
73
    memcpy(newstr->data, (byte *)charstr, length);
365
366
73
    *string = newstr;
367
73
    pdfi_countup(newstr);
368
73
 exit:
369
73
    return code;
370
73
}
371
372
/* Create a pdf_name from a c char * */
373
int pdfi_obj_charstr_to_name(pdf_context *ctx, const char *charstr, pdf_name **name)
374
513
{
375
513
    int code;
376
513
    int length = strlen(charstr);
377
513
    pdf_name *newname = NULL;
378
379
513
    *name = NULL;
380
381
513
    code = pdfi_object_alloc(ctx, PDF_NAME, length, (pdf_obj **)&newname);
382
513
    if (code < 0) goto exit;
383
384
513
    memcpy(newname->data, (byte *)charstr, length);
385
386
513
    *name = newname;
387
513
    pdfi_countup(newname);
388
513
 exit:
389
513
    return code;
390
513
}
391
392
/************ bufstream module BEGIN **************/
393
145k
#define INIT_BUF_SIZE 256
394
395
typedef struct {
396
    int len;  /* Length of buffer */
397
    int cur;  /* Current position */
398
    byte *data;
399
} pdfi_bufstream_t;
400
401
402
/* Prepare an empty bufstream with INIT_BUF_SIZE bytes of storage.
 * Returns 0, or gs_error_VMerror if the storage cannot be allocated (in
 * which case stream->data is NULL).
 */
static int pdfi_bufstream_init(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    stream->cur = 0;
    stream->len = INIT_BUF_SIZE;
    stream->data = gs_alloc_bytes(ctx->memory, stream->len, "pdfi_bufstream_init(data)");

    if (stream->data == NULL)
        return_error(gs_error_VMerror);
    return 0;
}
412
413
/* Release any storage still owned by the bufstream and reset it to the
 * empty state. Always returns 0.
 */
static int pdfi_bufstream_free(pdf_context *ctx, pdfi_bufstream_t *stream)
{
    if (stream->data != NULL)
        gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_free(data)");

    stream->data = NULL;
    stream->cur = 0;
    stream->len = 0;
    return 0;
}
422
423
/* Grab a copy of the stream's buffer */
424
static int pdfi_bufstream_copy(pdf_context *ctx, pdfi_bufstream_t *stream, byte **buf, int *len)
425
145k
{
426
145k
    *buf = stream->data;
427
145k
    *len = stream->cur;
428
145k
    stream->len = 0;
429
145k
    stream->cur = 0;
430
145k
    stream->data = NULL;
431
145k
    return 0;
432
145k
}
433
434
/* Increase the size of the buffer by doubling and added the known needed amount */
435
static int pdfi_bufstream_increase(pdf_context *ctx, pdfi_bufstream_t *stream, uint64_t needed)
436
8.66k
{
437
8.66k
    byte *data = NULL;
438
8.66k
    uint64_t newsize;
439
440
8.66k
    newsize = stream->len * 2 + needed;
441
8.66k
    data = gs_alloc_bytes(ctx->memory, newsize, "pdfi_bufstream_increase(data)");
442
8.66k
    if (!data)
443
0
        return_error(gs_error_VMerror);
444
445
8.66k
    memcpy(data, stream->data, stream->len);
446
8.66k
    gs_free_object(ctx->memory, stream->data, "pdfi_bufstream_increase(data)");
447
8.66k
    stream->data = data;
448
8.66k
    stream->len = newsize;
449
450
8.66k
    return 0;
451
8.66k
}
452
453
/* Append len bytes from data to the bufstream, growing it first when the
 * bytes would not fit. Returns 0, or the error from
 * pdfi_bufstream_increase(), in which case nothing is appended.
 */
static int pdfi_bufstream_write(pdf_context *ctx, pdfi_bufstream_t *stream, byte *data, uint64_t len)
{
    int code = 0;

    if (stream->cur + len > stream->len) {
        code = pdfi_bufstream_increase(ctx, stream, len);
        if (code < 0)
            return code;
    }

    memcpy(stream->data + stream->cur, data, len);
    stream->cur += len;
    return code;
}
468
469
/************ bufstream module END **************/
470
471
472
/* Create a c-string to use as object label
473
 * Uses the object_num to make it unique
474
 * (don't call this for objects with object_num=0, though I am not going to check that here)
475
 */
476
int pdfi_obj_get_label(pdf_context *ctx, pdf_obj *obj, char **label)
477
571
{
478
571
    int code = 0;
479
571
    int length;
480
571
    const char *template = "{Obj%dG%d}"; /* The '{' and '}' are special to pdfmark/pdfwrite driver */
481
571
    char *string = NULL;
482
571
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
483
484
571
    *label = NULL;
485
571
    length = strlen(template)+20;
486
487
571
    string = (char *)gs_alloc_bytes(ctx->memory, length, "pdf_obj_get_label(label)");
488
571
    if (string == NULL) {
489
0
        code = gs_note_error(gs_error_VMerror);
490
0
        goto exit;
491
0
    }
492
493
571
    if (pdfi_type_of(obj) == PDF_INDIRECT)
494
571
        snprintf(string, length, template, ref->ref_object_num, ref->ref_generation_num);
495
0
    else
496
0
        snprintf(string, length, template, obj->object_num, obj->generation_num);
497
498
571
    *label = string;
499
571
 exit:
500
571
    return code;
501
571
}
502
503
/*********** BEGIN obj_to_string module ************/
504
505
typedef int (*str_func)(pdf_context *ctx, pdf_obj *obj, byte **data, int *len);
506
507
/* Dispatch to get string representation of an object */
508
typedef struct {
509
    pdf_obj_type type;
510
    str_func func;
511
} obj_str_dispatch_t;
512
513
/* Converter that ignores obj and emits the fixed text "/placeholder".
 * The result is 12 bytes long and not NUL-terminated.
 */
static int pdfi_obj_default_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int nbytes = 12;  /* length of "/placeholder" */
    byte *out;

    out = gs_alloc_bytes(ctx->memory, nbytes, "pdfi_obj_default_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, "/placeholder", nbytes);
    *len = nbytes;
    *data = out;
    return 0;
}
527
528
/* Convert a name to text: a '/' followed by the name's bytes, copied
 * verbatim. The result is length + 1 bytes and not NUL-terminated.
 */
static int pdfi_obj_name_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_name *name = (pdf_name *)obj;
    int total = name->length + 1;   /* leading '/' plus the name */
    byte *out;

    out = gs_alloc_bytes(ctx->memory, total, "pdfi_obj_name_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    out[0] = '/';
    memcpy(out + 1, name->data, name->length);

    *len = total;
    *data = out;
    return 0;
}
544
545
/* Convert a real number to text with four decimal places ("%.4f").
 *
 * The number is formatted into a scratch buffer large enough for any
 * double, then copied into an allocation of exactly the right size, so
 * large magnitudes are never cut short. The result is NUL-terminated;
 * *len excludes the terminator.
 *
 * Returns 0, or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_real_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_num *number = (pdf_num *)obj;
    /* Worst case for an IEEE double: sign + 309 digits + '.' + 4 digits + NUL = 316 */
    char tmp[320];
    char *buf;
    int n;

    tmp[0] = '\0';
    n = snprintf(tmp, sizeof(tmp), "%.4f", number->value.d);
    /* Defensive clamps; neither is expected to trigger. */
    if (n < 0)
        n = 0;
    else if (n >= (int)sizeof(tmp))
        n = (int)sizeof(tmp) - 1;

    buf = (char *)gs_alloc_bytes(ctx->memory, n + 1, "pdfi_obj_real_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);

    memcpy(buf, tmp, n);
    buf[n] = '\0';

    *data = (byte *)buf;
    *len = n;
    return 0;
}
560
561
/* Convert an integer to decimal text.
 *
 * The result is NUL-terminated; *len excludes the terminator.
 * Returns 0, or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_int_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    int code = 0;
    /* The longest 64-bit value, "-9223372036854775808", is 20 characters
     * plus the terminator; a smaller buffer would silently drop digits.
     */
    int size = 24;
    pdf_num *number = (pdf_num *)obj;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_int_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    snprintf(buf, size, "%"PRId64"", number->value.i);
    *data = (byte *)buf;
    *len = strlen(buf);
    return code;
}
576
577
/* Format an indirect reference as "<object_num> <generation> R".
 *
 * The result is NUL-terminated and allocated from ctx->memory; *len
 * excludes the terminator. Returns 0, or gs_error_VMerror if the
 * allocation fails.
 */
static int pdfi_obj_getrefstr(pdf_context *ctx, uint64_t object_num, uint32_t generation, byte **data, int *len)
{
    int size = 100;
    char *buf;

    buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_getrefstr(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    /* Cast so each argument has exactly the type its conversion specifier
     * expects; the printed text is unchanged.
     */
    snprintf(buf, size, "%"PRId64" %d R", (int64_t)object_num, (int)generation);
    *data = (byte *)buf;
    *len = strlen(buf);
    return 0;
}
590
591
/* Convert an indirect reference to text.
 *
 * - A reference flagged is_highlevelform is written as
 *   "<highlevel_object_num> 0 R" and the flag is cleared.
 * - Otherwise, unless the reference is flagged is_marking, the target is
 *   dereferenced with loop detection:
 *     - an undefined target is written as a plain "<num> <gen> R";
 *     - a stream or dictionary target is passed to pdfi_pdfmark_stream() /
 *       pdfi_pdfmark_dict() and the reference is then written as a label;
 *     - any other target is written inline via pdfi_obj_to_string().
 *   A circular reference falls through to the label as well.
 * - The label comes from pdfi_obj_get_label().
 *
 * Returns 0 or a negative error code.
 */
static int pdfi_obj_indirect_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_indirect_ref *ref = (pdf_indirect_ref *)obj;
    pdf_obj *target = NULL;
    char *label;
    bool want_label = true;
    int code = 0;

    if (ref->is_highlevelform) {
        code = pdfi_obj_getrefstr(ctx, ref->highlevel_object_num, 0, data, len);
        ref->is_highlevelform = false;
        goto done;
    }

    if (!ref->is_marking) {
        code = pdfi_deref_loop_detect(ctx, ref->ref_object_num, ref->ref_generation_num, &target);
        if (code == gs_error_undefined) {
            /* Do something sensible for undefined reference (this would be a broken file) */
            /* TODO: Flag an error? */
            code = pdfi_obj_getrefstr(ctx, ref->ref_object_num, ref->ref_generation_num, data, len);
            goto done;
        }
        /* A circular reference is tolerated: the label is still emitted. */
        if (code < 0 && code != gs_error_circular_reference)
            goto done;

        if (code == 0) {
            if (pdfi_type_of(target) == PDF_STREAM) {
                code = pdfi_pdfmark_stream(ctx, (pdf_stream *)target);
            } else if (pdfi_type_of(target) == PDF_DICT) {
                code = pdfi_pdfmark_dict(ctx, (pdf_dict *)target);
            } else {
                /* Simple objects are written in place of the reference. */
                code = pdfi_obj_to_string(ctx, target, data, len);
                want_label = false;
            }
            if (code < 0)
                goto done;
        }
    }

    if (want_label) {
        code = pdfi_obj_get_label(ctx, (pdf_obj *)ref, &label);
        if (code >= 0) {
            *data = (byte *)label;
            *len = strlen(label);
        }
    }

 done:
    pdfi_countdown(target);
    return code;
}
639
640
/* Convert a boolean to text: "true" when obj is PDF_TRUE_OBJ, "false" for
 * anything else. The result is not NUL-terminated.
 */
static int pdfi_obj_bool_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int capacity = 5;     /* room for the longer word, "false" */
    char *out;

    out = (char *)gs_alloc_bytes(ctx->memory, capacity, "pdfi_obj_bool_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    if (obj == PDF_TRUE_OBJ) {
        *len = 4;
        memcpy(out, (byte *)"true", 4);
    } else {
        *len = 5;
        memcpy(out, (byte *)"false", 5);
    }

    *data = (byte *)out;
    return 0;
}
659
660
/* Converter for the null object: emits the four bytes "null", without a
 * NUL terminator. obj itself is not examined.
 */
static int pdfi_obj_null_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const int nbytes = 4;
    char *out;

    out = (char *)gs_alloc_bytes(ctx->memory, nbytes, "pdfi_obj_null_str(data)");
    if (out == NULL)
        return_error(gs_error_VMerror);

    memcpy(out, (byte *)"null", 4);
    *data = (byte *)out;
    *len = 4;
    return 0;
}
674
675
/* Convert a string object to its PDF literal form "(...)".
 *
 * CR and LF are written as \r and \n, and '(' , ')' and '\' are prefixed
 * with a backslash; every other byte is copied unchanged. The result is
 * not NUL-terminated. A hex-string form "<...>" exists below but is
 * currently unreachable because non_ascii is never set (see the TODO).
 *
 * Returns 0, or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_string_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_string *string = (pdf_string *)obj;
    int nchars = string->length;
    bool non_ascii = false;
    int escapes = 0;
    int size;
    int i;
    char *buf;
    char *out;

    /* First pass: count the bytes that will need an extra output character. */
    for (i = 0; i < nchars; i++) {
        const byte c = string->data[i];

        /* TODO: I wanted to convert non-ascii to hex strings, but there
         * are cases (such as /Author field) where the non-ascii is not really binary
         * and then pdfwrite barfs on it later.
         * see gdevpdfu.c/pdf_put_encoded_hex_string(), which is not implemented
         * and causes crashes...
         * See sample: tests_private/pdf/sumatra/1532_-_Freetype_crash.pdf
         *
         * For now, just disabling the generation of hex strings, which will match
         * what gs does.  Seems lame.
         */
#if 0
        if (c > 127) {
            non_ascii = true;
            break;
        }
#endif
        /* TODO: I was going to just turn special chars into hexstrings, but it turns out
         * that the pdfwrite driver expects to be able to parse URI strings, and these
         * can have special characters.  So I will handle the minimum that seems needed for that.
         */
        if (c == 0x0a || c == 0x0d || c == '(' || c == ')' || c == '\\')
            escapes++;
    }

    if (non_ascii) {
        /* Two hex digits per byte plus the enclosing '<' and '>'. */
        size = string->length * 2 + 2;
        buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_string_str(data)");
        if (buf == NULL)
            return_error(gs_error_VMerror);
        buf[0] = '<';
        for (i = 0; i < nchars; i++) {
            snprintf(buf+2*i+1, 3, "%02X", string->data[i]);
        }
        buf[size-1] = '>';
    } else {
        /* One output byte per input byte, one more per escape, plus '(' and ')'. */
        size = string->length + 2 + escapes;
        buf = (char *)gs_alloc_bytes(ctx->memory, size, "pdfi_obj_string_str(data)");
        if (buf == NULL)
            return_error(gs_error_VMerror);

        out = buf;
        *out++ = '(';
        for (i = 0; i < nchars; i++) {
            const byte c = string->data[i];

            if (c == 0x0d) {
                *out++ = '\\';
                *out++ = 'r';
            } else if (c == 0x0a) {
                *out++ = '\\';
                *out++ = 'n';
            } else {
                if (c == '(' || c == ')' || c == '\\')
                    *out++ = '\\';
                *out++ = c;
            }
        }
        buf[size-1] = ')';
    }

    *data = (byte *)buf;
    *len = size;
    return 0;
}
771
772
/* Convert an array to text: "[" followed by each element's text, separated
 * by single spaces, then "]". Elements are fetched without dereferencing
 * and converted with pdfi_obj_to_string().
 *
 * On success the accumulated buffer is handed to the caller through *data
 * and *len. Returns 0 or the first negative error encountered.
 */
static int pdfi_obj_array_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_array *array = (pdf_array *)obj;
    pdfi_bufstream_t out;
    pdf_obj *item = NULL;
    byte *text = NULL;
    int text_len;
    uint64_t i, count;
    int code;

    code = pdfi_bufstream_init(ctx, &out);
    if (code < 0)
        goto cleanup;

    code = pdfi_bufstream_write(ctx, &out, (byte *)"[", 1);
    if (code < 0)
        goto cleanup;

    count = pdfi_array_size(array);
    for (i = 0; i < count; i++) {
        code = pdfi_array_get_no_deref(ctx, array, i, &item);
        if (code < 0)
            goto cleanup;

        code = pdfi_obj_to_string(ctx, item, &text, &text_len);
        if (code < 0)
            goto cleanup;

        code = pdfi_bufstream_write(ctx, &out, text, text_len);
        if (code < 0)
            goto cleanup;

        /* Done with this element; clear the locals so cleanup cannot release them twice. */
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
        text = NULL;
        text_len = 0;
        pdfi_countdown(item);
        item = NULL;

        /* Put a space between elements unless last item */
        if (i + 1 != count) {
            code = pdfi_bufstream_write(ctx, &out, (byte *)" ", 1);
            if (code < 0)
                goto cleanup;
        }
    }

    code = pdfi_bufstream_write(ctx, &out, (byte *)"]", 1);
    if (code < 0)
        goto cleanup;

    /* Now hand the results out as the string the caller keeps */
    code = pdfi_bufstream_copy(ctx, &out, data, len);

 cleanup:
    if (text)
        gs_free_object(ctx->memory, text, "pdfi_obj_array_str(itembuf)");
    pdfi_bufstream_free(ctx, &out);
    pdfi_countdown(item);
    return code;
}
825
826
/* Convert a stream to text.
 *
 * A stream flagged is_marking is written as its raw data, read with
 * pdfi_stream_to_buffer(). Any other stream is written as a reference: a
 * temporary indirect ref carrying the stream's object and generation
 * numbers is built and passed to pdfi_obj_indirect_str().
 *
 * Returns 0 or a negative error code.
 */
static int pdfi_obj_stream_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    pdf_stream *stream = (pdf_stream *)obj;
    pdf_indirect_ref *ref = NULL;
    int code;

    /* TODO: How to deal with stream dictionaries?
     * /AP is one example that has special handling (up in pdf_annot.c), but there are others.
     * See 'pushpin' annotation in annotations-galore_II.ps
     *
     * This will just literally grab the stream data.
     */
    if (stream->is_marking) {
        byte *contents;
        int64_t nbytes = 0;

        code = pdfi_stream_to_buffer(ctx, stream, &contents, &nbytes);
        if (code >= 0) {
            *data = contents;
            *len = (int)nbytes;
        }
    } else {
        /* Create an indirect ref for the stream */
        code = pdfi_object_alloc(ctx, PDF_INDIRECT, 0, (pdf_obj **)&ref);
        if (code >= 0) {
            pdfi_countup(ref);
            ref->ref_object_num = stream->object_num;
            ref->ref_generation_num = stream->generation_num;
            code = pdfi_obj_indirect_str(ctx, (pdf_obj *)ref, data, len);
        }
    }

    /* Drops the temporary ref; ref is still NULL on the raw-data path. */
    pdfi_countdown(ref);
    return code;
}
859
860
/* This fetches without dereferencing.  If you want to see the references inline,
861
 * then you need to pre-resolve them.  See pdfi_resolve_indirect().
862
 */
863
static int pdfi_obj_dict_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
864
30.5k
{
865
30.5k
    int code = 0;
866
30.5k
    pdf_dict *dict = (pdf_dict *)obj;
867
30.5k
    pdf_name *Key = NULL;
868
30.5k
    pdf_obj *Value = NULL;
869
30.5k
    byte *itembuf = NULL;
870
30.5k
    int itemsize;
871
30.5k
    pdfi_bufstream_t bufstream;
872
30.5k
    uint64_t index, dictsize;
873
30.5k
    uint64_t itemnum = 0;
874
875
30.5k
    code = pdfi_loop_detector_mark(ctx);
876
30.5k
    if (code < 0)
877
0
        return code;
878
879
30.5k
    code = pdfi_bufstream_init(ctx, &bufstream);
880
30.5k
    if (code < 0) goto exit;
881
882
30.5k
    dictsize = pdfi_dict_entries(dict);
883
    /* Handle empty dict specially */
884
30.5k
    if (dictsize == 0) {
885
0
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<< >>", 5);
886
0
        if (code < 0)
887
0
            goto exit;
888
0
        goto exit_copy;
889
0
    }
890
891
30.5k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"<<\n", 3);
892
30.5k
    if (code < 0) goto exit;
893
894
    /* Note: We specifically fetch without dereferencing, so there will be no circular
895
     * references to handle here.
896
     */
897
    /* Wrong.... */
898
899
30.5k
    if (dict->object_num !=0 ) {
900
18.8k
        if (pdfi_loop_detector_check_object(ctx, dict->object_num)) {
901
0
            code = gs_note_error(gs_error_circular_reference);
902
0
            goto exit;
903
0
        }
904
18.8k
        code = pdfi_loop_detector_add_object(ctx, dict->object_num);
905
18.8k
        if (code < 0)
906
0
            goto exit;
907
18.8k
    }
908
909
    /* Get each (key,val) pair from dict and setup param for it */
910
30.5k
    code = pdfi_dict_key_first(ctx, dict, (pdf_obj **)&Key, &index);
911
31.3k
    while (code >= 0) {
912
31.3k
        code = pdfi_obj_to_string(ctx, (pdf_obj *)Key, &itembuf, &itemsize);
913
31.3k
        if (code < 0) goto exit;
914
915
31.3k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
916
31.3k
        if (code < 0) goto exit;
917
918
31.3k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
919
31.3k
        itembuf = NULL;
920
31.3k
        itemsize = 0;
921
922
        /* Put a space between elements */
923
31.3k
        code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
924
31.3k
        if (code < 0) goto exit;
925
926
        /* No dereference */
927
31.3k
        code = pdfi_dict_get_no_deref(ctx, dict, (const pdf_name *)Key, &Value);
928
31.3k
        if (code < 0) goto exit;
929
31.3k
        code = pdfi_obj_to_string(ctx, Value, &itembuf, &itemsize);
930
31.3k
        if (code < 0) goto exit;
931
932
31.3k
        code = pdfi_bufstream_write(ctx, &bufstream, itembuf, itemsize);
933
31.3k
        if (code < 0) goto exit;
934
935
31.3k
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
936
31.3k
        itembuf = NULL;
937
31.3k
        itemsize = 0;
938
939
31.3k
        pdfi_countdown(Value);
940
31.3k
        Value = NULL;
941
31.3k
        pdfi_countdown(Key);
942
31.3k
        Key = NULL;
943
944
31.3k
        code = pdfi_dict_key_next(ctx, dict, (pdf_obj **)&Key, &index);
945
31.3k
        if (code == gs_error_undefined) {
946
30.5k
            code = 0;
947
30.5k
            break;
948
30.5k
        }
949
847
        if (code < 0) goto exit;
950
951
        /* Put a space between elements */
952
847
        if (++itemnum != dictsize) {
953
847
            code = pdfi_bufstream_write(ctx, &bufstream, (byte *)" ", 1);
954
847
            if (code < 0) goto exit;
955
847
        }
956
847
    }
957
30.5k
    if (code < 0) goto exit;
958
959
30.5k
    code = pdfi_bufstream_write(ctx, &bufstream, (byte *)"\n>>", 3);
960
30.5k
    if (code < 0) goto exit;
961
962
30.5k
 exit_copy:
963
    /* Now copy the results out into the string we can keep */
964
30.5k
    code = pdfi_bufstream_copy(ctx, &bufstream, data, len);
965
966
30.5k
 exit:
967
30.5k
    if (itembuf)
968
0
        gs_free_object(ctx->memory, itembuf, "pdfi_obj_dict_str(itembuf)");
969
30.5k
    pdfi_countdown(Key);
970
30.5k
    pdfi_countdown(Value);
971
30.5k
    pdfi_bufstream_free(ctx, &bufstream);
972
30.5k
    if (code < 0)
973
0
        (void)pdfi_loop_detector_cleartomark(ctx);
974
30.5k
    else
975
30.5k
        code = pdfi_loop_detector_cleartomark(ctx);
976
30.5k
    return code;
977
30.5k
}
978
979
/* Table of keyword spellings, generated from pdf_tokens.h.
 *
 * pdf_tokens.h is expanded here with PARAM1(A) producing the stringized
 * form of A and PARAM2(A,B) producing A itself (presumably A is already a
 * string literal there -- confirm against pdf_tokens.h), each followed by a
 * comma so that every entry becomes one row of the array.
 *
 * Each row is 10 bytes wide, so a spelling must be at most 9 characters to
 * remain NUL-terminated; pdfi_obj_fast_keyword_str() calls strlen() on rows
 * of this table, indexing it with the object pointer value itself.
 */
#define PARAM1(A) # A,
#define PARAM2(A,B) A,
static const char pdf_token_strings[][10] = {
#include "pdf_tokens.h"
};
984
985
/* Produce the string form of a 'fast keyword' object.
 *
 * For these objects the pointer value itself is used as the index into
 * pdf_token_strings[]; the pointer is never dereferenced.
 *
 * ctx  - interpreter context (supplies the allocator)
 * obj  - the keyword, encoded in the pointer value
 * data - receives a newly allocated copy of the keyword spelling
 * len  - receives the number of characters in the spelling
 *
 * The buffer is allocated one byte larger than *len and is NUL-terminated,
 * but the terminator is not counted in *len.  Callers such as
 * pdfi_obj_dict_str() append exactly *len bytes to their output, so
 * counting the terminator would embed a NUL byte in the result.
 *
 * Returns 0 on success or gs_error_VMerror if the allocation fails.
 */
static int pdfi_obj_fast_keyword_str(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
{
    const char *s = pdf_token_strings[(uintptr_t)obj];
    int size = (int)strlen(s);
    byte *buf;

    /* +1 so the copy keeps its terminating NUL */
    buf = gs_alloc_bytes(ctx->memory, size + 1, "pdfi_obj_fast_keyword_str(data)");
    if (buf == NULL)
        return_error(gs_error_VMerror);
    memcpy(buf, s, size + 1);
    *data = buf;
    *len = size;
    return 0;
}
1000
1001
/* Maps an object type to the function that builds its string form.
 * pdfi_obj_to_string() scans this table in order and calls the handler of
 * the first entry whose type matches.  The entry with a NULL handler
 * terminates the table; types that are not listed here are handled by
 * pdfi_obj_default_str().
 */
obj_str_dispatch_t obj_str_dispatch[] = {
    {PDF_NAME, pdfi_obj_name_str},
    {PDF_ARRAY, pdfi_obj_array_str},
    {PDF_REAL, pdfi_obj_real_str},
    {PDF_INT, pdfi_obj_int_str},
    {PDF_BOOL, pdfi_obj_bool_str},
    {PDF_STRING, pdfi_obj_string_str},
    {PDF_DICT, pdfi_obj_dict_str},
    {PDF_STREAM, pdfi_obj_stream_str},
    {PDF_INDIRECT, pdfi_obj_indirect_str},
    {PDF_NULL, pdfi_obj_null_str},
    {PDF_FAST_KEYWORD, pdfi_obj_fast_keyword_str},
    {0, NULL}
};
1015
1016
/* Recursive function to build a string from an object
1017
 */
1018
int pdfi_obj_to_string(pdf_context *ctx, pdf_obj *obj, byte **data, int *len)
1019
1.84M
{
1020
1.84M
    obj_str_dispatch_t *dispatch_ptr;
1021
1.84M
    int code = 0;
1022
1.84M
    pdf_obj_type type;
1023
1024
1.84M
    *data = NULL;
1025
1.84M
    *len = 0;
1026
1.84M
    type = pdfi_type_of(obj);
1027
5.47M
    for (dispatch_ptr = obj_str_dispatch; dispatch_ptr->func; dispatch_ptr ++) {
1028
5.47M
        if (type == dispatch_ptr->type) {
1029
1.84M
            code = dispatch_ptr->func(ctx, obj, data, len);
1030
1.84M
            goto exit;
1031
1.84M
        }
1032
5.47M
    }
1033
    /* Not implemented, use default */
1034
8
    code = pdfi_obj_default_str(ctx, obj, data, len);
1035
1.84M
 exit:
1036
1.84M
    return code;
1037
8
}
1038
1039
/*********** END obj_to_string module ************/