Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/pdf/pdf-nametree.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
static pdf_obj *
29
pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, const char *needle, pdf_cycle_list *cycle_up)
30
0
{
31
0
  pdf_cycle_list cycle;
32
0
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
33
0
  pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
34
35
0
  if (pdf_cycle(ctx, &cycle, cycle_up, node))
36
0
    return NULL;
37
38
0
  if (pdf_is_array(ctx, kids))
39
0
  {
40
0
    int l = 0;
41
0
    int r = pdf_array_len(ctx, kids) - 1;
42
43
0
    while (l <= r)
44
0
    {
45
0
      int m = (l + r) >> 1;
46
0
      pdf_obj *kid = pdf_array_get(ctx, kids, m);
47
0
      pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
48
0
      const char *first = pdf_array_get_text_string(ctx, limits, 0);
49
0
      const char *last = pdf_array_get_text_string(ctx, limits, 1);
50
51
0
      if (!pdf_is_indirect(ctx, kid))
52
0
      {
53
0
        fz_warn(ctx, "non-indirect internal node found in name tree");
54
0
        break;
55
0
      }
56
57
0
      if (strcmp(needle, first) < 0)
58
0
        r = m - 1;
59
0
      else if (strcmp(needle, last) > 0)
60
0
        l = m + 1;
61
0
      else
62
0
      {
63
0
        pdf_obj *obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
64
0
        if (obj)
65
0
          return obj;
66
0
        else
67
0
          break;
68
0
      }
69
0
    }
70
71
    /* Spec says names should be sorted (hence the binary search,
72
     * above), but Acrobat copes with non-sorted. Drop back to a
73
     * simple search if the binary search fails. */
74
0
    r = pdf_array_len(ctx, kids);
75
0
    for (l = 0; l < r; l++)
76
0
    {
77
0
      pdf_obj *obj, *kid = pdf_array_get(ctx, kids, l);
78
0
      if (!pdf_is_indirect(ctx, kid))
79
0
      {
80
0
        fz_warn(ctx, "non-indirect internal node found in name tree");
81
0
        continue;
82
0
      }
83
0
      obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
84
0
      if (obj)
85
0
        return obj;
86
0
    }
87
0
  }
88
89
0
  if (pdf_is_array(ctx, names))
90
0
  {
91
0
    int l = 0;
92
0
    int r = (pdf_array_len(ctx, names) / 2) - 1;
93
94
0
    while (l <= r)
95
0
    {
96
0
      int m = (l + r) >> 1;
97
0
      int c;
98
0
      const char *key = pdf_array_get_text_string(ctx, names, m * 2);
99
0
      pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
100
101
0
      c = strcmp(needle, key);
102
0
      if (c < 0)
103
0
        r = m - 1;
104
0
      else if (c > 0)
105
0
        l = m + 1;
106
0
      else
107
0
        return val;
108
0
    }
109
110
    /* Spec says names should be sorted (hence the binary search,
111
     * above), but Acrobat copes with non-sorted. Drop back to a
112
     * simple search if the binary search fails. */
113
0
    r = pdf_array_len(ctx, names)/2;
114
0
    for (l = 0; l < r; l++)
115
0
      if (!strcmp(needle, pdf_array_get_text_string(ctx, names, l * 2)))
116
0
        return pdf_array_get(ctx, names, l * 2 + 1);
117
0
  }
118
119
0
  return NULL;
120
0
}
121
122
pdf_obj *
123
pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
124
0
{
125
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
126
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
127
0
  pdf_obj *tree = pdf_dict_get(ctx, names, which);
128
0
  return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
129
0
}
130
131
pdf_obj *
132
pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
133
0
{
134
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
135
0
  pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
136
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
137
138
  /* PDF 1.1 has destinations in a dictionary */
139
0
  if (dests)
140
0
  {
141
0
    if (pdf_is_name(ctx, needle))
142
0
      return pdf_dict_get(ctx, dests, needle);
143
0
    else
144
0
      return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
145
0
  }
146
147
  /* PDF 1.2 has destinations in a name tree */
148
0
  if (names)
149
0
  {
150
0
    pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
151
0
    return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
152
0
  }
153
154
0
  return NULL;
155
0
}
156
157
static void
158
pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node, pdf_cycle_list *cycle_up)
159
0
{
160
0
  pdf_cycle_list cycle;
161
0
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
162
0
  pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
163
0
  int i;
164
165
0
  if (kids && !pdf_cycle(ctx, &cycle, cycle_up, node))
166
0
  {
167
0
    int len = pdf_array_len(ctx, kids);
168
0
    for (i = 0; i < len; i++)
169
0
      pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i), &cycle);
170
0
  }
171
172
0
  if (names)
173
0
  {
174
0
    int len = pdf_array_len(ctx, names);
175
0
    for (i = 0; i + 1 < len; i += 2)
176
0
    {
177
0
      pdf_obj *key = pdf_array_get(ctx, names, i);
178
0
      pdf_obj *val = pdf_array_get(ctx, names, i + 1);
179
0
      if (pdf_is_string(ctx, key))
180
0
      {
181
0
        key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
182
0
        fz_try(ctx)
183
0
          pdf_dict_put(ctx, dict, key, val);
184
0
        fz_always(ctx)
185
0
          pdf_drop_obj(ctx, key);
186
0
        fz_catch(ctx)
187
0
          fz_rethrow(ctx);
188
0
      }
189
0
      else if (pdf_is_name(ctx, key))
190
0
      {
191
0
        pdf_dict_put(ctx, dict, key, val);
192
0
      }
193
0
    }
194
0
  }
195
0
}
196
197
/* FIXME: fz_try/fz_catch needed here */
198
pdf_obj *
199
pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
200
0
{
201
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
202
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
203
0
  pdf_obj *tree = pdf_dict_get(ctx, names, which);
204
0
  if (pdf_is_dict(ctx, tree))
205
0
  {
206
0
    pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
207
0
    pdf_load_name_tree_imp(ctx, dict, doc, tree, NULL);
208
0
    return dict;
209
0
  }
210
0
  return NULL;
211
0
}
212
213
pdf_obj *
214
pdf_lookup_number_imp(fz_context *ctx, pdf_obj *node, int needle, pdf_cycle_list *cycle_up)
215
29.9k
{
216
29.9k
  pdf_cycle_list cycle;
217
29.9k
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
218
29.9k
  pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
219
220
29.9k
  if (pdf_is_array(ctx, kids))
221
7.45k
  {
222
7.45k
    int l = 0;
223
7.45k
    int r = pdf_array_len(ctx, kids) - 1;
224
225
13.2k
    while (l <= r)
226
13.2k
    {
227
13.2k
      int m = (l + r) >> 1;
228
13.2k
      pdf_obj *kid = pdf_array_get(ctx, kids, m);
229
13.2k
      pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
230
13.2k
      int first = pdf_array_get_int(ctx, limits, 0);
231
13.2k
      int last = pdf_array_get_int(ctx, limits, 1);
232
233
13.2k
      if (needle < first)
234
5.76k
        r = m - 1;
235
7.45k
      else if (needle > last)
236
0
        l = m + 1;
237
7.45k
      else
238
7.45k
      {
239
7.45k
        if (pdf_cycle(ctx, &cycle, cycle_up, node))
240
0
          break;
241
7.45k
        return pdf_lookup_number_imp(ctx, kid, needle, &cycle);
242
7.45k
      }
243
13.2k
    }
244
7.45k
  }
245
246
22.5k
  if (pdf_is_array(ctx, nums))
247
16.2k
  {
248
16.2k
    pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
249
16.2k
    int l = 0;
250
16.2k
    int r = (pdf_array_len(ctx, nums) / 2) - 1;
251
252
43.5k
    while (l <= r)
253
43.5k
    {
254
43.5k
      int m = (l + r) >> 1;
255
43.5k
      int key = pdf_array_get_int(ctx, nums, m * 2);
256
43.5k
      pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
257
258
43.5k
      if (needle < key)
259
16.8k
        r = m - 1;
260
26.7k
      else if (needle > key)
261
10.5k
        l = m + 1;
262
16.2k
      else
263
16.2k
        return val;
264
43.5k
    }
265
266
    /* Parallel the nametree lookup above by allowing for non-sorted lists. */
267
0
    r = pdf_array_len(ctx, nums)/2;
268
0
    for (l = 0; l < r; l++)
269
0
      if (needle == pdf_array_get_int(ctx, nums, l * 2))
270
0
        return pdf_array_get(ctx, nums, l * 2 + 1);
271
0
  }
272
273
6.25k
  return NULL;
274
22.5k
}
275
276
pdf_obj *
277
pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
278
22.5k
{
279
22.5k
  return pdf_lookup_number_imp(ctx, node, needle, NULL);
280
22.5k
}
281
282
static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
283
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
284
      void (*leave)(fz_context *, pdf_obj *, void *),
285
      void *arg,
286
      pdf_obj **inherit_names,
287
      pdf_obj **inherit_vals,
288
      pdf_cycle_list *cycle_up);
289
290
static void
291
pdf_walk_tree_kid(fz_context *ctx,
292
      pdf_obj *obj,
293
      pdf_obj *kid_name,
294
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
295
      void (*leave)(fz_context *, pdf_obj *, void *),
296
      void *arg,
297
      pdf_obj **inherit_names,
298
      pdf_obj **inherit_vals,
299
      pdf_cycle_list *cycle_up)
300
0
{
301
0
  pdf_cycle_list cycle;
302
0
  pdf_obj **new_vals = NULL;
303
304
0
  if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
305
0
    return;
306
307
0
  fz_var(new_vals);
308
309
0
  fz_try(ctx)
310
0
  {
311
    /* First we run through the names we've been asked to collect
312
     * inherited values for updating the values. */
313
0
    if (inherit_names != NULL)
314
0
    {
315
0
      int i, n;
316
317
0
      for (n = 0; inherit_names[n] != NULL; n++);
318
319
0
      for (i = 0; i < n; i++)
320
0
      {
321
0
        pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
322
0
        if (v != NULL)
323
0
        {
324
0
          if (new_vals == NULL)
325
0
          {
326
0
            new_vals = fz_malloc_array(ctx, n, pdf_obj *);
327
0
            memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
328
0
            inherit_vals = new_vals;
329
0
          }
330
0
          inherit_vals[i] = v;
331
0
        }
332
0
      }
333
0
    }
334
335
0
    if (arrive)
336
0
      arrive(ctx, obj, arg, inherit_vals);
337
0
    pdf_walk_tree_imp(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
338
0
    if (leave)
339
0
      leave(ctx, obj, arg);
340
0
  }
341
0
  fz_always(ctx)
342
0
    fz_free(ctx, new_vals);
343
0
  fz_catch(ctx)
344
0
    fz_rethrow(ctx);
345
0
}
346
347
static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
348
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
349
      void (*leave)(fz_context *, pdf_obj *, void *),
350
      void *arg,
351
      pdf_obj **inherit_names,
352
      pdf_obj **inherit_vals,
353
      pdf_cycle_list *cycle_up)
354
0
{
355
0
  pdf_cycle_list cycle;
356
357
0
  if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
358
0
    return;
359
360
0
  if (pdf_is_array(ctx, obj))
361
0
  {
362
0
    int i, n = pdf_array_len(ctx, obj);
363
0
    for (i = 0; i < n; i++)
364
0
      pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
365
0
  }
366
0
  else
367
0
  {
368
0
    pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
369
0
  }
370
0
}
371
372
void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
373
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
374
      void (*leave)(fz_context *, pdf_obj *, void *),
375
      void *arg,
376
      pdf_obj **inherit_names,
377
      pdf_obj **inherit_vals)
378
0
{
379
0
  pdf_walk_tree_imp(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, NULL);
380
0
}