Coverage Report

Created: 2025-11-07 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-nametree.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
static pdf_obj *
29
pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, const char *needle, pdf_cycle_list *cycle_up)
30
0
{
31
0
  pdf_cycle_list cycle;
32
0
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
33
0
  pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
34
35
0
  if (pdf_cycle(ctx, &cycle, cycle_up, node))
36
0
    return NULL;
37
38
0
  if (pdf_is_array(ctx, kids))
39
0
  {
40
0
    int l = 0;
41
0
    int r = pdf_array_len(ctx, kids) - 1;
42
43
0
    while (l <= r)
44
0
    {
45
0
      int m = (l + r) >> 1;
46
0
      pdf_obj *kid = pdf_array_get(ctx, kids, m);
47
0
      pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
48
0
      const char *first = pdf_array_get_text_string(ctx, limits, 0);
49
0
      const char *last = pdf_array_get_text_string(ctx, limits, 1);
50
51
0
      if (!pdf_is_indirect(ctx, kid))
52
0
      {
53
0
        fz_warn(ctx, "non-indirect internal node found in name tree");
54
0
        break;
55
0
      }
56
57
0
      if (strcmp(needle, first) < 0)
58
0
        r = m - 1;
59
0
      else if (strcmp(needle, last) > 0)
60
0
        l = m + 1;
61
0
      else
62
0
      {
63
0
        pdf_obj *obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
64
0
        if (obj)
65
0
          return obj;
66
0
        else
67
0
          break;
68
0
      }
69
0
    }
70
71
    /* Spec says names should be sorted (hence the binary search,
72
     * above), but Acrobat copes with non-sorted. Drop back to a
73
     * simple search if the binary search fails. */
74
0
    r = pdf_array_len(ctx, kids);
75
0
    for (l = 0; l < r; l++)
76
0
    {
77
0
      pdf_obj *obj, *kid = pdf_array_get(ctx, kids, l);
78
0
      if (!pdf_is_indirect(ctx, kid))
79
0
      {
80
0
        fz_warn(ctx, "non-indirect internal node found in name tree");
81
0
        continue;
82
0
      }
83
0
      obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
84
0
      if (obj)
85
0
        return obj;
86
0
    }
87
0
  }
88
89
0
  if (pdf_is_array(ctx, names))
90
0
  {
91
0
    int l = 0;
92
0
    int r = (pdf_array_len(ctx, names) / 2) - 1;
93
94
0
    while (l <= r)
95
0
    {
96
0
      int m = (l + r) >> 1;
97
0
      int c;
98
0
      const char *key = pdf_array_get_text_string(ctx, names, m * 2);
99
0
      pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
100
101
0
      c = strcmp(needle, key);
102
0
      if (c < 0)
103
0
        r = m - 1;
104
0
      else if (c > 0)
105
0
        l = m + 1;
106
0
      else
107
0
        return val;
108
0
    }
109
110
    /* Spec says names should be sorted (hence the binary search,
111
     * above), but Acrobat copes with non-sorted. Drop back to a
112
     * simple search if the binary search fails. */
113
0
    r = pdf_array_len(ctx, names)/2;
114
0
    for (l = 0; l < r; l++)
115
0
      if (!strcmp(needle, pdf_array_get_text_string(ctx, names, l * 2)))
116
0
        return pdf_array_get(ctx, names, l * 2 + 1);
117
0
  }
118
119
0
  return NULL;
120
0
}
121
122
pdf_obj *
123
pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
124
0
{
125
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
126
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
127
0
  pdf_obj *tree = pdf_dict_get(ctx, names, which);
128
0
  return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
129
0
}
130
131
pdf_obj *
132
pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
133
0
{
134
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
135
0
  pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
136
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
137
138
  /* PDF 1.1 has destinations in a dictionary */
139
0
  if (dests)
140
0
  {
141
0
    if (pdf_is_name(ctx, needle))
142
0
      return pdf_dict_get(ctx, dests, needle);
143
0
    else
144
0
      return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
145
0
  }
146
147
  /* PDF 1.2 has destinations in a name tree */
148
0
  if (names)
149
0
  {
150
0
    pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
151
0
    return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
152
0
  }
153
154
0
  return NULL;
155
0
}
156
157
static void
158
pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node, pdf_cycle_list *cycle_up)
159
0
{
160
0
  pdf_cycle_list cycle;
161
0
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
162
0
  pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
163
0
  int i;
164
165
0
  if (kids && !pdf_cycle(ctx, &cycle, cycle_up, node))
166
0
  {
167
0
    int len = pdf_array_len(ctx, kids);
168
0
    for (i = 0; i < len; i++)
169
0
      pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i), &cycle);
170
0
  }
171
172
0
  if (names)
173
0
  {
174
0
    int len = pdf_array_len(ctx, names);
175
0
    for (i = 0; i + 1 < len; i += 2)
176
0
    {
177
0
      pdf_obj *key = pdf_array_get(ctx, names, i);
178
0
      pdf_obj *val = pdf_array_get(ctx, names, i + 1);
179
0
      if (pdf_is_string(ctx, key))
180
0
      {
181
0
        key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
182
0
        fz_try(ctx)
183
0
          pdf_dict_put(ctx, dict, key, val);
184
0
        fz_always(ctx)
185
0
          pdf_drop_obj(ctx, key);
186
0
        fz_catch(ctx)
187
0
          fz_rethrow(ctx);
188
0
      }
189
0
      else if (pdf_is_name(ctx, key))
190
0
      {
191
0
        pdf_dict_put(ctx, dict, key, val);
192
0
      }
193
0
    }
194
0
  }
195
0
}
196
197
/* FIXME: fz_try/fz_catch needed here */
198
pdf_obj *
199
pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
200
0
{
201
0
  pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
202
0
  pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
203
0
  pdf_obj *tree = pdf_dict_get(ctx, names, which);
204
0
  if (pdf_is_dict(ctx, tree))
205
0
  {
206
0
    pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
207
0
    pdf_load_name_tree_imp(ctx, dict, doc, tree, NULL);
208
0
    return dict;
209
0
  }
210
0
  return NULL;
211
0
}
212
213
pdf_obj *
214
pdf_lookup_number_imp(fz_context *ctx, pdf_obj *node, int needle, pdf_cycle_list *cycle_up)
215
19
{
216
19
  pdf_cycle_list cycle;
217
19
  pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
218
19
  pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
219
220
19
  if (pdf_is_array(ctx, kids))
221
0
  {
222
0
    int l = 0;
223
0
    int r = pdf_array_len(ctx, kids) - 1;
224
225
0
    while (l <= r)
226
0
    {
227
0
      int m = (l + r) >> 1;
228
0
      pdf_obj *kid = pdf_array_get(ctx, kids, m);
229
0
      pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
230
0
      int first = pdf_array_get_int(ctx, limits, 0);
231
0
      int last = pdf_array_get_int(ctx, limits, 1);
232
233
0
      if (needle < first)
234
0
        r = m - 1;
235
0
      else if (needle > last)
236
0
        l = m + 1;
237
0
      else
238
0
      {
239
0
        if (pdf_cycle(ctx, &cycle, cycle_up, node))
240
0
          break;
241
0
        return pdf_lookup_number_imp(ctx, kid, needle, &cycle);
242
0
      }
243
0
    }
244
0
  }
245
246
19
  if (pdf_is_array(ctx, nums))
247
0
  {
248
0
    int l = 0;
249
0
    int r = (pdf_array_len(ctx, nums) / 2) - 1;
250
251
0
    while (l <= r)
252
0
    {
253
0
      int m = (l + r) >> 1;
254
0
      int key = pdf_array_get_int(ctx, nums, m * 2);
255
0
      pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
256
257
0
      if (needle < key)
258
0
        r = m - 1;
259
0
      else if (needle > key)
260
0
        l = m + 1;
261
0
      else
262
0
        return val;
263
0
    }
264
265
    /* Parallel the nametree lookup above by allowing for non-sorted lists. */
266
0
    r = pdf_array_len(ctx, nums)/2;
267
0
    for (l = 0; l < r; l++)
268
0
      if (needle == pdf_array_get_int(ctx, nums, l * 2))
269
0
        return pdf_array_get(ctx, nums, l * 2 + 1);
270
0
  }
271
272
19
  return NULL;
273
19
}
274
275
pdf_obj *
276
pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
277
19
{
278
19
  return pdf_lookup_number_imp(ctx, node, needle, NULL);
279
19
}
280
281
static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
282
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
283
      void (*leave)(fz_context *, pdf_obj *, void *),
284
      void *arg,
285
      pdf_obj **inherit_names,
286
      pdf_obj **inherit_vals,
287
      pdf_cycle_list *cycle_up);
288
289
static void
290
pdf_walk_tree_kid(fz_context *ctx,
291
      pdf_obj *obj,
292
      pdf_obj *kid_name,
293
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
294
      void (*leave)(fz_context *, pdf_obj *, void *),
295
      void *arg,
296
      pdf_obj **inherit_names,
297
      pdf_obj **inherit_vals,
298
      pdf_cycle_list *cycle_up)
299
0
{
300
0
  pdf_cycle_list cycle;
301
0
  pdf_obj **new_vals = NULL;
302
303
0
  if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
304
0
    return;
305
306
0
  fz_var(new_vals);
307
308
0
  fz_try(ctx)
309
0
  {
310
    /* First we run through the names we've been asked to collect
311
     * inherited values for updating the values. */
312
0
    if (inherit_names != NULL)
313
0
    {
314
0
      int i, n;
315
316
0
      for (n = 0; inherit_names[n] != NULL; n++);
317
318
0
      for (i = 0; i < n; i++)
319
0
      {
320
0
        pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
321
0
        if (v != NULL)
322
0
        {
323
0
          if (new_vals == NULL)
324
0
          {
325
0
            new_vals = fz_malloc_array(ctx, n, pdf_obj *);
326
0
            memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
327
0
            inherit_vals = new_vals;
328
0
          }
329
0
          inherit_vals[i] = v;
330
0
        }
331
0
      }
332
0
    }
333
334
0
    if (arrive)
335
0
      arrive(ctx, obj, arg, inherit_vals);
336
0
    pdf_walk_tree_imp(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
337
0
    if (leave)
338
0
      leave(ctx, obj, arg);
339
0
  }
340
0
  fz_always(ctx)
341
0
    fz_free(ctx, new_vals);
342
0
  fz_catch(ctx)
343
0
    fz_rethrow(ctx);
344
0
}
345
346
static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
347
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
348
      void (*leave)(fz_context *, pdf_obj *, void *),
349
      void *arg,
350
      pdf_obj **inherit_names,
351
      pdf_obj **inherit_vals,
352
      pdf_cycle_list *cycle_up)
353
0
{
354
0
  pdf_cycle_list cycle;
355
356
0
  if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
357
0
    return;
358
359
0
  if (pdf_is_array(ctx, obj))
360
0
  {
361
0
    int i, n = pdf_array_len(ctx, obj);
362
0
    for (i = 0; i < n; i++)
363
0
      pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
364
0
  }
365
0
  else
366
0
  {
367
0
    pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
368
0
  }
369
0
}
370
371
void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
372
      void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
373
      void (*leave)(fz_context *, pdf_obj *, void *),
374
      void *arg,
375
      pdf_obj **inherit_names,
376
      pdf_obj **inherit_vals)
377
0
{
378
  pdf_walk_tree_imp(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, NULL);
379
0
}