Coverage Report

Created: 2026-06-08 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-label.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <stdarg.h>
27
#include <stdlib.h>
28
#include <string.h>
29
30
/* Forward declarations for the label table and its list nodes. */
typedef struct pdf_object_label_node pdf_object_label_node;
typedef struct pdf_object_labels pdf_object_labels;
32
33
struct pdf_object_label_node
34
{
35
  int num;
36
  char *path;
37
  pdf_object_label_node *next;
38
};
39
40
struct pdf_object_labels
41
{
42
  fz_pool *pool;
43
  int object_count;
44
  int root, info, encrypt;
45
  unsigned short *pages;
46
  char *seen;
47
  pdf_object_label_node **nodes;
48
};
49
50
static void
51
add_object_label(fz_context *ctx, pdf_object_labels *g, char *path, int a, int b)
52
0
{
53
0
  pdf_object_label_node *node, **root;
54
55
0
  if (a < 0 || a >= g->object_count)
56
0
    return;
57
58
0
  node = fz_pool_alloc(ctx, g->pool, sizeof(pdf_object_label_node));
59
0
  node->path = fz_pool_strdup(ctx, g->pool, path);
60
0
  node->num = b;
61
62
0
  root = &g->nodes[a];
63
0
  node->next = *root;
64
0
  *root = node;
65
0
}
66
67
static void
68
scan_object_label_rec(fz_context *ctx, pdf_object_labels *g, char *root_path, pdf_obj *obj, int top)
69
0
{
70
0
  char path[100];
71
0
  int i, n;
72
0
  if (pdf_is_indirect(ctx, obj))
73
0
    ;
74
0
  else if (pdf_is_dict(ctx, obj))
75
0
  {
76
0
    n = pdf_dict_len(ctx, obj);
77
0
    for (i = 0; i < n; ++i)
78
0
    {
79
0
      pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
80
0
      pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
81
0
      if (val && key != PDF_NAME(Parent) && key != PDF_NAME(P) && key != PDF_NAME(Prev) && key != PDF_NAME(Last))
82
0
      {
83
0
        if (pdf_is_indirect(ctx, val))
84
0
        {
85
0
          fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
86
0
          add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
87
0
        }
88
0
        else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
89
0
        {
90
0
          fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
91
0
          scan_object_label_rec(ctx, g, path, val, top);
92
0
        }
93
0
      }
94
0
    }
95
0
  }
96
0
  else if (pdf_is_array(ctx, obj))
97
0
  {
98
0
    n = pdf_array_len(ctx, obj);
99
0
    for (i = 0; i < n; ++i)
100
0
    {
101
0
      pdf_obj *val = pdf_array_get(ctx, obj, i);
102
0
      if (val)
103
0
      {
104
0
        if (pdf_is_indirect(ctx, val))
105
0
        {
106
0
          fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
107
0
          add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
108
0
        }
109
0
        else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
110
0
        {
111
0
          fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
112
0
          scan_object_label_rec(ctx, g, path, val, top);
113
0
        }
114
0
      }
115
0
    }
116
0
  }
117
0
}
118
119
static void
120
scan_object_label(fz_context *ctx, pdf_document *doc, pdf_object_labels *g, int num)
121
0
{
122
0
  pdf_obj *obj = pdf_load_object(ctx, doc, num);
123
0
  fz_try(ctx)
124
0
    scan_object_label_rec(ctx, g, "", obj, num);
125
0
  fz_always(ctx)
126
0
    pdf_drop_obj(ctx, obj);
127
0
  fz_catch(ctx)
128
0
    fz_rethrow(ctx);
129
0
}
130
131
pdf_object_labels *
132
pdf_load_object_labels(fz_context *ctx, pdf_document *doc)
133
0
{
134
0
  pdf_object_labels *g = NULL;
135
0
  fz_pool *pool;
136
0
  int i, n, page_count;
137
138
0
  n = pdf_count_objects(ctx, doc);
139
140
0
  pool = fz_new_pool(ctx);
141
0
  fz_try(ctx)
142
0
  {
143
0
    g = fz_pool_alloc(ctx, pool, sizeof(pdf_object_labels));
144
0
    g->pool = pool;
145
0
    g->object_count = n;
146
0
    g->root = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
147
0
    g->info = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
148
0
    g->encrypt = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
149
0
    g->seen = fz_pool_alloc(ctx, pool, n);
150
0
    g->nodes = fz_pool_alloc(ctx, pool, g->object_count * sizeof(pdf_object_label_node*));
151
0
    g->pages = fz_pool_alloc(ctx, pool, g->object_count * sizeof(unsigned short));
152
153
0
    page_count = pdf_count_pages(ctx, doc);
154
0
    for (i = 0; i < page_count; ++i)
155
0
      g->pages[pdf_to_num(ctx, pdf_lookup_page_obj(ctx, doc, i))] = i+1;
156
157
0
    for (i = 1; i < n; ++i)
158
0
      scan_object_label(ctx, doc, g, i);
159
0
  }
160
0
  fz_catch(ctx)
161
0
  {
162
0
    fz_drop_pool(ctx, pool);
163
0
  }
164
0
  return g;
165
0
}
166
167
void
168
pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g)
169
0
{
170
0
  if (g)
171
0
    fz_drop_pool(ctx, g->pool);
172
0
}
173
174
/*
 * Format a string and place it immediately in front of 'path'.
 *
 * path_buffer: start of the buffer that 'path' points into.
 * path:        current start of the text already built (the text grows
 *              towards path_buffer).
 * fmt, ...:    printf-style format and arguments for the text to add.
 *
 * Returns the new start of the text. If the formatted text does not fit
 * while still leaving 3 spare characters, "..." is placed in front of
 * 'path' instead. No terminator is written; the text simply runs on into
 * what was already there.
 */
static char *
prepend(char *path_buffer, char *path, const char *fmt, ...)
{
  char buf[256];
  size_t z, room;
  va_list args;

  va_start(args, fmt);
  z = fz_vsnprintf(buf, sizeof(buf), fmt, args);
  va_end(args);

  /* buf can hold at most sizeof(buf)-1 characters. If the formatter
   * reports a longer (untruncated) length, never copy more than buf
   * actually contains. */
  if (z >= sizeof(buf))
    z = sizeof(buf) - 1;

  /* Space available in front of the current text. */
  room = (size_t)(path - path_buffer);

  /* We always want to leave ourselves at least 3 chars for
   * a future "..." */
  if (room >= z + 3)
  {
    path -= z;
    memcpy(path, buf, z);
    return path;
  }

  /* Not even room for the marker: leave the text untouched rather
   * than write in front of the buffer. */
  if (room < 3)
    return path;

  /* Just put ... in now. */
  path -= 3;
  path[0] = '.';
  path[1] = '.';
  path[2] = '.';

  return path;
}
202
203
static void
204
find_paths(fz_context *ctx, pdf_object_labels *g, int here, char *path_buffer, char *leaf_path, pdf_label_object_fn *callback, void *arg)
205
0
{
206
0
  pdf_object_label_node *node;
207
0
  int next;
208
0
  if (here == g->root)
209
0
  {
210
0
    prepend(path_buffer, leaf_path, "trailer/Root");
211
0
    callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Root"));
212
0
    return;
213
0
  }
214
0
  if (here == g->info)
215
0
  {
216
0
    callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Info"));
217
0
    return;
218
0
  }
219
0
  if (here == g->encrypt)
220
0
  {
221
0
    callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Encrypt"));
222
0
    return;
223
0
  }
224
0
  if (g->pages[here])
225
0
  {
226
0
    callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d", g->pages[here]));
227
0
  }
228
0
  for (node = g->nodes[here]; node; node = node->next)
229
0
  {
230
0
    next = node->num;
231
0
    if (next < 1 || next >= g->object_count)
232
0
      continue;
233
0
    if (g->seen[next])
234
0
      continue;
235
0
    if (g->pages[next])
236
0
    {
237
0
      callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d%s", g->pages[next], node->path));
238
0
    }
239
0
    else
240
0
    {
241
0
      char *p = prepend(path_buffer, leaf_path, "%s", node->path);
242
0
      g->seen[next] = 1;
243
      // if we've run out of room in the path buffer, send this and stop.
244
0
      if (p[0] == '.' && p[1] == '.' && p[2] == '.')
245
0
        callback(ctx, arg, p);
246
0
      else
247
0
        find_paths(ctx, g, next, path_buffer, p, callback, arg);
248
0
      g->seen[next] = 0;
249
0
    }
250
0
  }
251
0
}
252
253
void
254
pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg)
255
0
{
256
0
  int i;
257
0
  char path[4096];
258
259
0
  if (num < 1 || num >= g->object_count)
260
0
    return;
261
0
  for (i = 1; i < g->object_count; ++i)
262
0
    g->seen[i] = 0;
263
0
  path[sizeof(path)-1] = 0;
264
0
  find_paths(ctx, g, num, path, &path[sizeof(path)-1], callback, arg);
265
0
}