Coverage Report

Created: 2025-11-07 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-unicode.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
/* Load or synthesize ToUnicode map for fonts */
29
30
static void
31
pdf_remap_cmap_range(fz_context *ctx, pdf_cmap *ucs_from_gid,
32
  unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt)
33
2
{
34
2
  unsigned int k;
35
2
  int ucsbuf[PDF_MRANGE_CAP];
36
2
  int ucslen;
37
38
131k
  for (k = 0; k <= n; ++k)
39
131k
  {
40
131k
    ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf);
41
131k
    if (ucslen == 1)
42
14.4k
      pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[0]);
43
116k
    else if (ucslen > 1)
44
3.20k
      pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen);
45
131k
  }
46
2
}
47
48
static pdf_cmap *
49
pdf_remap_cmap(fz_context *ctx, pdf_cmap *gid_from_cpt, pdf_cmap *ucs_from_cpt)
50
2
{
51
2
  pdf_cmap *ucs_from_gid;
52
2
  unsigned int a, b, x;
53
2
  int i;
54
55
2
  ucs_from_gid = pdf_new_cmap(ctx);
56
57
4
  fz_try(ctx)
58
4
  {
59
2
    if (gid_from_cpt->usecmap)
60
0
      ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt);
61
62
2
    pdf_add_codespace(ctx, ucs_from_gid, 0, 0x7fffffff, 4);
63
64
4
    for (i = 0; i < gid_from_cpt->rlen; ++i)
65
2
    {
66
2
      a = gid_from_cpt->ranges[i].low;
67
2
      b = gid_from_cpt->ranges[i].high;
68
2
      x = gid_from_cpt->ranges[i].out;
69
2
      pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
70
2
    }
71
72
2
    for (i = 0; i < gid_from_cpt->xlen; ++i)
73
0
    {
74
0
      a = gid_from_cpt->xranges[i].low;
75
0
      b = gid_from_cpt->xranges[i].high;
76
0
      x = gid_from_cpt->xranges[i].out;
77
0
      pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
78
0
    }
79
80
    /* Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. */
81
82
2
    pdf_sort_cmap(ctx, ucs_from_gid);
83
2
  }
84
4
  fz_catch(ctx)
85
0
  {
86
0
    pdf_drop_cmap(ctx, ucs_from_gid);
87
0
    fz_rethrow(ctx);
88
0
  }
89
90
2
  return ucs_from_gid;
91
2
}
92
93
void
94
pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font,
95
  const char **strings, char *collection, pdf_obj *cmapstm)
96
4
{
97
4
  unsigned int cpt;
98
99
4
  if (pdf_is_stream(ctx, cmapstm))
100
2
  {
101
2
    pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(ctx, doc, cmapstm);
102
4
    fz_try(ctx)
103
4
      font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
104
4
    fz_always(ctx)
105
2
      pdf_drop_cmap(ctx, ucs_from_cpt);
106
2
    fz_catch(ctx)
107
0
      fz_rethrow(ctx);
108
2
    font->size += pdf_cmap_size(ctx, font->to_unicode);
109
2
  }
110
111
2
  else if (pdf_is_name(ctx, cmapstm))
112
0
  {
113
0
    pdf_cmap *ucs_from_cpt = pdf_load_system_cmap(ctx, pdf_to_name(ctx, cmapstm));
114
0
    fz_try(ctx)
115
0
      font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
116
0
    fz_always(ctx)
117
0
      pdf_drop_cmap(ctx, ucs_from_cpt);
118
0
    fz_catch(ctx)
119
0
      fz_rethrow(ctx);
120
0
    font->size += pdf_cmap_size(ctx, font->to_unicode);
121
0
  }
122
123
2
  else if (collection)
124
0
  {
125
0
    if (!strcmp(collection, "Adobe-CNS1"))
126
0
      font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
127
0
    else if (!strcmp(collection, "Adobe-GB1"))
128
0
      font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
129
0
    else if (!strcmp(collection, "Adobe-Japan1"))
130
0
      font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
131
0
    else if (!strcmp(collection, "Adobe-Korea1"))
132
0
      font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
133
0
  }
134
135
4
  if (strings)
136
2
  {
137
    /* TODO one-to-many mappings */
138
139
2
    font->cid_to_ucs = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_ucs");
140
2
    font->cid_to_ucs_len = 256;
141
2
    font->size += 256 * sizeof *font->cid_to_ucs;
142
143
514
    for (cpt = 0; cpt < 256; cpt++)
144
512
    {
145
512
      if (strings[cpt])
146
448
        font->cid_to_ucs[cpt] = fz_unicode_from_glyph_name(strings[cpt]);
147
64
      else
148
64
        font->cid_to_ucs[cpt] = FZ_REPLACEMENT_CHARACTER;
149
512
    }
150
2
  }
151
152
4
  if (!font->to_unicode && !font->cid_to_ucs)
153
0
  {
154
    /* TODO: synthesize a ToUnicode if it's a freetype font with
155
     * cmap and/or post tables or if it has glyph names. */
156
0
  }
157
4
}