Coverage Report

Created: 2025-12-03 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/fitz/encodings.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
#include <stdlib.h>
28
29
#include "encodings.h"
30
#include "glyphlist.h"
31
#include "smallcaps.h"
32
33
/*
 * Function body shared by the <encoding>_from_unicode lookups.
 *
 * Must be expanded inside a function that has an int parameter named `u`.
 * Values of `u` below 128 are returned unchanged. Any other value is
 * located by binary search in the ENC##_from_unicode table (the search
 * assumes the table is ordered by ascending .u); the matching entry's .c
 * field is returned, or -1 when no entry matches.
 */
#define FROM_UNICODE(ENC) \
  int lo = 0; \
  int hi = nelem(ENC##_from_unicode) - 1; \
  if (u < 128) \
    return u; \
  while (lo <= hi) \
  { \
    int mid = (lo + hi) >> 1; \
    if (u == ENC##_from_unicode[mid].u) \
      return ENC##_from_unicode[mid].c; \
    if (u < ENC##_from_unicode[mid].u) \
      hi = mid - 1; \
    else \
      lo = mid + 1; \
  } \
  return -1;
49
50
0
int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) }
51
0
int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) }
52
0
int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) }
53
0
int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) }
54
0
int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) }
55
0
int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) }
56
57
int
58
fz_unicode_from_glyph_name_strict(const char *name)
59
0
{
60
0
  int l = 0;
61
0
  int r = nelem(single_name_list) - 1;
62
63
0
  while (l <= r)
64
0
  {
65
0
    int m = (l + r) >> 1;
66
0
    int c = strcmp(name, single_name_list[m]);
67
0
    if (c < 0)
68
0
      r = m - 1;
69
0
    else if (c > 0)
70
0
      l = m + 1;
71
0
    else
72
0
      return single_code_list[m];
73
0
  }
74
0
  return 0;
75
0
}
76
77
/*
 * Parse the whole of `p` as a number in the given base.
 *
 * Returns the parsed value when the entire string is consumed and the
 * value lies in 0..0x10FFFF. Returns 0 otherwise: trailing characters
 * that are not part of the number, a negative value, or a value too
 * large. An empty string also yields 0.
 */
static int
read_num(const char *p, int base)
{
  enum { MAX_CODE_POINT = 0x10ffff };
  char *end;
  /*
   * Keep the result as long: assigning it straight to an int would
   * silently truncate oversized input (with a 64-bit long, "100000041"
   * in base 16 would come back as 0x41).
   */
  long value = strtol(p, &end, base);

  if (*end != 0)
    return 0;

  /*
   * strtol saturates at LONG_MIN/LONG_MAX on overflow, so this range
   * test also rejects overflowed input. The caller treats any result
   * that is not positive as "no code point".
   */
  if (value < 0 || value > MAX_CODE_POINT)
    return 0;

  return (int)value;
}
86
87
int
88
fz_unicode_from_glyph_name(const char *name)
89
448
{
90
448
  char buf[64];
91
448
  char *p;
92
448
  int l = 0;
93
448
  int r = nelem(single_name_list) - 1;
94
448
  int code = 0;
95
96
448
  fz_strlcpy(buf, name, sizeof buf);
97
98
  /* kill anything after first period and underscore */
99
448
  p = strchr(buf, '.');
100
448
  if (p) p[0] = 0;
101
448
  p = strchr(buf, '_');
102
448
  if (p)
103
0
  {
104
    /* Hacky tests for alternative ligature names */
105
0
    if (buf[0] == 'f')
106
0
    {
107
0
      if (!strcmp(buf, "f_f"))
108
0
        strcpy(buf, "ff");
109
0
      else if (!strcmp(buf, "f_f_i"))
110
0
        strcpy(buf, "ffi");
111
0
      else if (!strcmp(buf, "f_f_l"))
112
0
        strcpy(buf, "ffl");
113
0
      else if (!strcmp(buf, "f_i"))
114
0
        strcpy(buf, "fi");
115
0
      else if (!strcmp(buf, "f_l"))
116
0
        strcpy(buf, "fl");
117
0
      else
118
0
        p[0] = 0;
119
0
    }
120
0
    else
121
0
      p[0] = 0;
122
0
  }
123
124
5.04k
  while (l <= r)
125
5.04k
  {
126
5.04k
    int m = (l + r) >> 1;
127
5.04k
    int c = strcmp(buf, single_name_list[m]);
128
5.04k
    if (c < 0)
129
2.32k
      r = m - 1;
130
2.72k
    else if (c > 0)
131
2.27k
      l = m + 1;
132
448
    else
133
448
      return single_code_list[m];
134
5.04k
  }
135
136
0
  if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7)
137
0
    code = read_num(buf+3, 16);
138
0
  else if (buf[0] == 'u')
139
0
    code = read_num(buf+1, 16);
140
0
  else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0)
141
0
    code = read_num(buf+1, 10);
142
0
  else
143
0
    code = read_num(buf, 10);
144
145
0
  return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER;
146
448
}
147
148
static const char *empty_dup_list[] = { 0 };
149
150
const char **
151
fz_duplicate_glyph_names_from_unicode(int ucs)
152
0
{
153
0
  int l = 0;
154
0
  int r = nelem(agl_dup_offsets) / 2 - 1;
155
0
  while (l <= r)
156
0
  {
157
0
    int m = (l + r) >> 1;
158
0
    if (ucs < agl_dup_offsets[m << 1])
159
0
      r = m - 1;
160
0
    else if (ucs > agl_dup_offsets[m << 1])
161
0
      l = m + 1;
162
0
    else
163
0
      return agl_dup_names + agl_dup_offsets[(m << 1) + 1];
164
0
  }
165
0
  return empty_dup_list;
166
0
}
167
168
const char *
169
fz_glyph_name_from_unicode_sc(int u)
170
0
{
171
0
  int l = 0;
172
0
  int r = nelem(glyph_name_from_unicode_sc) / 2 - 1;
173
0
  while (l <= r)
174
0
  {
175
0
    int m = (l + r) >> 1;
176
0
    if (u < glyph_name_from_unicode_sc[m].u)
177
0
      r = m - 1;
178
0
    else if (u > glyph_name_from_unicode_sc[m].u)
179
0
      l = m + 1;
180
0
    else
181
0
      return glyph_name_from_unicode_sc[m].n;
182
0
  }
183
0
  return NULL;
184
0
}