/src/mupdf/source/fitz/encodings.c

Source
// Copyright (C) 2004-2021 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <string.h>
#include <stdlib.h>

#include "encodings.h"
#include "glyphlist.h"
#include "smallcaps.h"

/*
  FROM_UNICODE(ENC) expands to the complete body of a function that
  takes a single `int u` argument.

  Values below 128 are handed back unchanged. Anything else is looked
  up with a binary search over ENC##_from_unicode, comparing against
  each entry's `u` member (the search relies on the table being sorted
  by that member). On a match the entry's `c` member is returned;
  otherwise the result is -1.
*/
#define FROM_UNICODE(ENC) \
  int lo, hi; \
  if (u < 128) \
    return u; \
  lo = 0; \
  hi = nelem(ENC##_from_unicode) - 1; \
  while (lo <= hi) \
  { \
    int mid = (lo + hi) >> 1; \
    if (u == ENC##_from_unicode[mid].u) \
      return ENC##_from_unicode[mid].c; \
    if (u < ENC##_from_unicode[mid].u) \
      hi = mid - 1; \
    else \
      lo = mid + 1; \
  } \
  return -1;

int
fz_iso8859_1_from_unicode(int u)
{
  FROM_UNICODE(iso8859_1)
}

int
fz_iso8859_7_from_unicode(int u)
{
  FROM_UNICODE(iso8859_7)
}

int
fz_koi8u_from_unicode(int u)
{
  FROM_UNICODE(koi8u)
}

int
fz_windows_1250_from_unicode(int u)
{
  FROM_UNICODE(windows_1250)
}

int
fz_windows_1251_from_unicode(int u)
{
  FROM_UNICODE(windows_1251)
}

int
fz_windows_1252_from_unicode(int u)
{
  FROM_UNICODE(windows_1252)
}

/*
  Exact-match lookup of a glyph name.

  Binary-searches single_name_list with strcmp and, on a hit, returns
  the entry at the same index in single_code_list. No normalisation of
  the name is attempted. Returns 0 when the name is not in the list.
*/
int
fz_unicode_from_glyph_name_strict(const char *name)
{
  int lo = 0;
  int hi = nelem(single_name_list) - 1;

  while (lo <= hi)
  {
    int mid = (lo + hi) >> 1;
    int cmp = strcmp(name, single_name_list[mid]);

    if (cmp == 0)
      return single_code_list[mid];
    if (cmp < 0)
      hi = mid - 1;
    else
      lo = mid + 1;
  }

  return 0;
}

/*
  Parse the whole of string p as an integer in the given base.

  Returns the parsed value when the entire string was consumed by
  strtol and the value lies in 0..0x10ffff. Returns 0 if any
  unparsed characters remain, or if the value is negative or larger
  than 0x10ffff.
*/
static int
read_num(const char *p, int base)
{
  char *e;
  long v = strtol(p, &e, base);

  if (*e != 0)
    return 0;

  /* strtol yields a long. Range-check before narrowing to int so that
   * an over-long digit string cannot wrap around into a small,
   * valid-looking value. */
  if (v < 0 || v > 0x10ffff)
    return 0;

  return (int)v;
}

/*
  Lenient mapping from a glyph name to a code point.

  Works on a copy of the name truncated to fit a 64-byte buffer:
    1. Everything from the first '.' onwards is dropped.
    2. If an '_' remains, the five ligature spellings "f_f", "f_f_i",
       "f_f_l", "f_i" and "f_l" are rewritten to "ff", "ffi", "ffl",
       "fi" and "fl"; any other name is cut at the first '_'.
    3. The result is binary-searched in single_name_list; a hit
       returns the matching single_code_list entry.
    4. Otherwise the name is read as a number: "uniXXXX" (exactly
       seven characters) and "u..." as hexadecimal, "a..." (three or
       more characters) as decimal after the 'a', anything else as
       plain decimal.

  Returns the parsed number when it lies in 1..0x10ffff, and
  FZ_REPLACEMENT_CHARACTER otherwise.
*/
int
fz_unicode_from_glyph_name(const char *name)
{
  char buf[64];
  char *sep;
  int lo = 0;
  int hi = nelem(single_name_list) - 1;
  int code;

  fz_strlcpy(buf, name, sizeof buf);

  /* Drop any suffix that starts at the first period. */
  sep = strchr(buf, '.');
  if (sep)
    *sep = 0;

  /* Handle the first underscore: known ligature spellings are
   * rewritten, everything else is truncated there. */
  sep = strchr(buf, '_');
  if (sep)
  {
    if (strcmp(buf, "f_f") == 0)
      strcpy(buf, "ff");
    else if (strcmp(buf, "f_f_i") == 0)
      strcpy(buf, "ffi");
    else if (strcmp(buf, "f_f_l") == 0)
      strcpy(buf, "ffl");
    else if (strcmp(buf, "f_i") == 0)
      strcpy(buf, "fi");
    else if (strcmp(buf, "f_l") == 0)
      strcpy(buf, "fl");
    else
      *sep = 0;
  }

  /* Try the name table first. */
  while (lo <= hi)
  {
    int mid = (lo + hi) >> 1;
    int cmp = strcmp(buf, single_name_list[mid]);

    if (cmp == 0)
      return single_code_list[mid];
    if (cmp < 0)
      hi = mid - 1;
    else
      lo = mid + 1;
  }

  /* Not a known name: fall back to the numeric naming schemes. */
  if (strncmp(buf, "uni", 3) == 0 && strlen(buf) == 7)
    code = read_num(buf + 3, 16);
  else if (buf[0] == 'u')
    code = read_num(buf + 1, 16);
  else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0)
    code = read_num(buf + 1, 10);
  else
    code = read_num(buf, 10);

  if (code > 0 && code <= 0x10ffff)
    return code;
  return FZ_REPLACEMENT_CHARACTER;
}

/* Result for code points without an entry: a list holding only a
 * null pointer. */
static const char *empty_dup_list[] = { 0 };

/*
  Find the entry for code point ucs in the duplicate-name tables.

  agl_dup_offsets is read as consecutive pairs: the even slot is the
  code point the search compares against, the odd slot is an index
  into agl_dup_names. Returns a pointer into agl_dup_names at that
  index, or empty_dup_list when ucs has no entry.

  NOTE(review): callers presumably walk the result until a null
  entry, as empty_dup_list suggests; confirm against glyphlist.h.
*/
const char **
fz_duplicate_glyph_names_from_unicode(int ucs)
{
  int lo = 0;
  int hi = nelem(agl_dup_offsets) / 2 - 1;

  while (lo <= hi)
  {
    int mid = (lo + hi) >> 1;
    int slot = 2 * mid;

    if (ucs == agl_dup_offsets[slot])
      return agl_dup_names + agl_dup_offsets[slot + 1];
    if (ucs < agl_dup_offsets[slot])
      hi = mid - 1;
    else
      lo = mid + 1;
  }

  return empty_dup_list;
}

/*
  Look up the glyph name recorded for code point u in the
  glyph_name_from_unicode_sc table.

  The table is binary-searched on each entry's `u` member. Returns
  the entry's `n` member on a match, or NULL when u has no entry.
*/
const char *
fz_glyph_name_from_unicode_sc(int u)
{
  int l = 0;
  /* Each element is a struct carrying both the code point and the
   * name, so the last valid index is nelem() - 1. Halving the count
   * is only correct for flat key/value arrays such as
   * agl_dup_offsets; here it would leave the upper half of the table
   * unreachable. */
  int r = nelem(glyph_name_from_unicode_sc) - 1;

  while (l <= r)
  {
    int m = (l + r) >> 1;
    if (u < glyph_name_from_unicode_sc[m].u)
      r = m - 1;
    else if (u > glyph_name_from_unicode_sc[m].u)
      l = m + 1;
    else
      return glyph_name_from_unicode_sc[m].n;
  }
  return NULL;
}

Line	Count	Source
1		// Copyright (C) 2004-2021 Artifex Software, Inc.
2		//
3		// This file is part of MuPDF.
4		//
5		// MuPDF is free software: you can redistribute it and/or modify it under the
6		// terms of the GNU Affero General Public License as published by the Free
7		// Software Foundation, either version 3 of the License, or (at your option)
8		// any later version.
9		//
10		// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11		// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13		// details.
14		//
15		// You should have received a copy of the GNU Affero General Public License
16		// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17		//
18		// Alternative licensing terms are available from the licensor.
19		// For commercial licensing, see <https://www.artifex.com/> or contact
20		// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21		// CA 94129, USA, for further information.
22
23		#include "mupdf/fitz.h"
24		#include "mupdf/pdf.h"
25
26		#include <string.h>
27		#include <stdlib.h>
28
29		#include "encodings.h"
30		#include "glyphlist.h"
31		#include "smallcaps.h"
32
33		#define FROM_UNICODE(ENC) \
34	0	int l = 0; \
35	0	int r = nelem(ENC##_from_unicode) - 1; \
36	0	if (u < 128) \
37	0	return u; \
38	0	while (l <= r) \
39	0	{ \
40	0	int m = (l + r) >> 1; \
41	0	if (u < ENC##_from_unicode[m].u) \
42	0	r = m - 1; \
43	0	else if (u > ENC##_from_unicode[m].u) \
44	0	l = m + 1; \
45	0	else \
46	0	return ENC##_from_unicode[m].c; \
47	0	} \
48	0	return -1; \
49
50	0	int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) }
51	0	int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) }
52	0	int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) }
53	0	int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) }
54	0	int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) }
55	0	int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) }
56
57		int
58		fz_unicode_from_glyph_name_strict(const char *name)
59	0	{
60	0	int l = 0;
61	0	int r = nelem(single_name_list) - 1;
62
63	0	while (l <= r)
64	0	{
65	0	int m = (l + r) >> 1;
66	0	int c = strcmp(name, single_name_list[m]);
67	0	if (c < 0)
68	0	r = m - 1;
69	0	else if (c > 0)
70	0	l = m + 1;
71	0	else
72	0	return single_code_list[m];
73	0	}
74	0	return 0;
75	0	}
76
77		static int
78		read_num(const char *p, int base)
79	0	{
80	0	char *e;
81	0	int v = strtol(p, &e, base);
82	0	if (*e != 0)
83	0	return 0;
84	0	return v;
85	0	}
86
87		int
88		fz_unicode_from_glyph_name(const char *name)
89	448	{
90	448	char buf[64];
91	448	char *p;
92	448	int l = 0;
93	448	int r = nelem(single_name_list) - 1;
94	448	int code = 0;
95
96	448	fz_strlcpy(buf, name, sizeof buf);
97
98		/* kill anything after first period and underscore */
99	448	p = strchr(buf, '.');
100	448	if (p) p[0] = 0;
101	448	p = strchr(buf, '_');
102	448	if (p)
103	0	{
104		/* Hacky tests for alternative ligature names */
105	0	if (buf[0] == 'f')
106	0	{
107	0	if (!strcmp(buf, "f_f"))
108	0	strcpy(buf, "ff");
109	0	else if (!strcmp(buf, "f_f_i"))
110	0	strcpy(buf, "ffi");
111	0	else if (!strcmp(buf, "f_f_l"))
112	0	strcpy(buf, "ffl");
113	0	else if (!strcmp(buf, "f_i"))
114	0	strcpy(buf, "fi");
115	0	else if (!strcmp(buf, "f_l"))
116	0	strcpy(buf, "fl");
117	0	else
118	0	p[0] = 0;
119	0	}
120	0	else
121	0	p[0] = 0;
122	0	}
123
124	5.04k	while (l <= r)
125	5.04k	{
126	5.04k	int m = (l + r) >> 1;
127	5.04k	int c = strcmp(buf, single_name_list[m]);
128	5.04k	if (c < 0)
129	2.32k	r = m - 1;
130	2.72k	else if (c > 0)
131	2.27k	l = m + 1;
132	448	else
133	448	return single_code_list[m];
134	5.04k	}
135
136	0	if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7)
137	0	code = read_num(buf+3, 16);
138	0	else if (buf[0] == 'u')
139	0	code = read_num(buf+1, 16);
140	0	else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0)
141	0	code = read_num(buf+1, 10);
142	0	else
143	0	code = read_num(buf, 10);
144
145	0	return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER;
146	448	}
147
148		static const char *empty_dup_list[] = { 0 };
149
150		const char **
151		fz_duplicate_glyph_names_from_unicode(int ucs)
152	0	{
153	0	int l = 0;
154	0	int r = nelem(agl_dup_offsets) / 2 - 1;
155	0	while (l <= r)
156	0	{
157	0	int m = (l + r) >> 1;
158	0	if (ucs < agl_dup_offsets[m << 1])
159	0	r = m - 1;
160	0	else if (ucs > agl_dup_offsets[m << 1])
161	0	l = m + 1;
162	0	else
163	0	return agl_dup_names + agl_dup_offsets[(m << 1) + 1];
164	0	}
165	0	return empty_dup_list;
166	0	}
167
168		const char *
169		fz_glyph_name_from_unicode_sc(int u)
170	0	{
171	0	int l = 0;
172	0	int r = nelem(glyph_name_from_unicode_sc) / 2 - 1;
173	0	while (l <= r)
174	0	{
175	0	int m = (l + r) >> 1;
176	0	if (u < glyph_name_from_unicode_sc[m].u)
177	0	r = m - 1;
178	0	else if (u > glyph_name_from_unicode_sc[m].u)
179	0	l = m + 1;
180	0	else
181	0	return glyph_name_from_unicode_sc[m].n;
182	0	}
183	0	return NULL;
184	0	}

Coverage Report

Created: 2025-12-03 07:00