Coverage Report

Created: 2025-01-28 06:17

/src/mupdf/thirdparty/mujs/utf.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * The authors of this software are Rob Pike and Ken Thompson.
3
 *              Copyright (c) 2002 by Lucent Technologies.
4
 * Permission to use, copy, modify, and distribute this software for any
5
 * purpose without fee is hereby granted, provided that this entire notice
6
 * is included in all copies of any software which is or includes a copy
7
 * or modification of this software and in all copies of the supporting
8
 * documentation for such software.
9
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
11
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13
 */
14
#include <stdlib.h>
15
#include <string.h>
16
17
#include "utf.h"
18
#include "utfdata.h"
19
20
0
/*
 * Number of elements in the array `a`, as an int.
 * Only meaningful for true arrays (not pointers). The whole expansion is
 * parenthesized so the macro behaves as a single primary expression in any
 * context (e.g. under sizeof or next to postfix operators).
 */
#define nelem(a) ((int)(sizeof(a) / sizeof((a)[0])))
21
22
/* Unsigned byte type; used in this file to read string bytes as values 0..255. */
typedef unsigned char uchar;
23
24
enum
25
{
26
  Bit1  = 7,
27
  Bitx  = 6,
28
  Bit2  = 5,
29
  Bit3  = 4,
30
  Bit4  = 3,
31
  Bit5  = 2,
32
33
  T1  = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
34
  Tx  = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
35
  T2  = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
36
  T3  = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
37
  T4  = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
38
  T5  = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
39
40
  Rune1 = (1<<(Bit1+0*Bitx))-1,   /* 0000 0000 0000 0000 0111 1111 */
41
  Rune2 = (1<<(Bit2+1*Bitx))-1,   /* 0000 0000 0000 0111 1111 1111 */
42
  Rune3 = (1<<(Bit3+2*Bitx))-1,   /* 0000 0000 1111 1111 1111 1111 */
43
  Rune4 = (1<<(Bit4+3*Bitx))-1,   /* 0001 1111 1111 1111 1111 1111 */
44
45
  Maskx = (1<<Bitx)-1,      /* 0011 1111 */
46
  Testx = Maskx ^ 0xFF,     /* 1100 0000 */
47
48
  Bad = Runeerror
49
};
50
51
int
52
chartorune(Rune *rune, const char *str)
53
0
{
54
0
  int c, c1, c2, c3;
55
0
  int l;
56
57
  /* overlong null character */
58
0
  if((uchar)str[0] == 0xc0 && (uchar)str[1] == 0x80) {
59
0
    *rune = 0;
60
0
    return 2;
61
0
  }
62
63
  /*
64
   * one character sequence
65
   *  00000-0007F => T1
66
   */
67
0
  c = *(uchar*)str;
68
0
  if(c < Tx) {
69
0
    *rune = c;
70
0
    return 1;
71
0
  }
72
73
  /*
74
   * two character sequence
75
   *  0080-07FF => T2 Tx
76
   */
77
0
  c1 = *(uchar*)(str+1) ^ Tx;
78
0
  if(c1 & Testx)
79
0
    goto bad;
80
0
  if(c < T3) {
81
0
    if(c < T2)
82
0
      goto bad;
83
0
    l = ((c << Bitx) | c1) & Rune2;
84
0
    if(l <= Rune1)
85
0
      goto bad;
86
0
    *rune = l;
87
0
    return 2;
88
0
  }
89
90
  /*
91
   * three character sequence
92
   *  0800-FFFF => T3 Tx Tx
93
   */
94
0
  c2 = *(uchar*)(str+2) ^ Tx;
95
0
  if(c2 & Testx)
96
0
    goto bad;
97
0
  if(c < T4) {
98
0
    l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
99
0
    if(l <= Rune2)
100
0
      goto bad;
101
0
    *rune = l;
102
0
    return 3;
103
0
  }
104
105
  /*
106
   * four character sequence
107
   *  10000-10FFFF => T4 Tx Tx Tx
108
   */
109
0
  if(UTFmax >= 4) {
110
0
    c3 = *(uchar*)(str+3) ^ Tx;
111
0
    if(c3 & Testx)
112
0
      goto bad;
113
0
    if(c < T5) {
114
0
      l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
115
0
      if(l <= Rune3)
116
0
        goto bad;
117
0
      if(l > Runemax)
118
0
        goto bad;
119
0
      *rune = l;
120
0
      return 4;
121
0
    }
122
0
  }
123
124
  /*
125
   * bad decoding
126
   */
127
0
bad:
128
0
  *rune = Bad;
129
0
  return 1;
130
0
}
131
132
int
133
runetochar(char *str, const Rune *rune)
134
0
{
135
0
  int c = *rune;
136
137
  /* overlong null character */
138
0
  if (c == 0) {
139
0
    str[0] = (char)0xc0;
140
0
    str[1] = (char)0x80;
141
0
    return 2;
142
0
  }
143
144
  /*
145
   * one character sequence
146
   *  00000-0007F => 00-7F
147
   */
148
0
  if(c <= Rune1) {
149
0
    str[0] = c;
150
0
    return 1;
151
0
  }
152
153
  /*
154
   * two character sequence
155
   *  00080-007FF => T2 Tx
156
   */
157
0
  if(c <= Rune2) {
158
0
    str[0] = T2 | (c >> 1*Bitx);
159
0
    str[1] = Tx | (c & Maskx);
160
0
    return 2;
161
0
  }
162
163
  /*
164
   * three character sequence
165
   *  00800-0FFFF => T3 Tx Tx
166
   */
167
0
  if(c > Runemax)
168
0
    c = Runeerror;
169
0
  if(c <= Rune3) {
170
0
    str[0] = T3 |  (c >> 2*Bitx);
171
0
    str[1] = Tx | ((c >> 1*Bitx) & Maskx);
172
0
    str[2] = Tx |  (c & Maskx);
173
0
    return 3;
174
0
  }
175
176
  /*
177
   * four character sequence
178
   *  010000-1FFFFF => T4 Tx Tx Tx
179
   */
180
0
  str[0] = T4 |  (c >> 3*Bitx);
181
0
  str[1] = Tx | ((c >> 2*Bitx) & Maskx);
182
0
  str[2] = Tx | ((c >> 1*Bitx) & Maskx);
183
0
  str[3] = Tx |  (c & Maskx);
184
0
  return 4;
185
0
}
186
187
int
188
runelen(int c)
189
0
{
190
0
  Rune rune;
191
0
  char str[10];
192
193
0
  rune = c;
194
0
  return runetochar(str, &rune);
195
0
}
196
197
/*
 * Binary search over a table `t` of `n` records, each `ne` Runes wide,
 * compared on the first Rune of each record.
 *
 * Assumes the records are sorted in ascending order of that first Rune.
 * Under that assumption the result is the last record whose first Rune is
 * not greater than `c`, or a null pointer when there is no such record
 * (including when n is 0). Callers still have to check whether `c`
 * actually matches the returned record.
 */
static const Rune *
ucd_bsearch(Rune c, const Rune *t, int n, int ne)
{
  while (n > 1) {
    int half = n / 2;
    const Rune *mid = t + half * ne;

    if (c < mid[0]) {
      /* target lies in the lower part */
      n = half;
    } else {
      /* target lies at or after mid */
      t = mid;
      n -= half;
    }
  }
  return (n && c >= t[0]) ? t : NULL;
}
216
217
Rune
218
tolowerrune(Rune c)
219
0
{
220
0
  const Rune *p;
221
222
0
  p = ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2)/3, 3);
223
0
  if(p && c >= p[0] && c <= p[1])
224
0
    return c + p[2];
225
0
  p = ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1)/2, 2);
226
0
  if(p && c == p[0])
227
0
    return c + p[1];
228
0
  return c;
229
0
}
230
231
Rune
232
toupperrune(Rune c)
233
0
{
234
0
  const Rune *p;
235
236
0
  p = ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2)/3, 3);
237
0
  if(p && c >= p[0] && c <= p[1])
238
0
    return c + p[2];
239
0
  p = ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1)/2, 2);
240
0
  if(p && c == p[0])
241
0
    return c + p[1];
242
0
  return c;
243
0
}
244
245
int
246
islowerrune(Rune c)
247
0
{
248
0
  const Rune *p;
249
250
0
  p = ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2)/3, 3);
251
0
  if(p && c >= p[0] && c <= p[1])
252
0
    return 1;
253
0
  p = ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1)/2, 2);
254
0
  if(p && c == p[0])
255
0
    return 1;
256
0
  return 0;
257
0
}
258
259
int
260
isupperrune(Rune c)
261
0
{
262
0
  const Rune *p;
263
264
0
  p = ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2)/3, 3);
265
0
  if(p && c >= p[0] && c <= p[1])
266
0
    return 1;
267
0
  p = ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1)/2, 2);
268
0
  if(p && c == p[0])
269
0
    return 1;
270
0
  return 0;
271
0
}
272
273
int
274
isalpharune(Rune c)
275
0
{
276
0
  const Rune *p;
277
278
0
  p = ucd_bsearch(c, ucd_alpha2, nelem(ucd_alpha2)/2, 2);
279
0
  if(p && c >= p[0] && c <= p[1])
280
0
    return 1;
281
0
  p = ucd_bsearch(c, ucd_alpha1, nelem(ucd_alpha1), 1);
282
0
  if(p && c == p[0])
283
0
    return 1;
284
0
  return 0;
285
0
}