Coverage Report

Created: 2023-06-07 06:20

/src/mupdf/source/fitz/ucdn.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
3
 *
4
 * Permission to use, copy, modify, and/or distribute this software for any
5
 * purpose with or without fee is hereby granted, provided that the above
6
 * copyright notice and this permission notice appear in all copies.
7
 *
8
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
 */
16
17
#include "mupdf/fitz.h"
18
#include "mupdf/ucdn.h"
19
20
#include <stdio.h>
21
#include <stdlib.h>
22
23
/*
 * Property record for a code point, as stored in the ucd_records table.
 * Field order must match the generated table initializers.
 */
typedef struct {
	/* value returned by ucdn_get_general_category() */
	unsigned char category;
	/* value returned by ucdn_get_combining_class() */
	unsigned char combining;
	/* value returned by ucdn_get_bidi_class() */
	unsigned char bidi_class;
	/* value returned by ucdn_get_east_asian_width() */
	unsigned char east_asian_width;
	/* value returned by ucdn_get_script() */
	unsigned char script;
	/* value returned by ucdn_get_linebreak_class() */
	unsigned char linebreak_class;
} UCDRecord;
31
32
/* One entry of the mirror_pairs table: 'from' is the search key, 'to' its mirror. */
typedef struct {
	unsigned short from;
	unsigned short to;
} MirrorPair;
35
36
/*
 * One entry of the bracket_pairs table: 'from' is the search key, 'to' the
 * paired bracket, and 'type' the value reported by ucdn_paired_bracket_type().
 */
typedef struct {
	unsigned short from;
	unsigned short to;
	unsigned char type;
} BracketPair;
40
41
/*
 * A run of consecutive code points used by the composition lookup.
 * A code point c matches the run when start <= c <= start + count, and is
 * then mapped to index + (c - start).
 */
typedef struct {
	unsigned int start;
	short count;
	short index;
} Reindex;
45
46
#include "ucdn_db.h"
47
48
/*
 * Constants for algorithmic Hangul (de)composition.
 * xBASE is the first code point of a range, xCOUNT the number of entries.
 */
#define SBASE  0xAC00	/* start of the precomposed syllable range */
#define LBASE  0x1100	/* base for leading-consonant (L) indices */
#define VBASE  0x1161	/* base for vowel (V) indices */
#define TBASE  0x11A7	/* base for trailing (T) indices; index 0 means no T */
#define SCOUNT 11172	/* number of precomposed syllables */
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
#define NCOUNT (VCOUNT * TCOUNT)	/* syllables per leading consonant */
58
59
static const UCDRecord *get_ucd_record(uint32_t code)
60
66.8k
{
61
66.8k
  int index, offset;
62
63
66.8k
  if (code >= 0x110000)
64
0
    index = 0;
65
66.8k
  else {
66
66.8k
    index  = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
67
66.8k
    offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
68
66.8k
    index  = index1[index + offset] << SHIFT2;
69
66.8k
    offset = code & ((1<<SHIFT2) - 1);
70
66.8k
    index  = index2[index + offset];
71
66.8k
  }
72
73
66.8k
  return &ucd_records[index];
74
66.8k
}
75
76
static const unsigned short *get_decomp_record(uint32_t code)
77
0
{
78
0
  int index, offset;
79
80
0
  if (code >= 0x110000)
81
0
    index = 0;
82
0
  else {
83
0
    index  = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)]
84
0
      << DECOMP_SHIFT1;
85
0
    offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1);
86
0
    index  = decomp_index1[index + offset] << DECOMP_SHIFT2;
87
0
    offset = code & ((1<<DECOMP_SHIFT2) - 1);
88
0
    index  = decomp_index2[index + offset];
89
0
  }
90
91
0
  return &decomp_data[index];
92
0
}
93
94
static int compare_reindex(const void *a, const void *b)
95
0
{
96
0
  Reindex *ra = (Reindex *)a;
97
0
  Reindex *rb = (Reindex *)b;
98
99
0
  if (ra->start < rb->start)
100
0
    return -1;
101
0
  else if (ra->start > (rb->start + rb->count))
102
0
    return 1;
103
0
  else
104
0
    return 0;
105
0
}
106
107
static int get_comp_index(uint32_t code, const Reindex *idx, size_t len)
108
0
{
109
0
  Reindex *res;
110
0
  Reindex r = {0, 0, 0};
111
0
  r.start = code;
112
0
  res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex);
113
114
0
  if (res != NULL)
115
0
    return res->index + (code - res->start);
116
0
  else
117
0
    return -1;
118
0
}
119
120
static int compare_mp(const void *a, const void *b)
121
0
{
122
0
  MirrorPair *mpa = (MirrorPair *)a;
123
0
  MirrorPair *mpb = (MirrorPair *)b;
124
0
  return mpa->from - mpb->from;
125
0
}
126
127
static int compare_bp(const void *a, const void *b)
128
0
{
129
0
  BracketPair *bpa = (BracketPair *)a;
130
0
  BracketPair *bpb = (BracketPair *)b;
131
0
  return bpa->from - bpb->from;
132
0
}
133
134
static BracketPair *search_bp(uint32_t code)
135
0
{
136
0
  BracketPair bp = {0,0,2};
137
0
  BracketPair *res;
138
139
0
  bp.from = code;
140
0
  res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN,
141
0
    sizeof(BracketPair), compare_bp);
142
0
  return res;
143
0
}
144
145
static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
146
0
{
147
0
  int si = code - SBASE;
148
149
0
  if (si < 0 || si >= SCOUNT)
150
0
    return 0;
151
152
0
  if (si % TCOUNT) {
153
    /* LV,T */
154
0
    *a = SBASE + (si / TCOUNT) * TCOUNT;
155
0
    *b = TBASE + (si % TCOUNT);
156
0
    return 3;
157
0
  } else {
158
    /* L,V */
159
0
    *a = LBASE + (si / NCOUNT);
160
0
    *b = VBASE + (si % NCOUNT) / TCOUNT;
161
0
    return 2;
162
0
  }
163
0
}
164
165
static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
166
0
{
167
0
  if (a >= SBASE && a < (SBASE + SCOUNT) && b >= TBASE && b < (TBASE + TCOUNT)) {
168
    /* LV,T */
169
0
    *code = a + (b - TBASE);
170
0
    return 3;
171
0
  } else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) {
172
    /* L,V */
173
0
    int li = a - LBASE;
174
0
    int vi = b - VBASE;
175
0
    *code = SBASE + li * NCOUNT + vi * TCOUNT;
176
0
    return 2;
177
0
  } else {
178
0
    return 0;
179
0
  }
180
0
}
181
182
/*
 * Decode one code point from a UTF-16 sequence and advance the cursor.
 *
 * code_ptr: in/out pointer to the current position; advanced by one unit
 *           for a single-unit code point and by two for a surrogate pair.
 *
 * Returns the decoded code point.
 *
 * Only units in the lead (high) surrogate range 0xD800..0xDBFF start a pair.
 * 0xDC00 is the first trail (low) surrogate, so it is returned as a single
 * unit and must not be treated as a lead surrogate. The trail unit of a
 * pair is not validated.
 */
static uint32_t decode_utf16(const unsigned short **code_ptr)
{
	const unsigned short *code = *code_ptr;

	if (code[0] < 0xd800 || code[0] > 0xdbff) {
		*code_ptr += 1;
		return (uint32_t)code[0];
	} else {
		*code_ptr += 2;
		return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
			(((uint32_t)code[0] - 0xd800) << 10);
	}
}
195
196
const char *ucdn_get_unicode_version(void)
197
0
{
198
0
  return UNIDATA_VERSION;
199
0
}
200
201
int ucdn_get_combining_class(uint32_t code)
202
0
{
203
0
  return get_ucd_record(code)->combining;
204
0
}
205
206
int ucdn_get_east_asian_width(uint32_t code)
207
0
{
208
0
  return get_ucd_record(code)->east_asian_width;
209
0
}
210
211
int ucdn_get_general_category(uint32_t code)
212
0
{
213
0
  return get_ucd_record(code)->category;
214
0
}
215
216
int ucdn_get_bidi_class(uint32_t code)
217
0
{
218
0
  return get_ucd_record(code)->bidi_class;
219
0
}
220
221
/* Return 1 when ucdn_mirror() maps code to a different code point, else 0. */
int ucdn_get_mirrored(uint32_t code)
{
	uint32_t mirrored = ucdn_mirror(code);

	return mirrored != code;
}
225
226
int ucdn_get_script(uint32_t code)
227
66.8k
{
228
66.8k
  return get_ucd_record(code)->script;
229
66.8k
}
230
231
int ucdn_get_linebreak_class(uint32_t code)
232
0
{
233
0
  return get_ucd_record(code)->linebreak_class;
234
0
}
235
236
int ucdn_get_resolved_linebreak_class(uint32_t code)
237
0
{
238
0
  const UCDRecord *record = get_ucd_record(code);
239
240
0
  switch (record->linebreak_class)
241
0
  {
242
0
  case UCDN_LINEBREAK_CLASS_AI:
243
0
  case UCDN_LINEBREAK_CLASS_SG:
244
0
  case UCDN_LINEBREAK_CLASS_XX:
245
0
    return UCDN_LINEBREAK_CLASS_AL;
246
247
0
  case UCDN_LINEBREAK_CLASS_SA:
248
0
    if (record->category == UCDN_GENERAL_CATEGORY_MC ||
249
0
      record->category == UCDN_GENERAL_CATEGORY_MN)
250
0
      return UCDN_LINEBREAK_CLASS_CM;
251
0
    return UCDN_LINEBREAK_CLASS_AL;
252
253
0
  case UCDN_LINEBREAK_CLASS_CJ:
254
0
    return UCDN_LINEBREAK_CLASS_NS;
255
256
0
  case UCDN_LINEBREAK_CLASS_CB:
257
0
    return UCDN_LINEBREAK_CLASS_B2;
258
259
0
  case UCDN_LINEBREAK_CLASS_NL:
260
0
    return UCDN_LINEBREAK_CLASS_BK;
261
262
0
  default:
263
0
    return record->linebreak_class;
264
0
  }
265
0
}
266
267
uint32_t ucdn_mirror(uint32_t code)
268
0
{
269
0
  MirrorPair mp = {0};
270
0
  MirrorPair *res;
271
272
0
  mp.from = code;
273
0
  res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN,
274
0
    sizeof(MirrorPair), compare_mp);
275
276
0
  if (res == NULL)
277
0
    return code;
278
0
  else
279
0
    return res->to;
280
0
}
281
282
uint32_t ucdn_paired_bracket(uint32_t code)
283
0
{
284
0
  BracketPair *res = search_bp(code);
285
0
  if (res == NULL)
286
0
    return code;
287
0
  else
288
0
    return res->to;
289
0
}
290
291
int ucdn_paired_bracket_type(uint32_t code)
292
0
{
293
0
  BracketPair *res = search_bp(code);
294
0
  if (res == NULL)
295
0
    return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE;
296
0
  else
297
0
    return res->type;
298
0
}
299
300
/*
 * Decompose code into a pair of code points.
 *
 * code: code point to decompose.
 * a, b: receive the result; *b is set to 0 when the record holds a single
 *       code point. Both are left untouched when 0 is returned.
 *
 * Hangul syllables are handled algorithmically first. Otherwise the
 * decomposition record is consulted: the high byte of its first unit is the
 * length, and a non-zero low byte makes this function report "no
 * decomposition" (presumably that byte tags non-canonical mappings, which
 * ucdn_compat_decompose() still returns - confirm against the generator).
 *
 * Returns 1 on success, 0 when there is no decomposition.
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
	const unsigned short *cursor;
	int count;

	if (hangul_pair_decompose(code, a, b))
		return 1;

	cursor = get_decomp_record(code);
	count = cursor[0] >> 8;

	if ((cursor[0] & 0xff) != 0 || count == 0)
		return 0;

	/* skip the header unit; the UTF-16 payload follows */
	cursor++;
	*a = decode_utf16(&cursor);
	*b = (count > 1) ? decode_utf16(&cursor) : 0;

	return 1;
}
323
324
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
325
0
{
326
0
  int l, r, index, indexi, offset;
327
328
0
  if (hangul_pair_compose(code, a, b))
329
0
    return 1;
330
331
0
  l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex));
332
0
  r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex));
333
334
0
  if (l < 0 || r < 0)
335
0
    return 0;
336
337
0
  indexi = l * TOTAL_LAST + r;
338
0
  index  = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1;
339
0
  offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1);
340
0
  index  = comp_index1[index + offset] << COMP_SHIFT2;
341
0
  offset = indexi & ((1<<COMP_SHIFT2) - 1);
342
0
  *code  = comp_data[index + offset];
343
344
0
  return *code != 0;
345
0
}
346
347
/*
 * Write the full decomposition stored for code into 'decomposed'.
 *
 * code:       code point to decompose.
 * decomposed: output buffer; the caller must provide room for every
 *             returned code point. The length comes from the high byte of
 *             the record header, so it never exceeds 255.
 *
 * Unlike ucdn_decompose(), the low byte of the record header is ignored
 * and Hangul syllables get no algorithmic handling here.
 *
 * Returns the number of code points written, or 0 when there is no
 * decomposition.
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
	const unsigned short *cursor = get_decomp_record(code);
	int count = cursor[0] >> 8;
	uint32_t *out = decomposed;
	int remaining;

	if (count == 0)
		return 0;

	/* skip the header unit; the UTF-16 payload follows */
	cursor++;
	for (remaining = count; remaining > 0; remaining--)
		*out++ = decode_utf16(&cursor);

	return count;
}