Coverage Report

Created: 2026-06-11 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/ucs.c
Line
Count
Source
1
2
#ifdef USE_UNICODE
3
4
#include <stdlib.h>
5
#include "wc.h"
6
#include "ucs.h"
7
#include "search.h"
8
#include "big5.h"
9
#include "hkscs.h"
10
#include "sjis.h"
11
#include "johab.h"
12
#include "gbk.h"
13
#include "gb18030.h"
14
#include "uhc.h"
15
#include "viet.h"
16
#include "wtf.h"
17
18
#include "ucs.map"
19
20
#include "map/ucs_ambwidth.map"
21
#include "map/ucs_wide.map"
22
#include "map/ucs_combining.map"
23
#include "map/ucs_precompose.map"
24
#include "map/ucs_hangul.map"
25
#include "map/ucs_fullwidth.map"
26
#include "map/ucs_isalpha.map"
27
#include "map/ucs_isdigit.map"
28
#include "map/ucs_islower.map"
29
#include "map/ucs_isupper.map"
30
#include "map/ucs_case.map"
31
32
82.0k
/*
 * Size used when a language-tag buffer is created in wtf_push_ucs(), and
 * the length cap checked there before another tag character is appended.
 */
#define MAX_TAG_LEN (8 + 1 + 8)

/* Number of slots in tag_map[]. */
#define MAX_TAG_MAP 0x100

/*
 * Registered tag strings.  wc_ucs_put_tag() fills slots starting at index 1,
 * so slot 0 is never assigned; n_tag_map is the index of the last used slot.
 */
static char *tag_map[MAX_TAG_MAP];
static int n_tag_map = 0;
36
37
wc_table *
38
wc_get_ucs_table(wc_ccs ccs)
39
1.10M
{
40
1.10M
    int f = WC_CCS_INDEX(ccs);
41
42
1.10M
    switch (WC_CCS_TYPE(ccs)) {
43
8.89k
    case WC_CCS_A_CS94:
44
8.89k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
45
0
      return NULL;
46
8.89k
  return &ucs_cs94_table[f - WC_F_ISO_BASE];
47
5.90k
    case WC_CCS_A_CS94W:
48
5.90k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
49
0
      return NULL;
50
5.90k
  return &ucs_cs94w_table[f - WC_F_ISO_BASE];
51
299
    case WC_CCS_A_CS96:
52
299
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
53
0
      return NULL;
54
299
  return &ucs_cs96_table[f - WC_F_ISO_BASE];
55
0
    case WC_CCS_A_CS96W:
56
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
57
0
      return NULL;
58
0
  return &ucs_cs96w_table[f - WC_F_ISO_BASE];
59
0
    case WC_CCS_A_CS942:
60
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
61
0
      return NULL;
62
0
  return &ucs_cs942_table[f - WC_F_ISO_BASE];
63
1.09k
    case WC_CCS_A_PCS:
64
1.09k
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
65
0
      return NULL;
66
1.09k
  return &ucs_pcs_table[f - WC_F_PCS_BASE];
67
1.08M
    case WC_CCS_A_PCSW:
68
1.08M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
69
0
      return NULL;
70
1.08M
  return &ucs_pcsw_table[f - WC_F_PCS_BASE];
71
3.32k
    default:
72
3.32k
  return NULL;
73
1.10M
    }
74
1.10M
}
75
76
wc_wchar_t
77
wc_ucs_to_any(wc_uint32 ucs, wc_table *t)
78
125M
{
79
125M
    wc_wchar_t cc;
80
125M
    wc_map *map;
81
82
125M
    if (t && t->map && ucs && ucs <= WC_C_UCS2_END) {
83
125M
  map = wc_map_search((wc_uint16)ucs, t->map, t->n);
84
125M
  if (map)
85
59.9M
      return t->conv(t->ccs, map->code2);
86
125M
    }
87
65.7M
    if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) {
88
33.6k
  if (t->ccs == WC_CCS_JIS_X_0213_1)
89
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
90
0
    ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map);
91
33.6k
  else if (t->ccs == WC_CCS_JIS_X_0213_2)
92
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
93
0
    ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map);
94
33.6k
  else if (t->ccs == WC_CCS_HKSCS ||
95
33.6k
     t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2)
96
1.56k
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
97
1.56k
    ucs_p2_hkscs_map, N_ucs_p2_hkscs_map);
98
32.1k
  else
99
32.1k
      map = NULL;
100
33.6k
  if (map)
101
81
      return t->conv(t->ccs, map->code2);
102
33.6k
    }
103
65.7M
    cc.ccs = WC_CCS_UNKNOWN;
104
65.7M
    cc.code = 0;
105
65.7M
    return cc;
106
65.7M
}
107
108
wc_uint32
109
wc_any_to_ucs(wc_wchar_t cc)
110
136M
{
111
136M
    int f;
112
136M
    wc_uint16 *map = NULL;
113
136M
    wc_uint32 map_size = 0x80;
114
136M
    wc_map *map2;
115
116
136M
    f = WC_CCS_INDEX(cc.ccs);
117
136M
    switch (WC_CCS_TYPE(cc.ccs)) {
118
1.63M
    case WC_CCS_A_CS94:
119
1.63M
  if (cc.ccs == WC_CCS_US_ASCII)
120
0
      return cc.code;
121
1.63M
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
122
565k
      return WC_C_UCS4_ERROR;
123
1.07M
  map = cs94_ucs_map[f - WC_F_ISO_BASE];
124
1.07M
  cc.code &= 0x7f;
125
1.07M
  break;
126
69.2M
    case WC_CCS_A_CS94W:
127
69.2M
  if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) {
128
0
      cc.ccs = WC_CCS_GB_12345;
129
0
      return wc_any_to_ucs(cc);
130
69.2M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_1) {
131
705k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
132
705k
    jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map);
133
705k
      if (map2)
134
811
    return map2->code2 | WC_C_UCS4_PLANE2;
135
68.5M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_2) {
136
51.2k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
137
51.2k
    jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map);
138
51.2k
      if (map2)
139
6.27k
    return map2->code2 | WC_C_UCS4_PLANE2;
140
51.2k
  }
141
69.2M
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
142
154k
      return 0;
143
69.0M
  map = cs94w_ucs_map[f - WC_F_ISO_BASE];
144
69.0M
  map_size = cs94w_ucs_map_size[f - WC_F_ISO_BASE];
145
69.0M
  cc.code = WC_CS94W_N(cc.code);
146
69.0M
  break;
147
4.64M
    case WC_CCS_A_CS96:
148
4.64M
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
149
262k
      return WC_C_UCS4_ERROR;
150
4.38M
  map = cs96_ucs_map[f - WC_F_ISO_BASE];
151
4.38M
  cc.code &= 0x7f;
152
4.38M
  break;
153
8.20k
    case WC_CCS_A_CS96W:
154
8.20k
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
155
8.20k
      return WC_C_UCS4_ERROR;
156
0
  map = cs96w_ucs_map[f - WC_F_ISO_BASE];
157
0
  map_size = cs96w_ucs_map_size[f - WC_F_ISO_BASE];
158
0
  cc.code = WC_CS96W_N(cc.code);
159
0
  break;
160
6.09k
    case WC_CCS_A_CS942:
161
6.09k
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
162
6.09k
      return WC_C_UCS4_ERROR;
163
0
  map = cs942_ucs_map[f - WC_F_ISO_BASE];
164
0
  cc.code &= 0x7f;
165
0
  break;
166
23.0M
    case WC_CCS_A_PCS:
167
23.0M
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
168
1.54k
      return WC_C_UCS4_ERROR;
169
23.0M
  switch (cc.ccs) {
170
3.51k
  case WC_CCS_CP1258_2:
171
3.51k
      map2 = wc_map_search((wc_uint16)cc.code,
172
3.51k
    cp12582_ucs_map, N_cp12582_ucs_map);
173
3.51k
      if (map2)
174
3.30k
    return map2->code2;
175
209
      return WC_C_UCS4_ERROR;
176
3.22k
  case WC_CCS_TCVN_5712_3:
177
3.22k
      return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc));
178
245k
  case WC_CCS_GBK_80:
179
245k
      return WC_C_UCS2_EURO;
180
23.0M
  }
181
22.8M
  map = pcs_ucs_map[f - WC_F_PCS_BASE];
182
22.8M
  map_size = pcs_ucs_map_size[f - WC_F_PCS_BASE];
183
22.8M
  cc.code &= 0x7f;
184
22.8M
  break;
185
8.71M
    case WC_CCS_A_PCSW:
186
8.71M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
187
27.0k
      return WC_C_UCS4_ERROR;
188
8.69M
  map = pcsw_ucs_map[f - WC_F_PCS_BASE];
189
8.69M
  map_size = pcsw_ucs_map_size[f - WC_F_PCS_BASE];
190
8.69M
  switch (cc.ccs) {
191
492k
  case WC_CCS_BIG5:
192
492k
      cc.code = WC_BIG5_N(cc.code);
193
492k
      break;
194
0
  case WC_CCS_BIG5_2:
195
0
      cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE;
196
0
      break;
197
0
  case WC_CCS_HKSCS_1:
198
0
  case WC_CCS_HKSCS_2:
199
0
      cc = wc_cs128w_to_hkscs(cc);
200
28.4k
  case WC_CCS_HKSCS:
201
28.4k
      map2 = wc_map_search((wc_uint16)cc.code,
202
28.4k
    hkscs_ucs_p2_map, N_hkscs_ucs_p2_map);
203
28.4k
      if (map2)
204
6.34k
    return map2->code2 | WC_C_UCS4_PLANE2;
205
22.1k
      cc.code = wc_hkscs_to_N(cc.code);
206
22.1k
      break;
207
1.12M
  case WC_CCS_JOHAB:
208
1.12M
      return wc_any_to_ucs(wc_johab_to_cs128w(cc));
209
1.20M
  case WC_CCS_JOHAB_1:
210
1.20M
      return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL;
211
156k
  case WC_CCS_JOHAB_2:
212
156k
      cc.code = WC_CS128W_N(cc.code);
213
156k
      cc.code = WC_N_JOHAB2(cc.code);
214
156k
      map2 = wc_map_search((wc_uint16)cc.code,
215
156k
    johab2_ucs_map, N_johab2_ucs_map);
216
156k
      if (map2)
217
676
    return map2->code2;
218
156k
      return WC_C_UCS4_ERROR;
219
7.99k
  case WC_CCS_JOHAB_3:
220
7.99k
      if ((cc.code & 0x7f7f) < 0x2121)
221
886
    return WC_C_UCS4_ERROR;
222
391k
  case WC_CCS_SJIS_EXT:
223
391k
      return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc));
224
256k
  case WC_CCS_SJIS_EXT_1:
225
256k
      cc.code = wc_sjis_ext1_to_N(cc.code);
226
256k
      if (cc.code == WC_C_SJIS_ERROR)
227
120k
    return WC_C_UCS4_ERROR;
228
136k
      break;
229
136k
  case WC_CCS_SJIS_EXT_2:
230
134k
      cc.code = wc_sjis_ext2_to_N(cc.code);
231
134k
      if (cc.code == WC_C_SJIS_ERROR)
232
104k
    return WC_C_UCS4_ERROR;
233
30.6k
      break;
234
30.6k
  case WC_CCS_GBK_1:
235
0
  case WC_CCS_GBK_2:
236
0
      cc = wc_cs128w_to_gbk(cc);
237
3.57M
  case WC_CCS_GBK:
238
3.57M
      cc.code = wc_gbk_to_N(cc.code);
239
3.57M
      break;
240
395k
  case WC_CCS_GBK_EXT:
241
395k
  case WC_CCS_GBK_EXT_1:
242
395k
  case WC_CCS_GBK_EXT_2:
243
395k
      return wc_gb18030_to_ucs(cc);
244
0
  case WC_CCS_UHC_1:
245
0
  case WC_CCS_UHC_2:
246
0
      cc = wc_cs128w_to_uhc(cc);
247
917k
  case WC_CCS_UHC:
248
917k
      if (cc.code > WC_C_UHC_END)
249
46.4k
    return WC_C_UCS4_ERROR;
250
871k
      cc.code = wc_uhc_to_N(cc.code);
251
871k
      break;
252
9.04k
  default:
253
9.04k
      cc.code = WC_CS94W_N(cc.code);
254
9.04k
      break;
255
8.69M
  }
256
5.13M
  break;
257
5.13M
    case WC_CCS_A_WCS16:
258
99.7k
  switch (WC_CCS_SET(cc.ccs)) {
259
96.9k
  case WC_CCS_UCS2:
260
96.9k
      return cc.code;
261
99.7k
  }
262
2.77k
  return WC_C_UCS4_ERROR;
263
25.8M
    case WC_CCS_A_WCS32:
264
25.8M
  switch (WC_CCS_SET(cc.ccs)) {
265
6.65k
  case WC_CCS_UCS4:
266
6.65k
      return cc.code;
267
25.8M
  case WC_CCS_UCS_TAG:
268
25.8M
      return wc_ucs_tag_to_ucs(cc.code);
269
34.9k
  case WC_CCS_GB18030:
270
34.9k
      return wc_gb18030_to_ucs(cc);
271
25.8M
  }
272
1.59k
  return WC_C_UCS4_ERROR;
273
3.27M
    case WC_CCS_A_UNKNOWN:
274
3.27M
  if (cc.ccs == WC_CCS_C1)
275
3.26M
      return (cc.code | 0x80);
276
38.3k
    default:
277
38.3k
  return WC_C_UCS4_ERROR;
278
136M
    }
279
102M
    if (map == NULL)
280
684k
  return WC_C_UCS4_ERROR;
281
101M
    if (map_size == 0 || cc.code > map_size - 1)
282
7.28k
  return WC_C_UCS4_ERROR;
283
101M
    cc.code = map[cc.code];
284
101M
    return cc.code ? cc.code : WC_C_UCS4_ERROR;
285
101M
}
286
287
wc_wchar_t
288
wc_any_to_any(wc_wchar_t cc, wc_table *t)
289
246k
{
290
246k
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
291
246k
    wc_uint32 ucs = wc_any_to_ucs(cc);
292
293
246k
    if (ucs != WC_C_UCS4_ERROR) {
294
100k
  cc = wc_ucs_to_any(ucs, t);
295
100k
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
296
18.6k
      return cc;
297
298
81.5k
  ucs = wc_ucs_to_fullwidth(ucs);
299
81.5k
  if (ucs != WC_C_UCS4_ERROR) {
300
0
      cc = wc_ucs_to_any(ucs, t);
301
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
302
0
    return cc;
303
0
  }
304
81.5k
    }
305
227k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
306
227k
    return cc;
307
246k
}
308
309
wc_wchar_t
310
wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist)
311
103M
{
312
103M
    wc_wchar_t cc;
313
103M
    wc_table **t;
314
315
103M
    if (tlist != NULL) {
316
223M
  for (t = tlist; *t != NULL; t++) {
317
179M
      if ((*t)->map == NULL)
318
54.8M
    continue;
319
124M
      cc = wc_ucs_to_any(ucs, *t);
320
124M
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
321
59.2M
    return cc;
322
124M
  }
323
103M
    }
324
44.1M
    cc.ccs = WC_CCS_UNKNOWN;
325
44.1M
    return cc;
326
103M
}
327
328
wc_wchar_t
329
wc_any_to_any_ces(wc_wchar_t cc, wc_status *st)
330
125M
{
331
125M
    wc_uint32 ucs = wc_any_to_ucs(cc);
332
125M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
333
334
125M
    if (ucs < 0x80) {
335
25.5M
  cc.ccs = WC_CCS_US_ASCII;
336
25.5M
  cc.code = ucs;
337
25.5M
  return cc;
338
25.5M
    }
339
99.5M
    if (ucs != WC_C_UCS4_ERROR) {
340
96.4M
  if (st->ces_info->id & WC_CES_T_UTF) {
341
0
      cc.ccs = wc_ucs_to_ccs(ucs);
342
0
      cc.code = ucs;
343
0
      return cc;
344
96.4M
  } else if (st->ces_info->id == WC_CES_JOHAB) {
345
1.08M
      cc = wc_ucs_to_johab(ucs);
346
1.08M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
347
406k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
348
1.08M
      return cc;
349
1.08M
  }
350
95.3M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
351
95.3M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
352
52.3M
      return cc;
353
42.9M
  if (! WcOption.fix_width_conv) {
354
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
355
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
356
0
    return cc;
357
0
  }
358
42.9M
  if (st->ces_info->id == WC_CES_GB18030) {
359
15.0M
      cc = wc_ucs_to_gb18030(ucs);
360
15.0M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
361
59.1k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
362
15.0M
      return cc;
363
15.0M
  }
364
27.8M
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
365
80.5k
      cc.ccs = WC_CCS_US_ASCII;
366
80.5k
      cc.code = 0x20;
367
80.5k
      return cc;
368
80.5k
  }
369
27.7M
  if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) &&
370
3.62M
      0x80 <= ucs && ucs <= 0x9F) {
371
1.76k
      cc.ccs = WC_CCS_C1;
372
1.76k
      cc.code = ucs;
373
1.76k
      return cc;
374
1.76k
  }
375
376
27.7M
  ucs = wc_ucs_to_fullwidth(ucs);
377
27.7M
  if (ucs != WC_C_UCS4_ERROR) {
378
338k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
379
338k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
380
3.05k
    return cc;
381
335k
      if (! WcOption.fix_width_conv) {
382
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
383
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
384
0
        return cc;
385
0
      }
386
335k
  }
387
27.7M
    }
388
30.8M
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
389
30.8M
    return cc;
390
99.5M
}
391
392
wc_wchar_t
393
wc_any_to_iso2022(wc_wchar_t cc, wc_status *st)
394
7.82M
{
395
7.82M
    wc_uint32 ucs = wc_any_to_ucs(cc);
396
7.82M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
397
398
7.82M
    if (ucs < 0x80) {
399
89.6k
  cc.ccs = WC_CCS_US_ASCII;
400
89.6k
  cc.code = ucs;
401
89.6k
  return cc;
402
89.6k
    }
403
7.73M
    if (ucs != WC_C_UCS4_ERROR) {
404
7.64M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
405
7.64M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
406
6.82M
      return cc;
407
822k
  if (! WcOption.strict_iso2022) {
408
0
      cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
409
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
410
0
    return cc;
411
0
  }
412
822k
  if (! WcOption.fix_width_conv) {
413
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
414
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
415
0
    return cc;
416
0
      if (! WcOption.strict_iso2022) {
417
0
    cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
418
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
419
0
        return cc;
420
0
      }
421
0
  }
422
822k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
423
245
     cc.ccs = WC_CCS_US_ASCII;
424
245
     cc.code = 0x20;
425
245
     return cc;
426
245
  }
427
428
821k
  ucs = wc_ucs_to_fullwidth(ucs);
429
821k
  if (ucs != WC_C_UCS4_ERROR) {
430
696
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
431
696
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
432
317
    return cc;
433
379
      if (! WcOption.strict_iso2022) {
434
0
    cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
435
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
436
0
        return cc;
437
0
      }
438
379
      if (! WcOption.fix_width_conv) {
439
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
440
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
441
0
        return cc;
442
0
    if (! WcOption.strict_iso2022) {
443
0
        cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
444
0
        if (!WC_CCS_IS_UNKNOWN(cc.ccs))
445
0
      return cc;
446
0
    }
447
0
      }
448
379
  }
449
821k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
450
0
     cc.ccs = WC_CCS_US_ASCII;
451
0
     cc.code = 0x20;
452
0
     return cc;
453
0
  }
454
821k
    }
455
905k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
456
905k
    return cc;
457
7.73M
}
458
459
wc_wchar_t
460
wc_ucs_to_iso2022(wc_uint32 ucs)
461
0
{
462
0
    wc_table *t;
463
0
    wc_wchar_t cc;
464
0
    int f;
465
466
0
    if (ucs <= WC_C_UCS2_END) {
467
0
  for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) {
468
0
      t = &ucs_cs96_table[f];
469
0
      if (t->map == NULL)
470
0
    continue;
471
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
472
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
473
0
    return cc;
474
0
  }
475
0
  for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) {
476
0
      t = &ucs_cs94_table[f];
477
0
      if (t->map == NULL)
478
0
    continue;
479
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
480
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
481
0
    return cc;
482
0
  }
483
0
  for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) {
484
0
      t = &ucs_cs942_table[f];
485
0
      if (t->map == NULL)
486
0
    continue;
487
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
488
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
489
0
    return cc;
490
0
  }
491
0
    }
492
0
    cc.ccs = WC_CCS_UNKNOWN;
493
0
    return cc;
494
0
}
495
496
wc_wchar_t
497
wc_ucs_to_iso2022w(wc_uint32 ucs)
498
0
{
499
0
    wc_table *t;
500
0
    wc_wchar_t cc;
501
0
    int f;
502
503
0
    if (ucs <= WC_C_UCS2_END) {
504
0
  for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) {
505
0
      t = &ucs_cs94w_table[f];
506
0
      if (t->map == NULL)
507
0
    continue;
508
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
509
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
510
0
    return cc;
511
0
  }
512
0
  for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) {
513
0
      t = &ucs_cs96w_table[f];
514
0
      if (t->map == NULL)
515
0
    continue;
516
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
517
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
518
0
    return cc;
519
0
  }
520
0
    }
521
0
    cc.ccs = WC_CCS_UNKNOWN_W;
522
0
    return cc;
523
0
}
524
525
wc_ccs
526
wc_ucs_to_ccs(wc_uint32 ucs)
527
633k
{
528
633k
    if (0x80 <= ucs && ucs <= 0x9F)
529
8.49k
  return WC_CCS_C1;
530
625k
    return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
531
625k
  | ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
532
625k
        ? WC_CCS_A_WIDE : 0)
533
625k
  | (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
534
625k
  | (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
535
633k
}
536
537
wc_bool
538
wc_is_ucs_ambiguous_width(wc_uint32 ucs)
539
0
{
540
0
    if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213)
541
0
  return 1;
542
0
    else if (ucs <= WC_C_UCS2_END)
543
0
  return (wc_map_range_search((wc_uint16)ucs,
544
0
        ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL);
545
0
    else
546
0
  return ((0xF0000 <= ucs && ucs <= 0xFFFFD)
547
0
    || (0x100000 <= ucs && ucs <= 0x10FFFD));
548
0
}
549
550
wc_bool
551
wc_is_ucs_wide(wc_uint32 ucs)
552
625k
{
553
625k
    if (ucs <= WC_C_UCS2_END)
554
611k
  return (wc_map_range_search((wc_uint16)ucs,
555
611k
    ucs_wide_map, N_ucs_wide_map) != NULL);
556
14.1k
    else
557
14.1k
  return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 ||
558
13.7k
    (ucs & ~0xFFFF) == WC_C_UCS4_PLANE3);
559
625k
}
560
561
wc_bool
562
wc_is_ucs_combining(wc_uint32 ucs)
563
625k
{
564
625k
    return (WcOption.use_combining && ucs <= WC_C_UCS2_END &&
565
611k
  wc_map_range_search((wc_uint16)ucs,
566
611k
  ucs_combining_map, N_ucs_combining_map) != NULL);
567
625k
}
568
569
wc_bool
570
wc_is_ucs_hangul(wc_uint32 ucs)
571
0
{
572
0
    return (ucs <= WC_C_UCS2_END &&
573
0
  wc_map_range_search((wc_uint16)ucs,
574
0
  ucs_hangul_map, N_ucs_hangul_map) != NULL);
575
0
}
576
577
wc_bool
578
wc_is_ucs_alpha(wc_uint32 ucs)
579
0
{
580
0
    return (ucs <= WC_C_UCS2_END &&
581
0
  wc_map_range_search((wc_uint16)ucs,
582
0
  ucs_isalpha_map, N_ucs_isalpha_map) != NULL);
583
0
}
584
585
wc_bool
586
wc_is_ucs_digit(wc_uint32 ucs)
587
0
{
588
0
    return (ucs <= WC_C_UCS2_END &&
589
0
  wc_map_range_search((wc_uint16)ucs,
590
0
  ucs_isdigit_map, N_ucs_isdigit_map) != NULL);
591
0
}
592
593
wc_bool
594
wc_is_ucs_alnum(wc_uint32 ucs)
595
0
{
596
0
    return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs));
597
0
}
598
599
wc_bool
600
wc_is_ucs_lower(wc_uint32 ucs)
601
0
{
602
0
    return (ucs <= WC_C_UCS2_END &&
603
0
  wc_map_range_search((wc_uint16)ucs,
604
0
  ucs_islower_map, N_ucs_islower_map) != NULL);
605
0
}
606
607
wc_bool
608
wc_is_ucs_upper(wc_uint32 ucs)
609
0
{
610
0
    return (ucs <= WC_C_UCS2_END &&
611
0
  wc_map_range_search((wc_uint16)ucs,
612
0
  ucs_isupper_map, N_ucs_isupper_map) != NULL);
613
0
}
614
615
wc_uint32
616
wc_ucs_toupper(wc_uint32 ucs)
617
0
{
618
0
    wc_map *conv = NULL;
619
0
    if (ucs <= WC_C_UCS2_END)
620
0
  conv = wc_map_search((wc_uint16)ucs,
621
0
           ucs_toupper_map, N_ucs_toupper_map);
622
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
623
0
}
624
625
wc_uint32
626
wc_ucs_tolower(wc_uint32 ucs)
627
0
{
628
0
    wc_map *conv = NULL;
629
0
    if (ucs <= WC_C_UCS2_END)
630
0
  conv = wc_map_search((wc_uint16)ucs,
631
0
           ucs_tolower_map, N_ucs_tolower_map);
632
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
633
0
}
634
635
wc_uint32
636
wc_ucs_totitle(wc_uint32 ucs)
637
0
{
638
0
    wc_map *conv = NULL;
639
0
    if (ucs <= WC_C_UCS2_END)
640
0
  conv = wc_map_search((wc_uint16)ucs,
641
0
           ucs_totitle_map, N_ucs_totitle_map);
642
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
643
0
}
644
645
wc_uint32
646
wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2)
647
40.4k
{
648
40.4k
    wc_map3 *map;
649
650
40.4k
    if (WcOption.use_combining &&
651
40.4k
  ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END &&
652
32.3k
  (map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2,
653
32.3k
  ucs_precompose_map, N_ucs_precompose_map)) != NULL)
654
2.86k
  return map->code3;
655
37.5k
    return WC_C_UCS4_ERROR;
656
40.4k
}
657
658
wc_uint32
659
wc_ucs_to_fullwidth(wc_uint32 ucs)
660
28.6M
{
661
28.6M
    wc_map *map;
662
663
28.6M
    if (ucs <= WC_C_UCS2_END &&
664
28.6M
  (map = wc_map_search((wc_uint16)ucs,
665
28.6M
  ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL)
666
338k
  return map->code2;
667
28.3M
    return WC_C_UCS4_ERROR;
668
28.6M
}
669
670
int
671
wc_ucs_put_tag(char *p)
672
4.33k
{
673
4.33k
    int i;
674
675
4.33k
    if (p == NULL || *p == '\0')
676
712
  return 0;
677
472k
    for (i = 1; i <= n_tag_map; i++) {
678
471k
  if (!strcasecmp(p, tag_map[i]))
679
2.45k
      return i;
680
471k
    }
681
1.17k
    if (n_tag_map + 1 >= MAX_TAG_MAP)
682
919
  return 0;
683
255
    n_tag_map++;
684
255
    tag_map[n_tag_map] = p;
685
255
    return n_tag_map;
686
1.17k
}
687
688
char *
689
wc_ucs_get_tag(int ntag)
690
17.0M
{
691
17.0M
    if (ntag <= 0 || ntag > n_tag_map)
692
648
  return NULL;
693
17.0M
    return tag_map[ntag];
694
17.0M
}
695
696
void
697
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
698
46.7M
{
699
46.7M
    wc_ccs ccs;
700
701
46.7M
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
702
50.0k
  if (! WcOption.use_language_tag)
703
0
      return;
704
50.0k
  if (ucs == WC_C_LANGUAGE_TAG)
705
4.97k
      if (st->tag)
706
114
    Strclear(st->tag);
707
4.85k
      else
708
4.85k
    st->tag = Strnew_size(MAX_TAG_LEN);
709
45.1k
  else if (ucs == WC_C_CANCEL_TAG) {
710
683
      if (st->tag)
711
477
    Strfree(st->tag);
712
683
      st->tag = NULL;
713
683
      st->ntag = 0;
714
44.4k
  }  else if (st->tag && st->tag->length < MAX_TAG_LEN &&
715
16.1k
        ucs >= WC_C_TAG_SPACE)
716
15.9k
      Strcat_char(st->tag, (char)(ucs & 0x7f));
717
50.0k
  return;
718
50.0k
    }
719
46.6M
    if (st->tag) {
720
4.33k
  st->ntag = wc_ucs_put_tag(st->tag->ptr);
721
4.33k
  st->tag = NULL;
722
4.33k
    }
723
46.6M
    if (ucs < 0x80) {
724
46.0M
  if (st->ntag)
725
44.2M
      wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
726
1.74M
  else
727
1.74M
      Strcat_char(os, (char)ucs);
728
46.0M
    } else {
729
631k
  ccs = wc_ucs_to_ccs(ucs);
730
631k
  if (st->ntag && ucs <= WC_C_UNICODE_END) {
731
519k
      ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs);
732
519k
      ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
733
519k
  }
734
631k
  wtf_push(os, ccs, ucs);
735
631k
    }
736
46.6M
}
737
738
#endif