Coverage Report

Created: 2025-12-05 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/ucs.c
Line
Count
Source
1
2
#ifdef USE_UNICODE
3
4
#include <stdlib.h>
5
#include "wc.h"
6
#include "ucs.h"
7
#include "search.h"
8
#include "big5.h"
9
#include "hkscs.h"
10
#include "sjis.h"
11
#include "johab.h"
12
#include "gbk.h"
13
#include "gb18030.h"
14
#include "uhc.h"
15
#include "viet.h"
16
#include "wtf.h"
17
18
#include "ucs.map"
19
20
#include "map/ucs_ambwidth.map"
21
#include "map/ucs_wide.map"
22
#include "map/ucs_combining.map"
23
#include "map/ucs_precompose.map"
24
#include "map/ucs_hangul.map"
25
#include "map/ucs_fullwidth.map"
26
#include "map/ucs_isalpha.map"
27
#include "map/ucs_isdigit.map"
28
#include "map/ucs_islower.map"
29
#include "map/ucs_isupper.map"
30
#include "map/ucs_case.map"
31
32
72.4k
/* Capacity requested for a language-tag Str, and the length limit that
 * wtf_push_ucs() enforces while accumulating tag characters. */
#define MAX_TAG_LEN (8 + 1 + 8)
33
455
/* Size of the tag registry array.  Slot 0 is never filled: the registry
 * functions use index 0 to mean "no tag", so valid indices start at 1. */
#define MAX_TAG_MAP 0x100
34
/* Number of tags registered so far; also the highest valid index. */
static int n_tag_map = 0;
35
/* Registered tag strings, indexed 1..n_tag_map. */
static char *tag_map[ MAX_TAG_MAP ];
36
37
wc_table *
38
wc_get_ucs_table(wc_ccs ccs)
39
226k
{
40
226k
    int f = WC_CCS_INDEX(ccs);
41
42
226k
    switch (WC_CCS_TYPE(ccs)) {
43
8.70k
    case WC_CCS_A_CS94:
44
8.70k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
45
0
      return NULL;
46
8.70k
  return &ucs_cs94_table[f - WC_F_ISO_BASE];
47
5.72k
    case WC_CCS_A_CS94W:
48
5.72k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
49
0
      return NULL;
50
5.72k
  return &ucs_cs94w_table[f - WC_F_ISO_BASE];
51
248
    case WC_CCS_A_CS96:
52
248
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
53
0
      return NULL;
54
248
  return &ucs_cs96_table[f - WC_F_ISO_BASE];
55
0
    case WC_CCS_A_CS96W:
56
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
57
0
      return NULL;
58
0
  return &ucs_cs96w_table[f - WC_F_ISO_BASE];
59
0
    case WC_CCS_A_CS942:
60
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
61
0
      return NULL;
62
0
  return &ucs_cs942_table[f - WC_F_ISO_BASE];
63
998
    case WC_CCS_A_PCS:
64
998
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
65
0
      return NULL;
66
998
  return &ucs_pcs_table[f - WC_F_PCS_BASE];
67
207k
    case WC_CCS_A_PCSW:
68
207k
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
69
0
      return NULL;
70
207k
  return &ucs_pcsw_table[f - WC_F_PCS_BASE];
71
3.30k
    default:
72
3.30k
  return NULL;
73
226k
    }
74
226k
}
75
76
wc_wchar_t
77
wc_ucs_to_any(wc_uint32 ucs, wc_table *t)
78
121M
{
79
121M
    wc_wchar_t cc;
80
121M
    wc_map *map;
81
82
121M
    if (t && t->map && ucs && ucs <= WC_C_UCS2_END) {
83
121M
  map = wc_map_search((wc_uint16)ucs, t->map, t->n);
84
121M
  if (map)
85
46.7M
      return t->conv(t->ccs, map->code2);
86
121M
    }
87
74.3M
    if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) {
88
31.0k
  if (t->ccs == WC_CCS_JIS_X_0213_1)
89
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
90
0
    ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map);
91
31.0k
  else if (t->ccs == WC_CCS_JIS_X_0213_2)
92
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
93
0
    ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map);
94
31.0k
  else if (t->ccs == WC_CCS_HKSCS ||
95
31.0k
     t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2)
96
10.9k
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
97
10.9k
    ucs_p2_hkscs_map, N_ucs_p2_hkscs_map);
98
20.0k
  else
99
20.0k
      map = NULL;
100
31.0k
  if (map)
101
201
      return t->conv(t->ccs, map->code2);
102
31.0k
    }
103
74.3M
    cc.ccs = WC_CCS_UNKNOWN;
104
74.3M
    cc.code = 0;
105
74.3M
    return cc;
106
74.3M
}
107
108
wc_uint32
109
wc_any_to_ucs(wc_wchar_t cc)
110
104M
{
111
104M
    int f;
112
104M
    wc_uint16 *map = NULL;
113
104M
    wc_uint32 map_size = 0x80;
114
104M
    wc_map *map2;
115
116
104M
    f = WC_CCS_INDEX(cc.ccs);
117
104M
    switch (WC_CCS_TYPE(cc.ccs)) {
118
680k
    case WC_CCS_A_CS94:
119
680k
  if (cc.ccs == WC_CCS_US_ASCII)
120
0
      return cc.code;
121
680k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
122
63.4k
      return WC_C_UCS4_ERROR;
123
617k
  map = cs94_ucs_map[f - WC_F_ISO_BASE];
124
617k
  cc.code &= 0x7f;
125
617k
  break;
126
70.5M
    case WC_CCS_A_CS94W:
127
70.5M
  if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) {
128
0
      cc.ccs = WC_CCS_GB_12345;
129
0
      return wc_any_to_ucs(cc);
130
70.5M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_1) {
131
87.5k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
132
87.5k
    jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map);
133
87.5k
      if (map2)
134
922
    return map2->code2 | WC_C_UCS4_PLANE2;
135
70.4M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_2) {
136
167k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
137
167k
    jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map);
138
167k
      if (map2)
139
5.52k
    return map2->code2 | WC_C_UCS4_PLANE2;
140
167k
  }
141
70.5M
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
142
131k
      return 0;
143
70.4M
  map = cs94w_ucs_map[f - WC_F_ISO_BASE];
144
70.4M
  map_size = cs94w_ucs_map_size[f - WC_F_ISO_BASE];
145
70.4M
  cc.code = WC_CS94W_N(cc.code);
146
70.4M
  break;
147
3.10M
    case WC_CCS_A_CS96:
148
3.10M
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
149
142k
      return WC_C_UCS4_ERROR;
150
2.95M
  map = cs96_ucs_map[f - WC_F_ISO_BASE];
151
2.95M
  cc.code &= 0x7f;
152
2.95M
  break;
153
39.5k
    case WC_CCS_A_CS96W:
154
39.5k
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
155
39.5k
      return WC_C_UCS4_ERROR;
156
0
  map = cs96w_ucs_map[f - WC_F_ISO_BASE];
157
0
  map_size = cs96w_ucs_map_size[f - WC_F_ISO_BASE];
158
0
  cc.code = WC_CS96W_N(cc.code);
159
0
  break;
160
45.5k
    case WC_CCS_A_CS942:
161
45.5k
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
162
45.5k
      return WC_C_UCS4_ERROR;
163
0
  map = cs942_ucs_map[f - WC_F_ISO_BASE];
164
0
  cc.code &= 0x7f;
165
0
  break;
166
9.96M
    case WC_CCS_A_PCS:
167
9.96M
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
168
581
      return WC_C_UCS4_ERROR;
169
9.95M
  switch (cc.ccs) {
170
400
  case WC_CCS_CP1258_2:
171
400
      map2 = wc_map_search((wc_uint16)cc.code,
172
400
    cp12582_ucs_map, N_cp12582_ucs_map);
173
400
      if (map2)
174
205
    return map2->code2;
175
195
      return WC_C_UCS4_ERROR;
176
373
  case WC_CCS_TCVN_5712_3:
177
373
      return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc));
178
453k
  case WC_CCS_GBK_80:
179
453k
      return WC_C_UCS2_EURO;
180
9.95M
  }
181
9.50M
  map = pcs_ucs_map[f - WC_F_PCS_BASE];
182
9.50M
  map_size = pcs_ucs_map_size[f - WC_F_PCS_BASE];
183
9.50M
  cc.code &= 0x7f;
184
9.50M
  break;
185
2.41M
    case WC_CCS_A_PCSW:
186
2.41M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
187
675
      return WC_C_UCS4_ERROR;
188
2.41M
  map = pcsw_ucs_map[f - WC_F_PCS_BASE];
189
2.41M
  map_size = pcsw_ucs_map_size[f - WC_F_PCS_BASE];
190
2.41M
  switch (cc.ccs) {
191
554k
  case WC_CCS_BIG5:
192
554k
      cc.code = WC_BIG5_N(cc.code);
193
554k
      break;
194
0
  case WC_CCS_BIG5_2:
195
0
      cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE;
196
0
      break;
197
0
  case WC_CCS_HKSCS_1:
198
0
  case WC_CCS_HKSCS_2:
199
0
      cc = wc_cs128w_to_hkscs(cc);
200
62.4k
  case WC_CCS_HKSCS:
201
62.4k
      map2 = wc_map_search((wc_uint16)cc.code,
202
62.4k
    hkscs_ucs_p2_map, N_hkscs_ucs_p2_map);
203
62.4k
      if (map2)
204
13.3k
    return map2->code2 | WC_C_UCS4_PLANE2;
205
49.1k
      cc.code = wc_hkscs_to_N(cc.code);
206
49.1k
      break;
207
378k
  case WC_CCS_JOHAB:
208
378k
      return wc_any_to_ucs(wc_johab_to_cs128w(cc));
209
321k
  case WC_CCS_JOHAB_1:
210
321k
      return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL;
211
39.2k
  case WC_CCS_JOHAB_2:
212
39.2k
      cc.code = WC_CS128W_N(cc.code);
213
39.2k
      cc.code = WC_N_JOHAB2(cc.code);
214
39.2k
      map2 = wc_map_search((wc_uint16)cc.code,
215
39.2k
    johab2_ucs_map, N_johab2_ucs_map);
216
39.2k
      if (map2)
217
251
    return map2->code2;
218
38.9k
      return WC_C_UCS4_ERROR;
219
18.9k
  case WC_CCS_JOHAB_3:
220
18.9k
      if ((cc.code & 0x7f7f) < 0x2121)
221
974
    return WC_C_UCS4_ERROR;
222
47.5k
  case WC_CCS_SJIS_EXT:
223
47.5k
      return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc));
224
22.2k
  case WC_CCS_SJIS_EXT_1:
225
22.2k
      cc.code = wc_sjis_ext1_to_N(cc.code);
226
22.2k
      if (cc.code == WC_C_SJIS_ERROR)
227
14.7k
    return WC_C_UCS4_ERROR;
228
7.50k
      break;
229
25.2k
  case WC_CCS_SJIS_EXT_2:
230
25.2k
      cc.code = wc_sjis_ext2_to_N(cc.code);
231
25.2k
      if (cc.code == WC_C_SJIS_ERROR)
232
23.0k
    return WC_C_UCS4_ERROR;
233
2.22k
      break;
234
2.22k
  case WC_CCS_GBK_1:
235
0
  case WC_CCS_GBK_2:
236
0
      cc = wc_cs128w_to_gbk(cc);
237
751k
  case WC_CCS_GBK:
238
751k
      cc.code = wc_gbk_to_N(cc.code);
239
751k
      break;
240
145k
  case WC_CCS_GBK_EXT:
241
145k
  case WC_CCS_GBK_EXT_1:
242
145k
  case WC_CCS_GBK_EXT_2:
243
145k
      return wc_gb18030_to_ucs(cc);
244
0
  case WC_CCS_UHC_1:
245
0
  case WC_CCS_UHC_2:
246
0
      cc = wc_cs128w_to_uhc(cc);
247
59.6k
  case WC_CCS_UHC:
248
59.6k
      if (cc.code > WC_C_UHC_END)
249
10.0k
    return WC_C_UCS4_ERROR;
250
49.5k
      cc.code = wc_uhc_to_N(cc.code);
251
49.5k
      break;
252
4.04k
  default:
253
4.04k
      cc.code = WC_CS94W_N(cc.code);
254
4.04k
      break;
255
2.41M
  }
256
1.41M
  break;
257
1.41M
    case WC_CCS_A_WCS16:
258
16.5k
  switch (WC_CCS_SET(cc.ccs)) {
259
16.0k
  case WC_CCS_UCS2:
260
16.0k
      return cc.code;
261
16.5k
  }
262
486
  return WC_C_UCS4_ERROR;
263
17.0M
    case WC_CCS_A_WCS32:
264
17.0M
  switch (WC_CCS_SET(cc.ccs)) {
265
4.32k
  case WC_CCS_UCS4:
266
4.32k
      return cc.code;
267
17.0M
  case WC_CCS_UCS_TAG:
268
17.0M
      return wc_ucs_tag_to_ucs(cc.code);
269
8.29k
  case WC_CCS_GB18030:
270
8.29k
      return wc_gb18030_to_ucs(cc);
271
17.0M
  }
272
411
  return WC_C_UCS4_ERROR;
273
838k
    case WC_CCS_A_UNKNOWN:
274
838k
  if (cc.ccs == WC_CCS_C1)
275
837k
      return (cc.code | 0x80);
276
4.88k
    default:
277
4.88k
  return WC_C_UCS4_ERROR;
278
104M
    }
279
84.9M
    if (map == NULL)
280
426k
  return WC_C_UCS4_ERROR;
281
84.4M
    if (map_size == 0 || cc.code > map_size - 1)
282
2.57k
  return WC_C_UCS4_ERROR;
283
84.4M
    cc.code = map[cc.code];
284
84.4M
    return cc.code ? cc.code : WC_C_UCS4_ERROR;
285
84.4M
}
286
287
wc_wchar_t
288
wc_any_to_any(wc_wchar_t cc, wc_table *t)
289
1.35k
{
290
1.35k
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
291
1.35k
    wc_uint32 ucs = wc_any_to_ucs(cc);
292
293
1.35k
    if (ucs != WC_C_UCS4_ERROR) {
294
898
  cc = wc_ucs_to_any(ucs, t);
295
898
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
296
389
      return cc;
297
298
509
  ucs = wc_ucs_to_fullwidth(ucs);
299
509
  if (ucs != WC_C_UCS4_ERROR) {
300
0
      cc = wc_ucs_to_any(ucs, t);
301
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
302
0
    return cc;
303
0
  }
304
509
    }
305
964
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
306
964
    return cc;
307
1.35k
}
308
309
wc_wchar_t
310
wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist)
311
83.4M
{
312
83.4M
    wc_wchar_t cc;
313
83.4M
    wc_table **t;
314
315
83.4M
    if (tlist != NULL) {
316
198M
  for (t = tlist; *t != NULL; t++) {
317
161M
      if ((*t)->map == NULL)
318
40.7M
    continue;
319
120M
      cc = wc_ucs_to_any(ucs, *t);
320
120M
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
321
46.6M
    return cc;
322
120M
  }
323
83.4M
    }
324
36.7M
    cc.ccs = WC_CCS_UNKNOWN;
325
36.7M
    return cc;
326
83.4M
}
327
328
wc_wchar_t
329
wc_any_to_any_ces(wc_wchar_t cc, wc_status *st)
330
97.4M
{
331
97.4M
    wc_uint32 ucs = wc_any_to_ucs(cc);
332
97.4M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
333
334
97.4M
    if (ucs < 0x80) {
335
17.0M
  cc.ccs = WC_CCS_US_ASCII;
336
17.0M
  cc.code = ucs;
337
17.0M
  return cc;
338
17.0M
    }
339
80.3M
    if (ucs != WC_C_UCS4_ERROR) {
340
78.1M
  if (st->ces_info->id & WC_CES_T_UTF) {
341
0
      cc.ccs = wc_ucs_to_ccs(ucs);
342
0
      cc.code = ucs;
343
0
      return cc;
344
78.1M
  } else if (st->ces_info->id == WC_CES_JOHAB) {
345
214k
      cc = wc_ucs_to_johab(ucs);
346
214k
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
347
50.5k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
348
214k
      return cc;
349
214k
  }
350
77.9M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
351
77.9M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
352
41.2M
      return cc;
353
36.6M
  if (! WcOption.fix_width_conv) {
354
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
355
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
356
0
    return cc;
357
0
  }
358
36.6M
  if (st->ces_info->id == WC_CES_GB18030) {
359
9.98M
      cc = wc_ucs_to_gb18030(ucs);
360
9.98M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
361
5.28k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
362
9.98M
      return cc;
363
9.98M
  }
364
26.6M
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
365
6.53k
      cc.ccs = WC_CCS_US_ASCII;
366
6.53k
      cc.code = 0x20;
367
6.53k
      return cc;
368
6.53k
  }
369
26.6M
  if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) &&
370
3.58M
      0x80 <= ucs && ucs <= 0x9F) {
371
253
      cc.ccs = WC_CCS_C1;
372
253
      cc.code = ucs;
373
253
      return cc;
374
253
  }
375
376
26.6M
  ucs = wc_ucs_to_fullwidth(ucs);
377
26.6M
  if (ucs != WC_C_UCS4_ERROR) {
378
62.5k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
379
62.5k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
380
471
    return cc;
381
62.0k
      if (! WcOption.fix_width_conv) {
382
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
383
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
384
0
        return cc;
385
0
      }
386
62.0k
  }
387
26.6M
    }
388
28.8M
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
389
28.8M
    return cc;
390
80.3M
}
391
392
wc_wchar_t
393
wc_any_to_iso2022(wc_wchar_t cc, wc_status *st)
394
5.45M
{
395
5.45M
    wc_uint32 ucs = wc_any_to_ucs(cc);
396
5.45M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
397
398
5.45M
    if (ucs < 0x80) {
399
1.38k
  cc.ccs = WC_CCS_US_ASCII;
400
1.38k
  cc.code = ucs;
401
1.38k
  return cc;
402
1.38k
    }
403
5.45M
    if (ucs != WC_C_UCS4_ERROR) {
404
5.44M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
405
5.44M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
406
5.36M
      return cc;
407
83.3k
  if (! WcOption.strict_iso2022) {
408
0
      cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
409
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
410
0
    return cc;
411
0
  }
412
83.3k
  if (! WcOption.fix_width_conv) {
413
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
414
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
415
0
    return cc;
416
0
      if (! WcOption.strict_iso2022) {
417
0
    cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
418
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
419
0
        return cc;
420
0
      }
421
0
  }
422
83.3k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
423
1.53k
     cc.ccs = WC_CCS_US_ASCII;
424
1.53k
     cc.code = 0x20;
425
1.53k
     return cc;
426
1.53k
  }
427
428
81.7k
  ucs = wc_ucs_to_fullwidth(ucs);
429
81.7k
  if (ucs != WC_C_UCS4_ERROR) {
430
8.33k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
431
8.33k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
432
196
    return cc;
433
8.13k
      if (! WcOption.strict_iso2022) {
434
0
    cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
435
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
436
0
        return cc;
437
0
      }
438
8.13k
      if (! WcOption.fix_width_conv) {
439
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
440
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
441
0
        return cc;
442
0
    if (! WcOption.strict_iso2022) {
443
0
        cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
444
0
        if (!WC_CCS_IS_UNKNOWN(cc.ccs))
445
0
      return cc;
446
0
    }
447
0
      }
448
8.13k
  }
449
81.5k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
450
0
     cc.ccs = WC_CCS_US_ASCII;
451
0
     cc.code = 0x20;
452
0
     return cc;
453
0
  }
454
81.5k
    }
455
87.5k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
456
87.5k
    return cc;
457
5.45M
}
458
459
wc_wchar_t
460
wc_ucs_to_iso2022(wc_uint32 ucs)
461
0
{
462
0
    wc_table *t;
463
0
    wc_wchar_t cc;
464
0
    int f;
465
466
0
    if (ucs <= WC_C_UCS2_END) {
467
0
  for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) {
468
0
      t = &ucs_cs96_table[f];
469
0
      if (t->map == NULL)
470
0
    continue;
471
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
472
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
473
0
    return cc;
474
0
  }
475
0
  for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) {
476
0
      t = &ucs_cs94_table[f];
477
0
      if (t->map == NULL)
478
0
    continue;
479
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
480
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
481
0
    return cc;
482
0
  }
483
0
  for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) {
484
0
      t = &ucs_cs942_table[f];
485
0
      if (t->map == NULL)
486
0
    continue;
487
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
488
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
489
0
    return cc;
490
0
  }
491
0
    }
492
0
    cc.ccs = WC_CCS_UNKNOWN;
493
0
    return cc;
494
0
}
495
496
wc_wchar_t
497
wc_ucs_to_iso2022w(wc_uint32 ucs)
498
0
{
499
0
    wc_table *t;
500
0
    wc_wchar_t cc;
501
0
    int f;
502
503
0
    if (ucs <= WC_C_UCS2_END) {
504
0
  for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) {
505
0
      t = &ucs_cs94w_table[f];
506
0
      if (t->map == NULL)
507
0
    continue;
508
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
509
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
510
0
    return cc;
511
0
  }
512
0
  for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) {
513
0
      t = &ucs_cs96w_table[f];
514
0
      if (t->map == NULL)
515
0
    continue;
516
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
517
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
518
0
    return cc;
519
0
  }
520
0
    }
521
0
    cc.ccs = WC_CCS_UNKNOWN_W;
522
0
    return cc;
523
0
}
524
525
wc_ccs
526
wc_ucs_to_ccs(wc_uint32 ucs)
527
130k
{
528
130k
    if (0x80 <= ucs && ucs <= 0x9F)
529
3.41k
  return WC_CCS_C1;
530
127k
    return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
531
127k
  | ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
532
127k
        ? WC_CCS_A_WIDE : 0)
533
127k
  | (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
534
127k
  | (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
535
130k
}
536
537
wc_bool
538
wc_is_ucs_ambiguous_width(wc_uint32 ucs)
539
0
{
540
0
    if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213)
541
0
  return 1;
542
0
    else if (ucs <= WC_C_UCS2_END)
543
0
  return (wc_map_range_search((wc_uint16)ucs,
544
0
        ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL);
545
0
    else
546
0
  return ((0xF0000 <= ucs && ucs <= 0xFFFFD)
547
0
    || (0x100000 <= ucs && ucs <= 0x10FFFD));
548
0
}
549
550
wc_bool
551
wc_is_ucs_wide(wc_uint32 ucs)
552
127k
{
553
127k
    if (ucs <= WC_C_UCS2_END)
554
120k
  return (wc_map_range_search((wc_uint16)ucs,
555
120k
    ucs_wide_map, N_ucs_wide_map) != NULL);
556
6.23k
    else
557
6.23k
  return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 ||
558
5.67k
    (ucs & ~0xFFFF) == WC_C_UCS4_PLANE3);
559
127k
}
560
561
wc_bool
562
wc_is_ucs_combining(wc_uint32 ucs)
563
127k
{
564
127k
    return (WcOption.use_combining && ucs <= WC_C_UCS2_END &&
565
120k
  wc_map_range_search((wc_uint16)ucs,
566
120k
  ucs_combining_map, N_ucs_combining_map) != NULL);
567
127k
}
568
569
wc_bool
570
wc_is_ucs_hangul(wc_uint32 ucs)
571
0
{
572
0
    return (ucs <= WC_C_UCS2_END &&
573
0
  wc_map_range_search((wc_uint16)ucs,
574
0
  ucs_hangul_map, N_ucs_hangul_map) != NULL);
575
0
}
576
577
wc_bool
578
wc_is_ucs_alpha(wc_uint32 ucs)
579
0
{
580
0
    return (ucs <= WC_C_UCS2_END &&
581
0
  wc_map_range_search((wc_uint16)ucs,
582
0
  ucs_isalpha_map, N_ucs_isalpha_map) != NULL);
583
0
}
584
585
wc_bool
586
wc_is_ucs_digit(wc_uint32 ucs)
587
0
{
588
0
    return (ucs <= WC_C_UCS2_END &&
589
0
  wc_map_range_search((wc_uint16)ucs,
590
0
  ucs_isdigit_map, N_ucs_isdigit_map) != NULL);
591
0
}
592
593
wc_bool
594
wc_is_ucs_alnum(wc_uint32 ucs)
595
0
{
596
0
    return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs));
597
0
}
598
599
wc_bool
600
wc_is_ucs_lower(wc_uint32 ucs)
601
0
{
602
0
    return (ucs <= WC_C_UCS2_END &&
603
0
  wc_map_range_search((wc_uint16)ucs,
604
0
  ucs_islower_map, N_ucs_islower_map) != NULL);
605
0
}
606
607
wc_bool
608
wc_is_ucs_upper(wc_uint32 ucs)
609
0
{
610
0
    return (ucs <= WC_C_UCS2_END &&
611
0
  wc_map_range_search((wc_uint16)ucs,
612
0
  ucs_isupper_map, N_ucs_isupper_map) != NULL);
613
0
}
614
615
wc_uint32
616
wc_ucs_toupper(wc_uint32 ucs)
617
0
{
618
0
    wc_map *conv = NULL;
619
0
    if (ucs <= WC_C_UCS2_END)
620
0
  conv = wc_map_search((wc_uint16)ucs,
621
0
           ucs_toupper_map, N_ucs_toupper_map);
622
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
623
0
}
624
625
wc_uint32
626
wc_ucs_tolower(wc_uint32 ucs)
627
0
{
628
0
    wc_map *conv = NULL;
629
0
    if (ucs <= WC_C_UCS2_END)
630
0
  conv = wc_map_search((wc_uint16)ucs,
631
0
           ucs_tolower_map, N_ucs_tolower_map);
632
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
633
0
}
634
635
wc_uint32
636
wc_ucs_totitle(wc_uint32 ucs)
637
0
{
638
0
    wc_map *conv = NULL;
639
0
    if (ucs <= WC_C_UCS2_END)
640
0
  conv = wc_map_search((wc_uint16)ucs,
641
0
           ucs_totitle_map, N_ucs_totitle_map);
642
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
643
0
}
644
645
wc_uint32
646
wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2)
647
9.59k
{
648
9.59k
    wc_map3 *map;
649
650
9.59k
    if (WcOption.use_combining &&
651
9.59k
  ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END &&
652
6.92k
  (map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2,
653
6.92k
  ucs_precompose_map, N_ucs_precompose_map)) != NULL)
654
1.83k
  return map->code3;
655
7.76k
    return WC_C_UCS4_ERROR;
656
9.59k
}
657
658
wc_uint32
659
wc_ucs_to_fullwidth(wc_uint32 ucs)
660
26.7M
{
661
26.7M
    wc_map *map;
662
663
26.7M
    if (ucs <= WC_C_UCS2_END &&
664
26.6M
  (map = wc_map_search((wc_uint16)ucs,
665
26.6M
  ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL)
666
70.8k
  return map->code2;
667
26.6M
    return WC_C_UCS4_ERROR;
668
26.7M
}
669
670
int
671
wc_ucs_put_tag(char *p)
672
3.34k
{
673
3.34k
    int i;
674
675
3.34k
    if (p == NULL || *p == '\0')
676
688
  return 0;
677
218k
    for (i = 1; i <= n_tag_map; i++) {
678
218k
  if (!strcasecmp(p, tag_map[i]))
679
2.20k
      return i;
680
218k
    }
681
455
    if (n_tag_map + 1 >= MAX_TAG_MAP)
682
200
  return 0;
683
255
    n_tag_map++;
684
255
    tag_map[n_tag_map] = p;
685
255
    return n_tag_map;
686
455
}
687
688
char *
689
wc_ucs_get_tag(int ntag)
690
19.3M
{
691
19.3M
    if (ntag <= 0 || ntag > n_tag_map)
692
493
  return NULL;
693
19.3M
    return tag_map[ntag];
694
19.3M
}
695
696
void
697
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
698
38.1M
{
699
38.1M
    wc_ccs ccs;
700
701
38.1M
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
702
45.2k
  if (! WcOption.use_language_tag)
703
0
      return;
704
45.2k
  if (ucs == WC_C_LANGUAGE_TAG)
705
4.17k
      if (st->tag)
706
308
    Strclear(st->tag);
707
3.86k
      else
708
3.86k
    st->tag = Strnew_size(MAX_TAG_LEN);
709
41.0k
  else if (ucs == WC_C_CANCEL_TAG) {
710
672
      if (st->tag)
711
475
    Strfree(st->tag);
712
672
      st->tag = NULL;
713
672
      st->ntag = 0;
714
40.4k
  }  else if (st->tag && st->tag->length < MAX_TAG_LEN &&
715
11.6k
        ucs >= WC_C_TAG_SPACE)
716
11.4k
      Strcat_char(st->tag, (char)(ucs & 0x7f));
717
45.2k
  return;
718
45.2k
    }
719
38.1M
    if (st->tag) {
720
3.34k
  st->ntag = wc_ucs_put_tag(st->tag->ptr);
721
3.34k
  st->tag = NULL;
722
3.34k
    }
723
38.1M
    if (ucs < 0x80) {
724
37.9M
  if (st->ntag)
725
37.3M
      wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
726
662k
  else
727
662k
      Strcat_char(os, (char)ucs);
728
37.9M
    } else {
729
129k
  ccs = wc_ucs_to_ccs(ucs);
730
129k
  if (st->ntag && ucs <= WC_C_UNICODE_END) {
731
104k
      ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs);
732
104k
      ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
733
104k
  }
734
129k
  wtf_push(os, ccs, ucs);
735
129k
    }
736
38.1M
}
737
738
#endif