Coverage Report

Created: 2025-08-28 06:51

/src/w3m/libwc/ucs.c
Line
Count
Source (jump to first uncovered line)
1
2
#ifdef USE_UNICODE
3
4
#include <stdlib.h>
5
#include "wc.h"
6
#include "ucs.h"
7
#include "search.h"
8
#include "big5.h"
9
#include "hkscs.h"
10
#include "sjis.h"
11
#include "johab.h"
12
#include "gbk.h"
13
#include "gb18030.h"
14
#include "uhc.h"
15
#include "viet.h"
16
#include "wtf.h"
17
18
#include "ucs.map"
19
20
#include "map/ucs_ambwidth.map"
21
#include "map/ucs_wide.map"
22
#include "map/ucs_combining.map"
23
#include "map/ucs_precompose.map"
24
#include "map/ucs_hangul.map"
25
#include "map/ucs_fullwidth.map"
26
#include "map/ucs_isalpha.map"
27
#include "map/ucs_isdigit.map"
28
#include "map/ucs_islower.map"
29
#include "map/ucs_isupper.map"
30
#include "map/ucs_case.map"
31
32
72.7k
/* Length limit for a collected language tag string: wtf_push_ucs() uses it
 * as the initial size of the tag buffer and stops appending once the buffer
 * reaches this length.  NOTE(review): the 8 + 1 + 8 breakdown presumably
 * mirrors a "primary-subtag" layout; confirm against the tag syntax. */
#define MAX_TAG_LEN (8 + 1 + 8)
33
257
/* Number of slots in tag_map[]. */
#define MAX_TAG_MAP 0x100
34
/* Index of the most recently registered tag; 0 means none registered yet. */
static int n_tag_map = 0;
35
/* Registered tag strings.  Slots 1..n_tag_map are in use; slot 0 is never
 * assigned because 0 is returned by wc_ucs_put_tag() to mean "no tag". */
static char *tag_map[ MAX_TAG_MAP ];
36
37
wc_table *
38
wc_get_ucs_table(wc_ccs ccs)
39
2.34M
{
40
2.34M
    int f = WC_CCS_INDEX(ccs);
41
42
2.34M
    switch (WC_CCS_TYPE(ccs)) {
43
8.92k
    case WC_CCS_A_CS94:
44
8.92k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
45
0
      return NULL;
46
8.92k
  return &ucs_cs94_table[f - WC_F_ISO_BASE];
47
5.83k
    case WC_CCS_A_CS94W:
48
5.83k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
49
0
      return NULL;
50
5.83k
  return &ucs_cs94w_table[f - WC_F_ISO_BASE];
51
231
    case WC_CCS_A_CS96:
52
231
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
53
0
      return NULL;
54
231
  return &ucs_cs96_table[f - WC_F_ISO_BASE];
55
0
    case WC_CCS_A_CS96W:
56
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
57
0
      return NULL;
58
0
  return &ucs_cs96w_table[f - WC_F_ISO_BASE];
59
0
    case WC_CCS_A_CS942:
60
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
61
0
      return NULL;
62
0
  return &ucs_cs942_table[f - WC_F_ISO_BASE];
63
935
    case WC_CCS_A_PCS:
64
935
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
65
0
      return NULL;
66
935
  return &ucs_pcs_table[f - WC_F_PCS_BASE];
67
2.32M
    case WC_CCS_A_PCSW:
68
2.32M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
69
0
      return NULL;
70
2.32M
  return &ucs_pcsw_table[f - WC_F_PCS_BASE];
71
3.38k
    default:
72
3.38k
  return NULL;
73
2.34M
    }
74
2.34M
}
75
76
wc_wchar_t
77
wc_ucs_to_any(wc_uint32 ucs, wc_table *t)
78
134M
{
79
134M
    wc_wchar_t cc;
80
134M
    wc_map *map;
81
82
134M
    if (t && t->map && ucs && ucs <= WC_C_UCS2_END) {
83
134M
  map = wc_map_search((wc_uint16)ucs, t->map, t->n);
84
134M
  if (map)
85
41.2M
      return t->conv(t->ccs, map->code2);
86
134M
    }
87
93.0M
    if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) {
88
9.36k
  if (t->ccs == WC_CCS_JIS_X_0213_1)
89
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
90
0
    ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map);
91
9.36k
  else if (t->ccs == WC_CCS_JIS_X_0213_2)
92
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
93
0
    ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map);
94
9.36k
  else if (t->ccs == WC_CCS_HKSCS ||
95
9.36k
     t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2)
96
1.71k
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
97
1.71k
    ucs_p2_hkscs_map, N_ucs_p2_hkscs_map);
98
7.64k
  else
99
7.64k
      map = NULL;
100
9.36k
  if (map)
101
214
      return t->conv(t->ccs, map->code2);
102
9.36k
    }
103
93.0M
    cc.ccs = WC_CCS_UNKNOWN;
104
93.0M
    cc.code = 0;
105
93.0M
    return cc;
106
93.0M
}
107
108
wc_uint32
109
wc_any_to_ucs(wc_wchar_t cc)
110
98.0M
{
111
98.0M
    int f;
112
98.0M
    wc_uint16 *map = NULL;
113
98.0M
    wc_uint32 map_size = 0x80;
114
98.0M
    wc_map *map2;
115
116
98.0M
    f = WC_CCS_INDEX(cc.ccs);
117
98.0M
    switch (WC_CCS_TYPE(cc.ccs)) {
118
437k
    case WC_CCS_A_CS94:
119
437k
  if (cc.ccs == WC_CCS_US_ASCII)
120
0
      return cc.code;
121
437k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
122
65.7k
      return WC_C_UCS4_ERROR;
123
371k
  map = cs94_ucs_map[f - WC_F_ISO_BASE];
124
371k
  cc.code &= 0x7f;
125
371k
  break;
126
68.9M
    case WC_CCS_A_CS94W:
127
68.9M
  if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) {
128
0
      cc.ccs = WC_CCS_GB_12345;
129
0
      return wc_any_to_ucs(cc);
130
68.9M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_1) {
131
75.5k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
132
75.5k
    jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map);
133
75.5k
      if (map2)
134
711
    return map2->code2 | WC_C_UCS4_PLANE2;
135
68.9M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_2) {
136
252k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
137
252k
    jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map);
138
252k
      if (map2)
139
2.73k
    return map2->code2 | WC_C_UCS4_PLANE2;
140
252k
  }
141
68.9M
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
142
514k
      return 0;
143
68.4M
  map = cs94w_ucs_map[f - WC_F_ISO_BASE];
144
68.4M
  map_size = cs94w_ucs_map_size[f - WC_F_ISO_BASE];
145
68.4M
  cc.code = WC_CS94W_N(cc.code);
146
68.4M
  break;
147
4.70M
    case WC_CCS_A_CS96:
148
4.70M
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
149
18.2k
      return WC_C_UCS4_ERROR;
150
4.68M
  map = cs96_ucs_map[f - WC_F_ISO_BASE];
151
4.68M
  cc.code &= 0x7f;
152
4.68M
  break;
153
21.6k
    case WC_CCS_A_CS96W:
154
21.6k
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
155
21.6k
      return WC_C_UCS4_ERROR;
156
0
  map = cs96w_ucs_map[f - WC_F_ISO_BASE];
157
0
  map_size = cs96w_ucs_map_size[f - WC_F_ISO_BASE];
158
0
  cc.code = WC_CS96W_N(cc.code);
159
0
  break;
160
21.9k
    case WC_CCS_A_CS942:
161
21.9k
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
162
21.9k
      return WC_C_UCS4_ERROR;
163
0
  map = cs942_ucs_map[f - WC_F_ISO_BASE];
164
0
  cc.code &= 0x7f;
165
0
  break;
166
5.91M
    case WC_CCS_A_PCS:
167
5.91M
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
168
410
      return WC_C_UCS4_ERROR;
169
5.91M
  switch (cc.ccs) {
170
449
  case WC_CCS_CP1258_2:
171
449
      map2 = wc_map_search((wc_uint16)cc.code,
172
449
    cp12582_ucs_map, N_cp12582_ucs_map);
173
449
      if (map2)
174
252
    return map2->code2;
175
197
      return WC_C_UCS4_ERROR;
176
300
  case WC_CCS_TCVN_5712_3:
177
300
      return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc));
178
21.6k
  case WC_CCS_GBK_80:
179
21.6k
      return WC_C_UCS2_EURO;
180
5.91M
  }
181
5.89M
  map = pcs_ucs_map[f - WC_F_PCS_BASE];
182
5.89M
  map_size = pcs_ucs_map_size[f - WC_F_PCS_BASE];
183
5.89M
  cc.code &= 0x7f;
184
5.89M
  break;
185
1.39M
    case WC_CCS_A_PCSW:
186
1.39M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
187
596
      return WC_C_UCS4_ERROR;
188
1.39M
  map = pcsw_ucs_map[f - WC_F_PCS_BASE];
189
1.39M
  map_size = pcsw_ucs_map_size[f - WC_F_PCS_BASE];
190
1.39M
  switch (cc.ccs) {
191
107k
  case WC_CCS_BIG5:
192
107k
      cc.code = WC_BIG5_N(cc.code);
193
107k
      break;
194
0
  case WC_CCS_BIG5_2:
195
0
      cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE;
196
0
      break;
197
0
  case WC_CCS_HKSCS_1:
198
0
  case WC_CCS_HKSCS_2:
199
0
      cc = wc_cs128w_to_hkscs(cc);
200
1.18k
  case WC_CCS_HKSCS:
201
1.18k
      map2 = wc_map_search((wc_uint16)cc.code,
202
1.18k
    hkscs_ucs_p2_map, N_hkscs_ucs_p2_map);
203
1.18k
      if (map2)
204
398
    return map2->code2 | WC_C_UCS4_PLANE2;
205
790
      cc.code = wc_hkscs_to_N(cc.code);
206
790
      break;
207
487k
  case WC_CCS_JOHAB:
208
487k
      return wc_any_to_ucs(wc_johab_to_cs128w(cc));
209
447k
  case WC_CCS_JOHAB_1:
210
447k
      return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL;
211
37.6k
  case WC_CCS_JOHAB_2:
212
37.6k
      cc.code = WC_CS128W_N(cc.code);
213
37.6k
      cc.code = WC_N_JOHAB2(cc.code);
214
37.6k
      map2 = wc_map_search((wc_uint16)cc.code,
215
37.6k
    johab2_ucs_map, N_johab2_ucs_map);
216
37.6k
      if (map2)
217
578
    return map2->code2;
218
37.0k
      return WC_C_UCS4_ERROR;
219
40.9k
  case WC_CCS_JOHAB_3:
220
40.9k
      if ((cc.code & 0x7f7f) < 0x2121)
221
1.63k
    return WC_C_UCS4_ERROR;
222
88.9k
  case WC_CCS_SJIS_EXT:
223
88.9k
      return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc));
224
36.6k
  case WC_CCS_SJIS_EXT_1:
225
36.6k
      cc.code = wc_sjis_ext1_to_N(cc.code);
226
36.6k
      if (cc.code == WC_C_SJIS_ERROR)
227
28.2k
    return WC_C_UCS4_ERROR;
228
8.45k
      break;
229
52.3k
  case WC_CCS_SJIS_EXT_2:
230
52.3k
      cc.code = wc_sjis_ext2_to_N(cc.code);
231
52.3k
      if (cc.code == WC_C_SJIS_ERROR)
232
41.9k
    return WC_C_UCS4_ERROR;
233
10.4k
      break;
234
10.4k
  case WC_CCS_GBK_1:
235
0
  case WC_CCS_GBK_2:
236
0
      cc = wc_cs128w_to_gbk(cc);
237
90.1k
  case WC_CCS_GBK:
238
90.1k
      cc.code = wc_gbk_to_N(cc.code);
239
90.1k
      break;
240
26.4k
  case WC_CCS_GBK_EXT:
241
26.4k
  case WC_CCS_GBK_EXT_1:
242
26.4k
  case WC_CCS_GBK_EXT_2:
243
26.4k
      return wc_gb18030_to_ucs(cc);
244
0
  case WC_CCS_UHC_1:
245
0
  case WC_CCS_UHC_2:
246
0
      cc = wc_cs128w_to_uhc(cc);
247
15.6k
  case WC_CCS_UHC:
248
15.6k
      if (cc.code > WC_C_UHC_END)
249
5.44k
    return WC_C_UCS4_ERROR;
250
10.2k
      cc.code = wc_uhc_to_N(cc.code);
251
10.2k
      break;
252
546
  default:
253
546
      cc.code = WC_CS94W_N(cc.code);
254
546
      break;
255
1.39M
  }
256
228k
  break;
257
228k
    case WC_CCS_A_WCS16:
258
7.39k
  switch (WC_CCS_SET(cc.ccs)) {
259
7.11k
  case WC_CCS_UCS2:
260
7.11k
      return cc.code;
261
7.39k
  }
262
284
  return WC_C_UCS4_ERROR;
263
15.8M
    case WC_CCS_A_WCS32:
264
15.8M
  switch (WC_CCS_SET(cc.ccs)) {
265
4.59k
  case WC_CCS_UCS4:
266
4.59k
      return cc.code;
267
15.8M
  case WC_CCS_UCS_TAG:
268
15.8M
      return wc_ucs_tag_to_ucs(cc.code);
269
3.57k
  case WC_CCS_GB18030:
270
3.57k
      return wc_gb18030_to_ucs(cc);
271
15.8M
  }
272
335
  return WC_C_UCS4_ERROR;
273
725k
    case WC_CCS_A_UNKNOWN:
274
725k
  if (cc.ccs == WC_CCS_C1)
275
725k
      return (cc.code | 0x80);
276
2.23k
    default:
277
2.23k
  return WC_C_UCS4_ERROR;
278
98.0M
    }
279
79.6M
    if (map == NULL)
280
308k
  return WC_C_UCS4_ERROR;
281
79.3M
    if (map_size == 0 || cc.code > map_size - 1)
282
1.85k
  return WC_C_UCS4_ERROR;
283
79.3M
    cc.code = map[cc.code];
284
79.3M
    return cc.code ? cc.code : WC_C_UCS4_ERROR;
285
79.3M
}
286
287
wc_wchar_t
288
wc_any_to_any(wc_wchar_t cc, wc_table *t)
289
38.8k
{
290
38.8k
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
291
38.8k
    wc_uint32 ucs = wc_any_to_ucs(cc);
292
293
38.8k
    if (ucs != WC_C_UCS4_ERROR) {
294
35.8k
  cc = wc_ucs_to_any(ucs, t);
295
35.8k
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
296
16.9k
      return cc;
297
298
18.9k
  ucs = wc_ucs_to_fullwidth(ucs);
299
18.9k
  if (ucs != WC_C_UCS4_ERROR) {
300
0
      cc = wc_ucs_to_any(ucs, t);
301
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
302
0
    return cc;
303
0
  }
304
18.9k
    }
305
21.9k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
306
21.9k
    return cc;
307
38.8k
}
308
309
wc_wchar_t
310
wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist)
311
78.2M
{
312
78.2M
    wc_wchar_t cc;
313
78.2M
    wc_table **t;
314
315
78.2M
    if (tlist != NULL) {
316
223M
  for (t = tlist; *t != NULL; t++) {
317
184M
      if ((*t)->map == NULL)
318
52.9M
    continue;
319
131M
      cc = wc_ucs_to_any(ucs, *t);
320
131M
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
321
39.9M
    return cc;
322
131M
  }
323
78.2M
    }
324
38.2M
    cc.ccs = WC_CCS_UNKNOWN;
325
38.2M
    return cc;
326
78.2M
}
327
328
wc_wchar_t
329
wc_any_to_any_ces(wc_wchar_t cc, wc_status *st)
330
94.3M
{
331
94.3M
    wc_uint32 ucs = wc_any_to_ucs(cc);
332
94.3M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
333
334
94.3M
    if (ucs < 0x80) {
335
16.1M
  cc.ccs = WC_CCS_US_ASCII;
336
16.1M
  cc.code = ucs;
337
16.1M
  return cc;
338
16.1M
    }
339
78.1M
    if (ucs != WC_C_UCS4_ERROR) {
340
77.3M
  if (st->ces_info->id & WC_CES_T_UTF) {
341
0
      cc.ccs = wc_ucs_to_ccs(ucs);
342
0
      cc.code = ucs;
343
0
      return cc;
344
77.3M
  } else if (st->ces_info->id == WC_CES_JOHAB) {
345
2.26M
      cc = wc_ucs_to_johab(ucs);
346
2.26M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
347
1.03M
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
348
2.26M
      return cc;
349
2.26M
  }
350
75.1M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
351
75.1M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
352
37.5M
      return cc;
353
37.6M
  if (! WcOption.fix_width_conv) {
354
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
355
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
356
0
    return cc;
357
0
  }
358
37.6M
  if (st->ces_info->id == WC_CES_GB18030) {
359
15.7M
      cc = wc_ucs_to_gb18030(ucs);
360
15.7M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
361
10.7k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
362
15.7M
      return cc;
363
15.7M
  }
364
21.8M
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
365
12.2k
      cc.ccs = WC_CCS_US_ASCII;
366
12.2k
      cc.code = 0x20;
367
12.2k
      return cc;
368
12.2k
  }
369
21.8M
  if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) &&
370
21.8M
      0x80 <= ucs && ucs <= 0x9F) {
371
1.25k
      cc.ccs = WC_CCS_C1;
372
1.25k
      cc.code = ucs;
373
1.25k
      return cc;
374
1.25k
  }
375
376
21.8M
  ucs = wc_ucs_to_fullwidth(ucs);
377
21.8M
  if (ucs != WC_C_UCS4_ERROR) {
378
537k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
379
537k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
380
455
    return cc;
381
536k
      if (! WcOption.fix_width_conv) {
382
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
383
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
384
0
        return cc;
385
0
      }
386
536k
  }
387
21.8M
    }
388
22.6M
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
389
22.6M
    return cc;
390
78.1M
}
391
392
wc_wchar_t
393
wc_any_to_iso2022(wc_wchar_t cc, wc_status *st)
394
2.55M
{
395
2.55M
    wc_uint32 ucs = wc_any_to_ucs(cc);
396
2.55M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
397
398
2.55M
    if (ucs < 0x80) {
399
368
  cc.ccs = WC_CCS_US_ASCII;
400
368
  cc.code = ucs;
401
368
  return cc;
402
368
    }
403
2.55M
    if (ucs != WC_C_UCS4_ERROR) {
404
2.53M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
405
2.53M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
406
2.43M
      return cc;
407
102k
  if (! WcOption.strict_iso2022) {
408
0
      cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
409
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
410
0
    return cc;
411
0
  }
412
102k
  if (! WcOption.fix_width_conv) {
413
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
414
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
415
0
    return cc;
416
0
      if (! WcOption.strict_iso2022) {
417
0
    cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
418
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
419
0
        return cc;
420
0
      }
421
0
  }
422
102k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
423
195
     cc.ccs = WC_CCS_US_ASCII;
424
195
     cc.code = 0x20;
425
195
     return cc;
426
195
  }
427
428
102k
  ucs = wc_ucs_to_fullwidth(ucs);
429
102k
  if (ucs != WC_C_UCS4_ERROR) {
430
536
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
431
536
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
432
211
    return cc;
433
325
      if (! WcOption.strict_iso2022) {
434
0
    cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
435
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
436
0
        return cc;
437
0
      }
438
325
      if (! WcOption.fix_width_conv) {
439
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
440
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
441
0
        return cc;
442
0
    if (! WcOption.strict_iso2022) {
443
0
        cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
444
0
        if (!WC_CCS_IS_UNKNOWN(cc.ccs))
445
0
      return cc;
446
0
    }
447
0
      }
448
325
  }
449
102k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
450
0
     cc.ccs = WC_CCS_US_ASCII;
451
0
     cc.code = 0x20;
452
0
     return cc;
453
0
  }
454
102k
    }
455
113k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
456
113k
    return cc;
457
2.55M
}
458
459
wc_wchar_t
460
wc_ucs_to_iso2022(wc_uint32 ucs)
461
0
{
462
0
    wc_table *t;
463
0
    wc_wchar_t cc;
464
0
    int f;
465
466
0
    if (ucs <= WC_C_UCS2_END) {
467
0
  for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) {
468
0
      t = &ucs_cs96_table[f];
469
0
      if (t->map == NULL)
470
0
    continue;
471
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
472
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
473
0
    return cc;
474
0
  }
475
0
  for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) {
476
0
      t = &ucs_cs94_table[f];
477
0
      if (t->map == NULL)
478
0
    continue;
479
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
480
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
481
0
    return cc;
482
0
  }
483
0
  for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) {
484
0
      t = &ucs_cs942_table[f];
485
0
      if (t->map == NULL)
486
0
    continue;
487
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
488
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
489
0
    return cc;
490
0
  }
491
0
    }
492
0
    cc.ccs = WC_CCS_UNKNOWN;
493
0
    return cc;
494
0
}
495
496
wc_wchar_t
497
wc_ucs_to_iso2022w(wc_uint32 ucs)
498
0
{
499
0
    wc_table *t;
500
0
    wc_wchar_t cc;
501
0
    int f;
502
503
0
    if (ucs <= WC_C_UCS2_END) {
504
0
  for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) {
505
0
      t = &ucs_cs94w_table[f];
506
0
      if (t->map == NULL)
507
0
    continue;
508
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
509
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
510
0
    return cc;
511
0
  }
512
0
  for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) {
513
0
      t = &ucs_cs96w_table[f];
514
0
      if (t->map == NULL)
515
0
    continue;
516
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
517
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
518
0
    return cc;
519
0
  }
520
0
    }
521
0
    cc.ccs = WC_CCS_UNKNOWN_W;
522
0
    return cc;
523
0
}
524
525
wc_ccs
526
wc_ucs_to_ccs(wc_uint32 ucs)
527
212k
{
528
212k
    if (0x80 <= ucs && ucs <= 0x9F)
529
2.75k
  return WC_CCS_C1;
530
209k
    return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
531
209k
  | ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
532
209k
        ? WC_CCS_A_WIDE : 0)
533
209k
  | (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
534
209k
  | (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
535
212k
}
536
537
wc_bool
538
wc_is_ucs_ambiguous_width(wc_uint32 ucs)
539
0
{
540
0
    if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213)
541
0
  return 1;
542
0
    else if (ucs <= WC_C_UCS2_END)
543
0
  return (wc_map_range_search((wc_uint16)ucs,
544
0
        ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL);
545
0
    else
546
0
  return ((0xF0000 <= ucs && ucs <= 0xFFFFD)
547
0
    || (0x100000 <= ucs && ucs <= 0x10FFFD));
548
0
}
549
550
wc_bool
551
wc_is_ucs_wide(wc_uint32 ucs)
552
209k
{
553
209k
    if (ucs <= WC_C_UCS2_END)
554
203k
  return (wc_map_range_search((wc_uint16)ucs,
555
203k
    ucs_wide_map, N_ucs_wide_map) != NULL);
556
6.25k
    else
557
6.25k
  return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 ||
558
6.25k
    (ucs & ~0xFFFF) == WC_C_UCS4_PLANE3);
559
209k
}
560
561
wc_bool
562
wc_is_ucs_combining(wc_uint32 ucs)
563
209k
{
564
209k
    return (WcOption.use_combining && ucs <= WC_C_UCS2_END &&
565
209k
  wc_map_range_search((wc_uint16)ucs,
566
203k
  ucs_combining_map, N_ucs_combining_map) != NULL);
567
209k
}
568
569
wc_bool
570
wc_is_ucs_hangul(wc_uint32 ucs)
571
0
{
572
0
    return (ucs <= WC_C_UCS2_END &&
573
0
  wc_map_range_search((wc_uint16)ucs,
574
0
  ucs_hangul_map, N_ucs_hangul_map) != NULL);
575
0
}
576
577
wc_bool
578
wc_is_ucs_alpha(wc_uint32 ucs)
579
0
{
580
0
    return (ucs <= WC_C_UCS2_END &&
581
0
  wc_map_range_search((wc_uint16)ucs,
582
0
  ucs_isalpha_map, N_ucs_isalpha_map) != NULL);
583
0
}
584
585
wc_bool
586
wc_is_ucs_digit(wc_uint32 ucs)
587
0
{
588
0
    return (ucs <= WC_C_UCS2_END &&
589
0
  wc_map_range_search((wc_uint16)ucs,
590
0
  ucs_isdigit_map, N_ucs_isdigit_map) != NULL);
591
0
}
592
593
wc_bool
594
wc_is_ucs_alnum(wc_uint32 ucs)
595
0
{
596
0
    return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs));
597
0
}
598
599
wc_bool
600
wc_is_ucs_lower(wc_uint32 ucs)
601
0
{
602
0
    return (ucs <= WC_C_UCS2_END &&
603
0
  wc_map_range_search((wc_uint16)ucs,
604
0
  ucs_islower_map, N_ucs_islower_map) != NULL);
605
0
}
606
607
wc_bool
608
wc_is_ucs_upper(wc_uint32 ucs)
609
0
{
610
0
    return (ucs <= WC_C_UCS2_END &&
611
0
  wc_map_range_search((wc_uint16)ucs,
612
0
  ucs_isupper_map, N_ucs_isupper_map) != NULL);
613
0
}
614
615
wc_uint32
616
wc_ucs_toupper(wc_uint32 ucs)
617
0
{
618
0
    wc_map *conv = NULL;
619
0
    if (ucs <= WC_C_UCS2_END)
620
0
  conv = wc_map_search((wc_uint16)ucs,
621
0
           ucs_toupper_map, N_ucs_toupper_map);
622
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
623
0
}
624
625
wc_uint32
626
wc_ucs_tolower(wc_uint32 ucs)
627
0
{
628
0
    wc_map *conv = NULL;
629
0
    if (ucs <= WC_C_UCS2_END)
630
0
  conv = wc_map_search((wc_uint16)ucs,
631
0
           ucs_tolower_map, N_ucs_tolower_map);
632
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
633
0
}
634
635
wc_uint32
636
wc_ucs_totitle(wc_uint32 ucs)
637
0
{
638
0
    wc_map *conv = NULL;
639
0
    if (ucs <= WC_C_UCS2_END)
640
0
  conv = wc_map_search((wc_uint16)ucs,
641
0
           ucs_totitle_map, N_ucs_totitle_map);
642
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
643
0
}
644
645
wc_uint32
646
wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2)
647
16.2k
{
648
16.2k
    wc_map3 *map;
649
650
16.2k
    if (WcOption.use_combining &&
651
16.2k
  ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END &&
652
16.2k
  (map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2,
653
12.2k
  ucs_precompose_map, N_ucs_precompose_map)) != NULL)
654
5.53k
  return map->code3;
655
10.7k
    return WC_C_UCS4_ERROR;
656
16.2k
}
657
658
wc_uint32
659
wc_ucs_to_fullwidth(wc_uint32 ucs)
660
21.9M
{
661
21.9M
    wc_map *map;
662
663
21.9M
    if (ucs <= WC_C_UCS2_END &&
664
21.9M
  (map = wc_map_search((wc_uint16)ucs,
665
21.9M
  ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL)
666
537k
  return map->code2;
667
21.4M
    return WC_C_UCS4_ERROR;
668
21.9M
}
669
670
int
671
wc_ucs_put_tag(char *p)
672
3.07k
{
673
3.07k
    int i;
674
675
3.07k
    if (p == NULL || *p == '\0')
676
687
  return 0;
677
194k
    for (i = 1; i <= n_tag_map; i++) {
678
194k
  if (!strcasecmp(p, tag_map[i]))
679
2.13k
      return i;
680
194k
    }
681
257
    if (n_tag_map + 1 >= MAX_TAG_MAP)
682
2
  return 0;
683
255
    n_tag_map++;
684
255
    tag_map[n_tag_map] = p;
685
255
    return n_tag_map;
686
257
}
687
688
char *
689
wc_ucs_get_tag(int ntag)
690
27.6M
{
691
27.6M
    if (ntag <= 0 || ntag > n_tag_map)
692
191
  return NULL;
693
27.6M
    return tag_map[ntag];
694
27.6M
}
695
696
void
697
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
698
45.5M
{
699
45.5M
    wc_ccs ccs;
700
701
45.5M
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
702
44.6k
  if (! WcOption.use_language_tag)
703
0
      return;
704
44.6k
  if (ucs == WC_C_LANGUAGE_TAG)
705
3.83k
      if (st->tag)
706
246
    Strclear(st->tag);
707
3.59k
      else
708
3.59k
    st->tag = Strnew_size(MAX_TAG_LEN);
709
40.7k
  else if (ucs == WC_C_CANCEL_TAG) {
710
665
      if (st->tag)
711
469
    Strfree(st->tag);
712
665
      st->tag = NULL;
713
665
      st->ntag = 0;
714
40.1k
  }  else if (st->tag && st->tag->length < MAX_TAG_LEN &&
715
40.1k
        ucs >= WC_C_TAG_SPACE)
716
12.2k
      Strcat_char(st->tag, (char)(ucs & 0x7f));
717
44.6k
  return;
718
44.6k
    }
719
45.4M
    if (st->tag) {
720
3.07k
  st->ntag = wc_ucs_put_tag(st->tag->ptr);
721
3.07k
  st->tag = NULL;
722
3.07k
    }
723
45.4M
    if (ucs < 0x80) {
724
45.2M
  if (st->ntag)
725
45.0M
      wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
726
212k
  else
727
212k
      Strcat_char(os, (char)ucs);
728
45.2M
    } else {
729
209k
  ccs = wc_ucs_to_ccs(ucs);
730
209k
  if (st->ntag && ucs <= WC_C_UNICODE_END) {
731
197k
      ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs);
732
197k
      ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
733
197k
  }
734
209k
  wtf_push(os, ccs, ucs);
735
209k
    }
736
45.4M
}
737
738
#endif