Coverage Report

Created: 2025-11-09 06:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/ucs.c
Line
Count
Source
1
2
#ifdef USE_UNICODE
3
4
#include <stdlib.h>
5
#include "wc.h"
6
#include "ucs.h"
7
#include "search.h"
8
#include "big5.h"
9
#include "hkscs.h"
10
#include "sjis.h"
11
#include "johab.h"
12
#include "gbk.h"
13
#include "gb18030.h"
14
#include "uhc.h"
15
#include "viet.h"
16
#include "wtf.h"
17
18
#include "ucs.map"
19
20
#include "map/ucs_ambwidth.map"
21
#include "map/ucs_wide.map"
22
#include "map/ucs_combining.map"
23
#include "map/ucs_precompose.map"
24
#include "map/ucs_hangul.map"
25
#include "map/ucs_fullwidth.map"
26
#include "map/ucs_isalpha.map"
27
#include "map/ucs_isdigit.map"
28
#include "map/ucs_islower.map"
29
#include "map/ucs_isupper.map"
30
#include "map/ucs_case.map"
31
32
79.7k
/*
 * Capacity passed to Strnew_size() for a language-tag buffer and the length
 * limit enforced while appending tag characters in wtf_push_ucs().
 * NOTE(review): 8 + 1 + 8 presumably means "8-char language, separator,
 * 8-char region" -- confirm.
 */
#define MAX_TAG_LEN (8 + 1 + 8)
33
301
/* Number of slots in tag_map[]; slot 0 is never assigned by wc_ucs_put_tag(). */
#define MAX_TAG_MAP 0x100
34
/* Index of the most recently registered tag (0 while the map is empty). */
static int n_tag_map = 0;
35
/* Registered tag strings, indexed 1..n_tag_map; the pointers are stored as given, not copied. */
static char *tag_map[ MAX_TAG_MAP ];
36
37
wc_table *
38
wc_get_ucs_table(wc_ccs ccs)
39
1.46M
{
40
1.46M
    int f = WC_CCS_INDEX(ccs);
41
42
1.46M
    switch (WC_CCS_TYPE(ccs)) {
43
9.09k
    case WC_CCS_A_CS94:
44
9.09k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
45
0
      return NULL;
46
9.09k
  return &ucs_cs94_table[f - WC_F_ISO_BASE];
47
6.05k
    case WC_CCS_A_CS94W:
48
6.05k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
49
0
      return NULL;
50
6.05k
  return &ucs_cs94w_table[f - WC_F_ISO_BASE];
51
262
    case WC_CCS_A_CS96:
52
262
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
53
0
      return NULL;
54
262
  return &ucs_cs96_table[f - WC_F_ISO_BASE];
55
0
    case WC_CCS_A_CS96W:
56
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
57
0
      return NULL;
58
0
  return &ucs_cs96w_table[f - WC_F_ISO_BASE];
59
0
    case WC_CCS_A_CS942:
60
0
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
61
0
      return NULL;
62
0
  return &ucs_cs942_table[f - WC_F_ISO_BASE];
63
1.02k
    case WC_CCS_A_PCS:
64
1.02k
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
65
0
      return NULL;
66
1.02k
  return &ucs_pcs_table[f - WC_F_PCS_BASE];
67
1.44M
    case WC_CCS_A_PCSW:
68
1.44M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
69
0
      return NULL;
70
1.44M
  return &ucs_pcsw_table[f - WC_F_PCS_BASE];
71
3.47k
    default:
72
3.47k
  return NULL;
73
1.46M
    }
74
1.46M
}
75
76
wc_wchar_t
77
wc_ucs_to_any(wc_uint32 ucs, wc_table *t)
78
123M
{
79
123M
    wc_wchar_t cc;
80
123M
    wc_map *map;
81
82
123M
    if (t && t->map && ucs && ucs <= WC_C_UCS2_END) {
83
123M
  map = wc_map_search((wc_uint16)ucs, t->map, t->n);
84
123M
  if (map)
85
50.2M
      return t->conv(t->ccs, map->code2);
86
123M
    }
87
73.5M
    if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) {
88
12.0k
  if (t->ccs == WC_CCS_JIS_X_0213_1)
89
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
90
0
    ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map);
91
12.0k
  else if (t->ccs == WC_CCS_JIS_X_0213_2)
92
0
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
93
0
    ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map);
94
12.0k
  else if (t->ccs == WC_CCS_HKSCS ||
95
12.0k
     t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2)
96
4.02k
      map = wc_map_search((wc_uint16)(ucs & 0xffff),
97
4.02k
    ucs_p2_hkscs_map, N_ucs_p2_hkscs_map);
98
8.06k
  else
99
8.06k
      map = NULL;
100
12.0k
  if (map)
101
249
      return t->conv(t->ccs, map->code2);
102
12.0k
    }
103
73.5M
    cc.ccs = WC_CCS_UNKNOWN;
104
73.5M
    cc.code = 0;
105
73.5M
    return cc;
106
73.5M
}
107
108
wc_uint32
109
wc_any_to_ucs(wc_wchar_t cc)
110
106M
{
111
106M
    int f;
112
106M
    wc_uint16 *map = NULL;
113
106M
    wc_uint32 map_size = 0x80;
114
106M
    wc_map *map2;
115
116
106M
    f = WC_CCS_INDEX(cc.ccs);
117
106M
    switch (WC_CCS_TYPE(cc.ccs)) {
118
638k
    case WC_CCS_A_CS94:
119
638k
  if (cc.ccs == WC_CCS_US_ASCII)
120
0
      return cc.code;
121
638k
  if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
122
262k
      return WC_C_UCS4_ERROR;
123
375k
  map = cs94_ucs_map[f - WC_F_ISO_BASE];
124
375k
  cc.code &= 0x7f;
125
375k
  break;
126
67.4M
    case WC_CCS_A_CS94W:
127
67.4M
  if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) {
128
0
      cc.ccs = WC_CCS_GB_12345;
129
0
      return wc_any_to_ucs(cc);
130
67.4M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_1) {
131
65.8k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
132
65.8k
    jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map);
133
65.8k
      if (map2)
134
752
    return map2->code2 | WC_C_UCS4_PLANE2;
135
67.3M
  } else if (cc.ccs == WC_CCS_JIS_X_0213_2) {
136
27.6k
      map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
137
27.6k
    jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map);
138
27.6k
      if (map2)
139
1.93k
    return map2->code2 | WC_C_UCS4_PLANE2;
140
27.6k
  }
141
67.4M
  if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
142
53.6k
      return 0;
143
67.4M
  map = cs94w_ucs_map[f - WC_F_ISO_BASE];
144
67.4M
  map_size = cs94w_ucs_map_size[f - WC_F_ISO_BASE];
145
67.4M
  cc.code = WC_CS94W_N(cc.code);
146
67.4M
  break;
147
4.27M
    case WC_CCS_A_CS96:
148
4.27M
  if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
149
71.4k
      return WC_C_UCS4_ERROR;
150
4.20M
  map = cs96_ucs_map[f - WC_F_ISO_BASE];
151
4.20M
  cc.code &= 0x7f;
152
4.20M
  break;
153
31.7k
    case WC_CCS_A_CS96W:
154
31.7k
  if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
155
31.7k
      return WC_C_UCS4_ERROR;
156
0
  map = cs96w_ucs_map[f - WC_F_ISO_BASE];
157
0
  map_size = cs96w_ucs_map_size[f - WC_F_ISO_BASE];
158
0
  cc.code = WC_CS96W_N(cc.code);
159
0
  break;
160
3.70k
    case WC_CCS_A_CS942:
161
3.70k
  if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
162
3.70k
      return WC_C_UCS4_ERROR;
163
0
  map = cs942_ucs_map[f - WC_F_ISO_BASE];
164
0
  cc.code &= 0x7f;
165
0
  break;
166
12.3M
    case WC_CCS_A_PCS:
167
12.3M
  if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
168
582
      return WC_C_UCS4_ERROR;
169
12.3M
  switch (cc.ccs) {
170
414
  case WC_CCS_CP1258_2:
171
414
      map2 = wc_map_search((wc_uint16)cc.code,
172
414
    cp12582_ucs_map, N_cp12582_ucs_map);
173
414
      if (map2)
174
220
    return map2->code2;
175
194
      return WC_C_UCS4_ERROR;
176
215
  case WC_CCS_TCVN_5712_3:
177
215
      return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc));
178
451k
  case WC_CCS_GBK_80:
179
451k
      return WC_C_UCS2_EURO;
180
12.3M
  }
181
11.8M
  map = pcs_ucs_map[f - WC_F_PCS_BASE];
182
11.8M
  map_size = pcs_ucs_map_size[f - WC_F_PCS_BASE];
183
11.8M
  cc.code &= 0x7f;
184
11.8M
  break;
185
2.37M
    case WC_CCS_A_PCSW:
186
2.37M
  if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
187
692
      return WC_C_UCS4_ERROR;
188
2.37M
  map = pcsw_ucs_map[f - WC_F_PCS_BASE];
189
2.37M
  map_size = pcsw_ucs_map_size[f - WC_F_PCS_BASE];
190
2.37M
  switch (cc.ccs) {
191
244k
  case WC_CCS_BIG5:
192
244k
      cc.code = WC_BIG5_N(cc.code);
193
244k
      break;
194
0
  case WC_CCS_BIG5_2:
195
0
      cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE;
196
0
      break;
197
0
  case WC_CCS_HKSCS_1:
198
0
  case WC_CCS_HKSCS_2:
199
0
      cc = wc_cs128w_to_hkscs(cc);
200
3.94k
  case WC_CCS_HKSCS:
201
3.94k
      map2 = wc_map_search((wc_uint16)cc.code,
202
3.94k
    hkscs_ucs_p2_map, N_hkscs_ucs_p2_map);
203
3.94k
      if (map2)
204
715
    return map2->code2 | WC_C_UCS4_PLANE2;
205
3.22k
      cc.code = wc_hkscs_to_N(cc.code);
206
3.22k
      break;
207
458k
  case WC_CCS_JOHAB:
208
458k
      return wc_any_to_ucs(wc_johab_to_cs128w(cc));
209
442k
  case WC_CCS_JOHAB_1:
210
442k
      return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL;
211
6.56k
  case WC_CCS_JOHAB_2:
212
6.56k
      cc.code = WC_CS128W_N(cc.code);
213
6.56k
      cc.code = WC_N_JOHAB2(cc.code);
214
6.56k
      map2 = wc_map_search((wc_uint16)cc.code,
215
6.56k
    johab2_ucs_map, N_johab2_ucs_map);
216
6.56k
      if (map2)
217
250
    return map2->code2;
218
6.31k
      return WC_C_UCS4_ERROR;
219
11.4k
  case WC_CCS_JOHAB_3:
220
11.4k
      if ((cc.code & 0x7f7f) < 0x2121)
221
707
    return WC_C_UCS4_ERROR;
222
39.5k
  case WC_CCS_SJIS_EXT:
223
39.5k
      return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc));
224
18.3k
  case WC_CCS_SJIS_EXT_1:
225
18.3k
      cc.code = wc_sjis_ext1_to_N(cc.code);
226
18.3k
      if (cc.code == WC_C_SJIS_ERROR)
227
10.9k
    return WC_C_UCS4_ERROR;
228
7.41k
      break;
229
21.1k
  case WC_CCS_SJIS_EXT_2:
230
21.1k
      cc.code = wc_sjis_ext2_to_N(cc.code);
231
21.1k
      if (cc.code == WC_C_SJIS_ERROR)
232
18.6k
    return WC_C_UCS4_ERROR;
233
2.48k
      break;
234
2.48k
  case WC_CCS_GBK_1:
235
0
  case WC_CCS_GBK_2:
236
0
      cc = wc_cs128w_to_gbk(cc);
237
895k
  case WC_CCS_GBK:
238
895k
      cc.code = wc_gbk_to_N(cc.code);
239
895k
      break;
240
168k
  case WC_CCS_GBK_EXT:
241
168k
  case WC_CCS_GBK_EXT_1:
242
168k
  case WC_CCS_GBK_EXT_2:
243
168k
      return wc_gb18030_to_ucs(cc);
244
0
  case WC_CCS_UHC_1:
245
0
  case WC_CCS_UHC_2:
246
0
      cc = wc_cs128w_to_uhc(cc);
247
74.4k
  case WC_CCS_UHC:
248
74.4k
      if (cc.code > WC_C_UHC_END)
249
23.6k
    return WC_C_UCS4_ERROR;
250
50.7k
      cc.code = wc_uhc_to_N(cc.code);
251
50.7k
      break;
252
1.20k
  default:
253
1.20k
      cc.code = WC_CS94W_N(cc.code);
254
1.20k
      break;
255
2.37M
  }
256
1.20M
  break;
257
1.20M
    case WC_CCS_A_WCS16:
258
14.3k
  switch (WC_CCS_SET(cc.ccs)) {
259
13.8k
  case WC_CCS_UCS2:
260
13.8k
      return cc.code;
261
14.3k
  }
262
487
  return WC_C_UCS4_ERROR;
263
18.9M
    case WC_CCS_A_WCS32:
264
18.9M
  switch (WC_CCS_SET(cc.ccs)) {
265
7.00k
  case WC_CCS_UCS4:
266
7.00k
      return cc.code;
267
18.9M
  case WC_CCS_UCS_TAG:
268
18.9M
      return wc_ucs_tag_to_ucs(cc.code);
269
14.4k
  case WC_CCS_GB18030:
270
14.4k
      return wc_gb18030_to_ucs(cc);
271
18.9M
  }
272
503
  return WC_C_UCS4_ERROR;
273
365k
    case WC_CCS_A_UNKNOWN:
274
365k
  if (cc.ccs == WC_CCS_C1)
275
365k
      return (cc.code | 0x80);
276
6.06k
    default:
277
6.06k
  return WC_C_UCS4_ERROR;
278
106M
    }
279
85.0M
    if (map == NULL)
280
368k
  return WC_C_UCS4_ERROR;
281
84.6M
    if (map_size == 0 || cc.code > map_size - 1)
282
2.39k
  return WC_C_UCS4_ERROR;
283
84.6M
    cc.code = map[cc.code];
284
84.6M
    return cc.code ? cc.code : WC_C_UCS4_ERROR;
285
84.6M
}
286
287
wc_wchar_t
288
wc_any_to_any(wc_wchar_t cc, wc_table *t)
289
1.17k
{
290
1.17k
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
291
1.17k
    wc_uint32 ucs = wc_any_to_ucs(cc);
292
293
1.17k
    if (ucs != WC_C_UCS4_ERROR) {
294
609
  cc = wc_ucs_to_any(ucs, t);
295
609
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
296
202
      return cc;
297
298
407
  ucs = wc_ucs_to_fullwidth(ucs);
299
407
  if (ucs != WC_C_UCS4_ERROR) {
300
0
      cc = wc_ucs_to_any(ucs, t);
301
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
302
0
    return cc;
303
0
  }
304
407
    }
305
968
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
306
968
    return cc;
307
1.17k
}
308
309
wc_wchar_t
310
wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist)
311
82.4M
{
312
82.4M
    wc_wchar_t cc;
313
82.4M
    wc_table **t;
314
315
82.4M
    if (tlist != NULL) {
316
195M
  for (t = tlist; *t != NULL; t++) {
317
162M
      if ((*t)->map == NULL)
318
40.2M
    continue;
319
122M
      cc = wc_ucs_to_any(ucs, *t);
320
122M
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
321
49.5M
    return cc;
322
122M
  }
323
82.4M
    }
324
32.8M
    cc.ccs = WC_CCS_UNKNOWN;
325
32.8M
    return cc;
326
82.4M
}
327
328
wc_wchar_t
329
wc_any_to_any_ces(wc_wchar_t cc, wc_status *st)
330
98.6M
{
331
98.6M
    wc_uint32 ucs = wc_any_to_ucs(cc);
332
98.6M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
333
334
98.6M
    if (ucs < 0x80) {
335
18.8M
  cc.ccs = WC_CCS_US_ASCII;
336
18.8M
  cc.code = ucs;
337
18.8M
  return cc;
338
18.8M
    }
339
79.8M
    if (ucs != WC_C_UCS4_ERROR) {
340
78.0M
  if (st->ces_info->id & WC_CES_T_UTF) {
341
0
      cc.ccs = wc_ucs_to_ccs(ucs);
342
0
      cc.code = ucs;
343
0
      return cc;
344
78.0M
  } else if (st->ces_info->id == WC_CES_JOHAB) {
345
1.44M
      cc = wc_ucs_to_johab(ucs);
346
1.44M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
347
755k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
348
1.44M
      return cc;
349
1.44M
  }
350
76.5M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
351
76.5M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
352
44.2M
      return cc;
353
32.3M
  if (! WcOption.fix_width_conv) {
354
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
355
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
356
0
    return cc;
357
0
  }
358
32.3M
  if (st->ces_info->id == WC_CES_GB18030) {
359
9.55M
      cc = wc_ucs_to_gb18030(ucs);
360
9.55M
      if (WC_CCS_IS_UNKNOWN(cc.ccs))
361
13.6k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
362
9.55M
      return cc;
363
9.55M
  }
364
22.7M
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
365
20.2k
      cc.ccs = WC_CCS_US_ASCII;
366
20.2k
      cc.code = 0x20;
367
20.2k
      return cc;
368
20.2k
  }
369
22.7M
  if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) &&
370
2.39M
      0x80 <= ucs && ucs <= 0x9F) {
371
282
      cc.ccs = WC_CCS_C1;
372
282
      cc.code = ucs;
373
282
      return cc;
374
282
  }
375
376
22.7M
  ucs = wc_ucs_to_fullwidth(ucs);
377
22.7M
  if (ucs != WC_C_UCS4_ERROR) {
378
376k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
379
376k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
380
12.2k
    return cc;
381
364k
      if (! WcOption.fix_width_conv) {
382
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
383
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
384
0
        return cc;
385
0
      }
386
364k
  }
387
22.7M
    }
388
24.5M
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
389
24.5M
    return cc;
390
79.8M
}
391
392
wc_wchar_t
393
wc_any_to_iso2022(wc_wchar_t cc, wc_status *st)
394
5.44M
{
395
5.44M
    wc_uint32 ucs = wc_any_to_ucs(cc);
396
5.44M
    wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
397
398
5.44M
    if (ucs < 0x80) {
399
570
  cc.ccs = WC_CCS_US_ASCII;
400
570
  cc.code = ucs;
401
570
  return cc;
402
570
    }
403
5.44M
    if (ucs != WC_C_UCS4_ERROR) {
404
5.43M
  cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
405
5.43M
  if (!WC_CCS_IS_UNKNOWN(cc.ccs))
406
5.33M
      return cc;
407
106k
  if (! WcOption.strict_iso2022) {
408
0
      cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
409
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
410
0
    return cc;
411
0
  }
412
106k
  if (! WcOption.fix_width_conv) {
413
0
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
414
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
415
0
    return cc;
416
0
      if (! WcOption.strict_iso2022) {
417
0
    cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
418
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
419
0
        return cc;
420
0
      }
421
0
  }
422
106k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
423
474
     cc.ccs = WC_CCS_US_ASCII;
424
474
     cc.code = 0x20;
425
474
     return cc;
426
474
  }
427
428
106k
  ucs = wc_ucs_to_fullwidth(ucs);
429
106k
  if (ucs != WC_C_UCS4_ERROR) {
430
9.91k
      cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
431
9.91k
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
432
326
    return cc;
433
9.58k
      if (! WcOption.strict_iso2022) {
434
0
    cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
435
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
436
0
        return cc;
437
0
      }
438
9.58k
      if (! WcOption.fix_width_conv) {
439
0
    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
440
0
    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
441
0
        return cc;
442
0
    if (! WcOption.strict_iso2022) {
443
0
        cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
444
0
        if (!WC_CCS_IS_UNKNOWN(cc.ccs))
445
0
      return cc;
446
0
    }
447
0
      }
448
9.58k
  }
449
105k
  if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */
450
0
     cc.ccs = WC_CCS_US_ASCII;
451
0
     cc.code = 0x20;
452
0
     return cc;
453
0
  }
454
105k
    }
455
112k
    cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
456
112k
    return cc;
457
5.44M
}
458
459
wc_wchar_t
460
wc_ucs_to_iso2022(wc_uint32 ucs)
461
0
{
462
0
    wc_table *t;
463
0
    wc_wchar_t cc;
464
0
    int f;
465
466
0
    if (ucs <= WC_C_UCS2_END) {
467
0
  for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) {
468
0
      t = &ucs_cs96_table[f];
469
0
      if (t->map == NULL)
470
0
    continue;
471
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
472
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
473
0
    return cc;
474
0
  }
475
0
  for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) {
476
0
      t = &ucs_cs94_table[f];
477
0
      if (t->map == NULL)
478
0
    continue;
479
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
480
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
481
0
    return cc;
482
0
  }
483
0
  for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) {
484
0
      t = &ucs_cs942_table[f];
485
0
      if (t->map == NULL)
486
0
    continue;
487
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
488
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
489
0
    return cc;
490
0
  }
491
0
    }
492
0
    cc.ccs = WC_CCS_UNKNOWN;
493
0
    return cc;
494
0
}
495
496
wc_wchar_t
497
wc_ucs_to_iso2022w(wc_uint32 ucs)
498
0
{
499
0
    wc_table *t;
500
0
    wc_wchar_t cc;
501
0
    int f;
502
503
0
    if (ucs <= WC_C_UCS2_END) {
504
0
  for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) {
505
0
      t = &ucs_cs94w_table[f];
506
0
      if (t->map == NULL)
507
0
    continue;
508
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
509
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
510
0
    return cc;
511
0
  }
512
0
  for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) {
513
0
      t = &ucs_cs96w_table[f];
514
0
      if (t->map == NULL)
515
0
    continue;
516
0
      cc = wc_ucs_to_any((wc_uint16)ucs, t);
517
0
      if (!WC_CCS_IS_UNKNOWN(cc.ccs))
518
0
    return cc;
519
0
  }
520
0
    }
521
0
    cc.ccs = WC_CCS_UNKNOWN_W;
522
0
    return cc;
523
0
}
524
525
wc_ccs
526
wc_ucs_to_ccs(wc_uint32 ucs)
527
166k
{
528
166k
    if (0x80 <= ucs && ucs <= 0x9F)
529
3.57k
  return WC_CCS_C1;
530
163k
    return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
531
163k
  | ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
532
163k
        ? WC_CCS_A_WIDE : 0)
533
163k
  | (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
534
163k
  | (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
535
166k
}
536
537
wc_bool
538
wc_is_ucs_ambiguous_width(wc_uint32 ucs)
539
0
{
540
0
    if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213)
541
0
  return 1;
542
0
    else if (ucs <= WC_C_UCS2_END)
543
0
  return (wc_map_range_search((wc_uint16)ucs,
544
0
        ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL);
545
0
    else
546
0
  return ((0xF0000 <= ucs && ucs <= 0xFFFFD)
547
0
    || (0x100000 <= ucs && ucs <= 0x10FFFD));
548
0
}
549
550
wc_bool
551
wc_is_ucs_wide(wc_uint32 ucs)
552
163k
{
553
163k
    if (ucs <= WC_C_UCS2_END)
554
154k
  return (wc_map_range_search((wc_uint16)ucs,
555
154k
    ucs_wide_map, N_ucs_wide_map) != NULL);
556
8.95k
    else
557
8.95k
  return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 ||
558
8.54k
    (ucs & ~0xFFFF) == WC_C_UCS4_PLANE3);
559
163k
}
560
561
wc_bool
562
wc_is_ucs_combining(wc_uint32 ucs)
563
163k
{
564
163k
    return (WcOption.use_combining && ucs <= WC_C_UCS2_END &&
565
154k
  wc_map_range_search((wc_uint16)ucs,
566
154k
  ucs_combining_map, N_ucs_combining_map) != NULL);
567
163k
}
568
569
wc_bool
570
wc_is_ucs_hangul(wc_uint32 ucs)
571
0
{
572
0
    return (ucs <= WC_C_UCS2_END &&
573
0
  wc_map_range_search((wc_uint16)ucs,
574
0
  ucs_hangul_map, N_ucs_hangul_map) != NULL);
575
0
}
576
577
wc_bool
578
wc_is_ucs_alpha(wc_uint32 ucs)
579
0
{
580
0
    return (ucs <= WC_C_UCS2_END &&
581
0
  wc_map_range_search((wc_uint16)ucs,
582
0
  ucs_isalpha_map, N_ucs_isalpha_map) != NULL);
583
0
}
584
585
wc_bool
586
wc_is_ucs_digit(wc_uint32 ucs)
587
0
{
588
0
    return (ucs <= WC_C_UCS2_END &&
589
0
  wc_map_range_search((wc_uint16)ucs,
590
0
  ucs_isdigit_map, N_ucs_isdigit_map) != NULL);
591
0
}
592
593
wc_bool
594
wc_is_ucs_alnum(wc_uint32 ucs)
595
0
{
596
0
    return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs));
597
0
}
598
599
wc_bool
600
wc_is_ucs_lower(wc_uint32 ucs)
601
0
{
602
0
    return (ucs <= WC_C_UCS2_END &&
603
0
  wc_map_range_search((wc_uint16)ucs,
604
0
  ucs_islower_map, N_ucs_islower_map) != NULL);
605
0
}
606
607
wc_bool
608
wc_is_ucs_upper(wc_uint32 ucs)
609
0
{
610
0
    return (ucs <= WC_C_UCS2_END &&
611
0
  wc_map_range_search((wc_uint16)ucs,
612
0
  ucs_isupper_map, N_ucs_isupper_map) != NULL);
613
0
}
614
615
wc_uint32
616
wc_ucs_toupper(wc_uint32 ucs)
617
0
{
618
0
    wc_map *conv = NULL;
619
0
    if (ucs <= WC_C_UCS2_END)
620
0
  conv = wc_map_search((wc_uint16)ucs,
621
0
           ucs_toupper_map, N_ucs_toupper_map);
622
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
623
0
}
624
625
wc_uint32
626
wc_ucs_tolower(wc_uint32 ucs)
627
0
{
628
0
    wc_map *conv = NULL;
629
0
    if (ucs <= WC_C_UCS2_END)
630
0
  conv = wc_map_search((wc_uint16)ucs,
631
0
           ucs_tolower_map, N_ucs_tolower_map);
632
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
633
0
}
634
635
wc_uint32
636
wc_ucs_totitle(wc_uint32 ucs)
637
0
{
638
0
    wc_map *conv = NULL;
639
0
    if (ucs <= WC_C_UCS2_END)
640
0
  conv = wc_map_search((wc_uint16)ucs,
641
0
           ucs_totitle_map, N_ucs_totitle_map);
642
0
    return conv ? (wc_uint32)(conv->code2) : ucs;
643
0
}
644
645
wc_uint32
646
wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2)
647
12.2k
{
648
12.2k
    wc_map3 *map;
649
650
12.2k
    if (WcOption.use_combining &&
651
12.2k
  ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END &&
652
9.04k
  (map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2,
653
9.04k
  ucs_precompose_map, N_ucs_precompose_map)) != NULL)
654
2.06k
  return map->code3;
655
10.1k
    return WC_C_UCS4_ERROR;
656
12.2k
}
657
658
wc_uint32
659
wc_ucs_to_fullwidth(wc_uint32 ucs)
660
22.8M
{
661
22.8M
    wc_map *map;
662
663
22.8M
    if (ucs <= WC_C_UCS2_END &&
664
22.8M
  (map = wc_map_search((wc_uint16)ucs,
665
22.8M
  ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL)
666
386k
  return map->code2;
667
22.4M
    return WC_C_UCS4_ERROR;
668
22.8M
}
669
670
int
671
wc_ucs_put_tag(char *p)
672
3.59k
{
673
3.59k
    int i;
674
675
3.59k
    if (p == NULL || *p == '\0')
676
808
  return 0;
677
210k
    for (i = 1; i <= n_tag_map; i++) {
678
210k
  if (!strcasecmp(p, tag_map[i]))
679
2.48k
      return i;
680
210k
    }
681
301
    if (n_tag_map + 1 >= MAX_TAG_MAP)
682
46
  return 0;
683
255
    n_tag_map++;
684
255
    tag_map[n_tag_map] = p;
685
255
    return n_tag_map;
686
301
}
687
688
char *
689
wc_ucs_get_tag(int ntag)
690
18.9M
{
691
18.9M
    if (ntag <= 0 || ntag > n_tag_map)
692
665
  return NULL;
693
18.9M
    return tag_map[ntag];
694
18.9M
}
695
696
void
697
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
698
39.1M
{
699
39.1M
    wc_ccs ccs;
700
701
39.1M
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
702
48.8k
  if (! WcOption.use_language_tag)
703
0
      return;
704
48.8k
  if (ucs == WC_C_LANGUAGE_TAG)
705
4.41k
      if (st->tag)
706
287
    Strclear(st->tag);
707
4.13k
      else
708
4.13k
    st->tag = Strnew_size(MAX_TAG_LEN);
709
44.3k
  else if (ucs == WC_C_CANCEL_TAG) {
710
692
      if (st->tag)
711
485
    Strfree(st->tag);
712
692
      st->tag = NULL;
713
692
      st->ntag = 0;
714
43.7k
  }  else if (st->tag && st->tag->length < MAX_TAG_LEN &&
715
13.7k
        ucs >= WC_C_TAG_SPACE)
716
13.5k
      Strcat_char(st->tag, (char)(ucs & 0x7f));
717
48.8k
  return;
718
48.8k
    }
719
39.0M
    if (st->tag) {
720
3.59k
  st->ntag = wc_ucs_put_tag(st->tag->ptr);
721
3.59k
  st->tag = NULL;
722
3.59k
    }
723
39.0M
    if (ucs < 0x80) {
724
38.9M
  if (st->ntag)
725
38.5M
      wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
726
376k
  else
727
376k
      Strcat_char(os, (char)ucs);
728
38.9M
    } else {
729
165k
  ccs = wc_ucs_to_ccs(ucs);
730
165k
  if (st->ntag && ucs <= WC_C_UNICODE_END) {
731
141k
      ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs);
732
141k
      ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
733
141k
  }
734
165k
  wtf_push(os, ccs, ucs);
735
165k
    }
736
39.0M
}
737
738
#endif