Coverage Report

Created: 2026-02-11 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/wtf.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "wtf.h"
4
#include "sjis.h"
5
#include "big5.h"
6
#include "hkscs.h"
7
#include "johab.h"
8
#include "jis.h"
9
#include "viet.h"
10
#include "gbk.h"
11
#include "gb18030.h"
12
#include "uhc.h"
13
#ifdef USE_UNICODE
14
#include "ucs.h"
15
#include "utf8.h"
16
#endif
17
18
/*
 * Width table indexed by the first byte of an encoded character.
 * A zero entry marks a lead byte that wtf_len() and wtf_parse() treat as
 * attaching to the preceding character.  Entries 0xa1..0xff are rewritten
 * at run time by wtf_init().
 */
wc_uint8 WTF_WIDTH_MAP[ 0x100 ] = {
    /* 0x00 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x10 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x20 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x30 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x40 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x60 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,

    /* 0x80 */ 1,2,1,2,1,1,1,2, 1,2,1,2,1,1,1,1,
    /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,1,1,1,1,
    /* 0xa0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xb0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xc0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xd0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xe0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xf0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
};
29
30
/*
 * Encoded length in bytes, indexed by the first byte of a character.
 * The 0x80..0x9b entries match the sequence lengths wtf_push() emits
 * (3, 4 or 6 bytes).  Entries 0xa1..0xff are rewritten at run time by
 * wtf_init().
 */
wc_uint8 WTF_LEN_MAP[ 0x100 ] = {
    /* 0x00 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x10 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x20 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x30 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x40 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x60 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,

    /* 0x80 */ 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1,
    /* 0x90 */ 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1,
    /* 0xa0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xb0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xc0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xd0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xe0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xf0 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
};
41
42
/*
 * Type flags indexed by the first byte of an encoded character.
 * NOTE(review): the numeric values presumably correspond to the WTF_TYPE_*
 * constants declared in wtf.h -- confirm against that header.
 * Entries 0xa1..0xff are rewritten at run time by wtf_init().
 */
wc_uint8 WTF_TYPE_MAP[ 0x100 ] = {
    /* 0x00 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x10 */ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x20 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,

    /* 0x80 */ 2,0xA,2,0xA,2,0x12,2,0xA, 2,0xA,2,0xA,0x20,0x20,0x20,0x20,
    /* 0x90 */ 4,0xC,4,0xC,4,0x20,4,0xC, 4,0xC,4,0xC,0x20,0x20,0x20,0x20,
    /* 0xa0 */ 0x20,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xb0 */ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xc0 */ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xd0 */ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xe0 */ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xf0 */ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
};
54
55
static wc_uint16 CCS_MAP[ 33 ] = {
56
    WC_CCS_A_CS94    >> 8, WC_CCS_A_CS94W    >> 8,
57
    WC_CCS_A_CS96    >> 8, WC_CCS_A_CS96W    >> 8,
58
    WC_CCS_A_CS942   >> 8, WC_CCS_A_UNKNOWN  >> 8,
59
    WC_CCS_A_PCS     >> 8, WC_CCS_A_PCSW     >> 8,
60
    WC_CCS_A_WCS16   >> 8, WC_CCS_A_WCS16W   >> 8,
61
    WC_CCS_A_WCS32   >> 8, WC_CCS_A_WCS32W   >> 8,
62
    0,                     0,
63
    0,                     0,
64
    WC_CCS_A_CS94_C  >> 8, WC_CCS_A_CS94W_C  >> 8,
65
    WC_CCS_A_CS96_C  >> 8, WC_CCS_A_CS96W_C  >> 8,
66
    WC_CCS_A_CS942_C >> 8, 0,
67
    WC_CCS_A_PCS_C   >> 8, WC_CCS_A_PCSW_C   >> 8,
68
    WC_CCS_A_WCS16_C >> 8, WC_CCS_A_WCS16W_C >> 8,
69
    WC_CCS_A_WCS32_C >> 8, WC_CCS_A_WCS32W_C >> 8,
70
    0,                     0,
71
    0,                     0,
72
    0,
73
};
74
75
wc_ccs wtf_gr_ccs = 0;
76
static wc_ces wtf_major_ces = WC_CES_US_ASCII;
77
static wc_status wtf_major_st;
78
79
void
80
wtf_init(wc_ces ces1, wc_ces ces2)
81
1
{
82
1
    int i;
83
1
    wc_gset *gset;
84
85
1
    if (wc_check_ces(ces2))
86
1
  wtf_major_ces = ces2;
87
88
1
    if (! wc_check_ces(ces1))
89
0
  return;
90
1
    gset = WcCesInfo[WC_CES_INDEX(ces1)].gset;
91
1
    if (gset == NULL || gset[1].ccs == 0 ||
92
1
  gset[1].ccs & (WC_CCS_A_WCS16|WC_CCS_A_WCS32))
93
1
  return;
94
0
    wtf_gr_ccs = gset[1].ccs;
95
96
0
    if (WC_CCS_IS_WIDE(wtf_gr_ccs)) {
97
0
  for (i = 0xa1; i <= 0xff; i++) {
98
0
      WTF_WIDTH_MAP[i] = 2;
99
0
      WTF_LEN_MAP[i] = 2;
100
0
      WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1W;
101
0
  }
102
0
    } else {
103
0
  for (i = 0xa1; i <= 0xff; i++) {
104
0
      WTF_WIDTH_MAP[i] = 1;
105
0
      WTF_LEN_MAP[i] = 1;
106
0
      WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1;
107
0
  }
108
0
    }
109
0
}
110
111
/*
112
int
113
wtf_width(wc_uchar *p)
114
{
115
    return (int)WTF_WIDTH_MAP[*p];
116
}
117
*/
118
119
int
120
wtf_strwidth(wc_uchar *p)
121
0
{
122
0
    int w = 0;
123
0
    wc_uchar *q = p + strlen((char *)p);
124
125
0
    while (p < q) {
126
0
  w += wtf_width(p);
127
0
  p += WTF_LEN_MAP[*p];
128
0
    }
129
0
    return w;
130
0
}
131
132
size_t
133
wtf_len1(wc_uchar *p)
134
0
{
135
0
    size_t len, len_max = WTF_LEN_MAP[*p];
136
137
0
    for (len = 0; *(p + len); len++)
138
0
  if (len == len_max)
139
0
      break;
140
0
    if (len == 0)
141
0
  len = 1;
142
0
    return len;
143
0
}
144
145
size_t
146
wtf_len(wc_uchar *p)
147
0
{
148
0
    wc_uchar *q = p;
149
0
    wc_uchar *strz = p + strlen((char *)p);
150
151
0
    q += WTF_LEN_MAP[*q];
152
0
    while (q < strz && ! WTF_WIDTH_MAP[*q])
153
0
  q += WTF_LEN_MAP[*q];
154
0
    return q - p;
155
0
}
156
157
/*
158
int
159
wtf_type(wc_uchar *p)
160
{
161
    return (int)WTF_TYPE_MAP[*p];
162
}
163
*/
164
165
/*
 * Store code c into 3 bytes at p: 2 + 7 + 7 bits, most significant first,
 * each byte with its top bit set.
 */
#define wcs16_to_wtf(c, p) \
    ((p)[0] = 0x80 | (((c) >> 14) & 0x03)), \
    ((p)[1] = 0x80 | (((c) >>  7) & 0x7f)), \
    ((p)[2] = 0x80 | ( (c)        & 0x7f))

/*
 * Store code c into 5 bytes at p: 4 + 7 + 7 + 7 + 7 bits, most significant
 * first, each byte with its top bit set.
 */
#define wcs32_to_wtf(c, p) \
    ((p)[0] = 0x80 | (((c) >> 28) & 0x0f)), \
    ((p)[1] = 0x80 | (((c) >> 21) & 0x7f)), \
    ((p)[2] = 0x80 | (((c) >> 14) & 0x7f)), \
    ((p)[3] = 0x80 | (((c) >>  7) & 0x7f)), \
    ((p)[4] = 0x80 | ( (c)        & 0x7f))

/*
 * Inverse of wcs16_to_wtf(): rebuild the code from 3 bytes at p.
 * Yields 0 if any of the three bytes is NUL (bytes are tested in order and
 * testing stops at the first NUL).
 */
#define wtf_to_wcs16(p) \
    (((p)[0] && (p)[1] && (p)[2]) \
     ? (  ((wc_uint32)((p)[0] & 0x03) << 14) \
        | ((wc_uint32)((p)[1] & 0x7f) <<  7) \
        | ((wc_uint32)((p)[2] & 0x7f)      )) \
     : 0)

/*
 * Inverse of wcs32_to_wtf(): rebuild the code from 5 bytes at p.
 * Yields 0 if any of the five bytes is NUL (bytes are tested in order and
 * testing stops at the first NUL).
 */
#define wtf_to_wcs32(p) \
    (((p)[0] && (p)[1] && (p)[2] && (p)[3] && (p)[4]) \
     ? (  ((wc_uint32)((p)[0] & 0x0f) << 28) \
        | ((wc_uint32)((p)[1] & 0x7f) << 21) \
        | ((wc_uint32)((p)[2] & 0x7f) << 14) \
        | ((wc_uint32)((p)[3] & 0x7f) <<  7) \
        | ((wc_uint32)((p)[4] & 0x7f)      )) \
     : 0)
187
188
void
189
wtf_push(Str os, wc_ccs ccs, wc_uint32 code)
190
288M
{
191
288M
    wc_uchar s[8];
192
288M
    wc_wchar_t cc, cc2;
193
288M
    size_t n;
194
195
288M
    if (ccs == WC_CCS_US_ASCII) {
196
819k
  Strcat_char(os, (char)(code & 0x7f));
197
819k
  return;
198
819k
    }
199
288M
    cc.ccs = ccs;
200
288M
    cc.code = code;
201
288M
    if (WcOption.pre_conv && !(cc.ccs & WC_CCS_A_UNKNOWN)) {
202
0
  if ((ccs == WC_CCS_JOHAB || ccs == WC_CCS_JOHAB_1 ||
203
0
    ccs == WC_CCS_JOHAB_2 || ccs == WC_CCS_JOHAB_3) &&
204
0
    (wtf_major_ces == WC_CES_EUC_KR ||
205
0
    wtf_major_ces == WC_CES_ISO_2022_KR)) {
206
0
      cc2 = wc_johab_to_ksx1001(cc);
207
0
      if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
208
0
    cc = cc2;
209
0
  } else if (ccs == WC_CCS_KS_X_1001 &&
210
0
    wtf_major_ces == WC_CES_JOHAB) {
211
0
      cc2 = wc_ksx1001_to_johab(cc);
212
0
      if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
213
0
    cc = cc2;
214
0
  }
215
0
#ifdef USE_UNICODE
216
0
  else if (WcOption.ucs_conv) {
217
0
      wc_bool fix_width_conv = WcOption.fix_width_conv;
218
0
      WcOption.fix_width_conv = WC_FALSE;
219
0
      wc_output_init(wtf_major_ces, &wtf_major_st);
220
0
      if (! wc_ces_has_ccs(WC_CCS_SET(ccs), &wtf_major_st)) {
221
0
    cc2 = wc_any_to_any_ces(cc, &wtf_major_st);
222
0
    if (cc2.ccs == WC_CCS_US_ASCII) {
223
0
        Strcat_char(os, (char)(cc2.code & 0x7f));
224
0
        return;
225
0
    }
226
0
    if (!WC_CCS_IS_UNKNOWN(cc2.ccs) &&
227
0
      cc2.ccs != WC_CCS_CP1258_2 &&
228
0
      cc2.ccs != WC_CCS_TCVN_5712_3)
229
0
        cc = cc2;
230
0
      }
231
0
      WcOption.fix_width_conv = fix_width_conv;
232
0
  }
233
0
#endif
234
0
    }
235
236
288M
    switch (WC_CCS_TYPE(cc.ccs)) {
237
79.6M
    case WC_CCS_A_CS94:
238
79.6M
  if (cc.ccs == wtf_gr_ccs) {
239
0
      s[0] = (cc.code & 0x7f) | 0x80;
240
0
      n = 1;
241
0
      break;
242
0
  }
243
79.6M
  if (cc.ccs == WC_CCS_JIS_X_0201K && !WcOption.use_jisx0201k) {
244
78.2M
      cc2 = wc_jisx0201k_to_jisx0208(cc);
245
78.2M
      if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) {
246
77.7M
    wtf_push(os, cc2.ccs, cc2.code);
247
77.7M
    return;
248
77.7M
      }
249
78.2M
  }
250
1.97M
  s[0] = WTF_C_CS94;
251
1.97M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
252
1.97M
  s[2] = (cc.code & 0x7f) | 0x80;
253
1.97M
  n = 3;
254
1.97M
  break;
255
84.5M
    case WC_CCS_A_CS94W:
256
84.5M
  if (cc.ccs == wtf_gr_ccs) {
257
0
      s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
258
0
      s[1] = ( cc.code       & 0x7f) | 0x80;
259
0
      n = 2;
260
0
      break;
261
0
  }
262
84.5M
  s[0] = WTF_C_CS94W;
263
84.5M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
264
84.5M
  s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
265
84.5M
  s[3] = ( cc.code       & 0x7f) | 0x80;
266
84.5M
  n = 4;
267
84.5M
  break;
268
6.89M
    case WC_CCS_A_CS96:
269
6.89M
  if (WcOption.use_combining && wc_is_combining(cc))
270
25.3k
      s[0] = WTF_C_CS96_C;
271
6.87M
  else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
272
0
      s[0] = (cc.code & 0x7f) | 0x80;
273
0
      n = 1;
274
0
      break;
275
0
  } else
276
6.87M
      s[0] = WTF_C_CS96;
277
6.89M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
278
6.89M
  s[2] = (cc.code & 0x7f) | 0x80;
279
6.89M
  n = 3;
280
6.89M
  break;
281
3.53k
    case WC_CCS_A_CS96W:
282
3.53k
  if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
283
0
      s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
284
0
      s[1] = ( cc.code       & 0x7f) | 0x80;
285
0
      n = 2;
286
0
      break;
287
0
  }
288
3.53k
  s[0] = WTF_C_CS96W;
289
3.53k
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
290
3.53k
  s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
291
3.53k
  s[3] = ( cc.code       & 0x7f) | 0x80;
292
3.53k
  n = 4;
293
3.53k
  break;
294
24.8k
    case WC_CCS_A_CS942:
295
24.8k
  if (cc.ccs == wtf_gr_ccs) {
296
0
      s[0] = (cc.code & 0x7f) | 0x80;
297
0
      n = 1;
298
0
      break;
299
0
  }
300
24.8k
  s[0] = WTF_C_CS942;
301
24.8k
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
302
24.8k
  s[2] = (cc.code & 0x7f) | 0x80;
303
24.8k
  n = 3;
304
24.8k
  break;
305
21.9M
    case WC_CCS_A_PCS:
306
21.9M
  if (WcOption.use_combining && wc_is_combining(cc))
307
58.6k
      s[0] = WTF_C_PCS_C;
308
21.9M
  else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
309
0
      s[0] = (cc.code & 0x7f) | 0x80;
310
0
      n = 1;
311
0
      break;
312
0
  } else
313
21.9M
      s[0] = WTF_C_PCS;
314
21.9M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
315
21.9M
  s[2] = (cc.code & 0x7f) | 0x80;
316
21.9M
  n = 3;
317
21.9M
  break;
318
10.8M
    case WC_CCS_A_PCSW:
319
10.8M
  switch (cc.ccs) {
320
191k
  case WC_CCS_SJIS_EXT:
321
191k
      cc = wc_sjis_ext_to_cs94w(cc);
322
191k
      break;
323
4.48M
  case WC_CCS_GBK:
324
4.48M
      cc = wc_gbk_to_cs128w(cc);
325
4.48M
      break;
326
1.39M
  case WC_CCS_GBK_EXT:
327
1.39M
      cc = wc_gbk_ext_to_cs128w(cc);
328
1.39M
      break;
329
1.02M
  case WC_CCS_BIG5:
330
1.02M
      cc = wc_big5_to_cs94w(cc);
331
1.02M
      break;
332
41.3k
  case WC_CCS_HKSCS:
333
41.3k
      cc = wc_hkscs_to_cs128w(cc);
334
41.3k
      break;
335
2.10M
  case WC_CCS_JOHAB:
336
2.10M
      cc = wc_johab_to_cs128w(cc);
337
2.10M
      break;
338
1.51M
  case WC_CCS_UHC:
339
1.51M
      cc = wc_uhc_to_cs128w(cc);
340
1.51M
      break;
341
10.8M
  }
342
10.8M
  if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
343
0
      s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
344
0
      s[1] = ( cc.code       & 0x7f) | 0x80;
345
0
      n = 2;
346
0
      break;
347
0
  }
348
10.8M
  s[0] = WTF_C_PCSW;
349
10.8M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
350
10.8M
  s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
351
10.8M
  s[3] = ( cc.code       & 0x7f) | 0x80;
352
10.8M
  n = 4;
353
10.8M
  break;
354
63.2k
    case WC_CCS_A_WCS16:
355
63.2k
  s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS16W : WTF_C_WCS16)
356
63.2k
       | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
357
63.2k
  wcs16_to_wtf(cc.code, s + 1);
358
63.2k
  s[1] |= (WC_CCS_INDEX(cc.ccs) << 2);
359
63.2k
  n = 4;
360
63.2k
  break;
361
44.0M
    case WC_CCS_A_WCS32:
362
44.0M
  s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS32W : WTF_C_WCS32)
363
44.0M
       | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
364
44.0M
  wcs32_to_wtf(cc.code, s + 1);
365
44.0M
  s[1] |= (WC_CCS_INDEX(cc.ccs) << 4);
366
44.0M
  n = 6;
367
44.0M
  break;
368
40.0M
    default:
369
40.0M
  s[0] = WTF_C_UNKNOWN;
370
40.0M
  s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
371
40.0M
  s[2] = (cc.code & 0x7f) | 0x80;
372
40.0M
  n = 3;
373
40.0M
  break;
374
288M
    }
375
210M
    Strcat_charp_n(os, (char *)s, n);
376
210M
}
377
378
void
379
wtf_push_unknown(Str os, wc_uchar *p, size_t len)
380
29.1M
{
381
65.2M
    for (; len--; p++) {
382
36.0M
  if (*p & 0x80)
383
33.1M
      wtf_push(os, WC_CCS_UNKNOWN, *p);
384
2.87M
  else
385
2.87M
      Strcat_char(os, (char)*p);
386
36.0M
    }
387
29.1M
}
388
389
wc_wchar_t
390
wtf_parse1(wc_uchar **p)
391
210M
{
392
210M
    wc_uchar *q = *p;
393
210M
    wc_wchar_t cc;
394
395
210M
    if (*q < 0x80) {
396
542
  cc.ccs = WC_CCS_US_ASCII;
397
542
  cc.code = *(q++);
398
210M
    } else if (*q > 0xa0) {
399
28.7k
  cc.ccs = wtf_gr_ccs;
400
28.7k
  if (WC_CCS_IS_WIDE(cc.ccs) && *(q+1)) {
401
0
      cc.code = ((wc_uint32)*q << 8) | *(q+1);
402
0
      q += 2;
403
0
  } else
404
28.7k
      cc.code = *(q++);
405
210M
    } else {
406
210M
  cc.ccs = (wc_uint32)CCS_MAP[*(q++) - 0x80] << 8;
407
210M
  switch (WC_CCS_TYPE(cc.ccs)) {
408
2.02M
  case WC_CCS_A_CS94:
409
8.95M
  case WC_CCS_A_CS96:
410
8.98M
  case WC_CCS_A_CS942:
411
31.0M
  case WC_CCS_A_PCS:
412
71.0M
  case WC_CCS_A_UNKNOWN:
413
71.0M
      if (*q && *(q+1)) {
414
71.0M
    cc.ccs |= *(q++) & 0x7f;
415
71.0M
    cc.code = *(q++);
416
71.0M
      } else {
417
2.90k
    cc.ccs = WC_CCS_US_ASCII;
418
2.90k
    cc.code = (wc_uint32)' ';
419
2.90k
      }
420
71.0M
      break;
421
84.5M
  case WC_CCS_A_CS94W:
422
84.5M
  case WC_CCS_A_CS96W:
423
95.3M
  case WC_CCS_A_PCSW:
424
95.3M
      if (*q && *(q+1) && *(q+2)) {
425
95.3M
    cc.ccs |= *(q++) & 0x7f;
426
95.3M
    cc.code = ((wc_uint32)*q << 8) | *(q+1);
427
95.3M
    q += 2;
428
95.3M
      } else {
429
2.74k
    cc.ccs = WC_CCS_US_ASCII;
430
2.74k
    cc.code = (wc_uint32)' ';
431
2.74k
      }
432
95.3M
      break;
433
73.1k
  case WC_CCS_A_WCS16:
434
73.1k
  case WC_CCS_A_WCS16W:
435
73.1k
      if (*q && *(q+1) && *(q+2)) {
436
70.4k
    cc.ccs |= (*q & 0x7c) >> 2;
437
70.4k
    cc.code = wtf_to_wcs16(q);
438
70.4k
    q += 3;
439
70.4k
      } else {
440
2.70k
    cc.ccs = WC_CCS_US_ASCII;
441
2.70k
    cc.code = (wc_uint32)' ';
442
2.70k
      }
443
73.1k
      break;
444
44.0M
  case WC_CCS_A_WCS32:
445
44.0M
  case WC_CCS_A_WCS32W:
446
44.0M
      if (*q && *(q+1) && *(q+2) && *(q+3) && *(q+4)) {
447
44.0M
    cc.ccs |= (*q & 0x70) >> 4;
448
44.0M
    cc.code = wtf_to_wcs32(q);
449
44.0M
    q += 5;
450
44.0M
      } else {
451
4.00k
    cc.ccs = WC_CCS_US_ASCII;
452
4.00k
    cc.code = (wc_uint32)' ';
453
4.00k
      }
454
44.0M
      break;
455
2.23k
  default:
456
  /* case 0: */
457
2.23k
      cc.ccs = WC_CCS_US_ASCII;
458
2.23k
      cc.code = (wc_uint32)' ';
459
2.23k
      break;
460
210M
  }
461
210M
    }
462
463
210M
    *p = q;
464
210M
    switch (cc.ccs) {
465
146k
    case WC_CCS_SJIS_EXT_1:
466
192k
    case WC_CCS_SJIS_EXT_2:
467
192k
  return wc_cs94w_to_sjis_ext(cc);
468
4.22M
    case WC_CCS_GBK_1:
469
4.43M
    case WC_CCS_GBK_2:
470
4.43M
  return wc_cs128w_to_gbk(cc);
471
349k
    case WC_CCS_GBK_EXT_1:
472
1.38M
    case WC_CCS_GBK_EXT_2:
473
1.38M
  return wc_cs128w_to_gbk_ext(cc);
474
565k
    case WC_CCS_BIG5_1:
475
1.02M
    case WC_CCS_BIG5_2:
476
1.02M
  return wc_cs94w_to_big5(cc);
477
33.9k
    case WC_CCS_HKSCS_1:
478
41.5k
    case WC_CCS_HKSCS_2:
479
41.5k
  return wc_cs128w_to_hkscs(cc);
480
1.39M
    case WC_CCS_JOHAB_1:
481
2.05M
    case WC_CCS_JOHAB_2:
482
2.10M
    case WC_CCS_JOHAB_3:
483
2.10M
  return wc_cs128w_to_johab(cc);
484
1.50M
    case WC_CCS_UHC_1:
485
1.51M
    case WC_CCS_UHC_2:
486
1.51M
  return wc_cs128w_to_uhc(cc);
487
210M
    }
488
199M
    return cc;
489
210M
}
490
491
wc_wchar_t
492
wtf_parse(wc_uchar **p)
493
221M
{
494
221M
    wc_uchar *q;
495
221M
    wc_wchar_t cc, cc2;
496
221M
    wc_uint32 ucs, ucs2;
497
498
221M
    if (**p < 0x80) {
499
11.0M
  cc.ccs = WC_CCS_US_ASCII;
500
11.0M
  cc.code = *((*p)++);
501
11.0M
    } else
502
210M
  cc = wtf_parse1(p);
503
221M
    if ((! WcOption.use_combining) || WTF_WIDTH_MAP[**p])
504
221M
  return cc;
505
506
147k
    q = *p;
507
147k
    cc2 = wtf_parse1(&q);
508
147k
    if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_CP1258_1) &&
509
58.7k
  WC_CCS_SET(cc2.ccs) == WC_CCS_CP1258_1) {
510
24.6k
  cc2.code = wc_cp1258_precompose(cc.code, cc2.code);
511
24.6k
  if (cc2.code) {
512
2.41k
      cc2.ccs = WC_CCS_CP1258_2;
513
2.41k
      *p = q;
514
2.41k
      return cc2;
515
2.41k
  }
516
122k
    } else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_TCVN_5712_1) &&
517
45.1k
  WC_CCS_SET(cc2.ccs) == WC_CCS_TCVN_5712_1) {
518
31.2k
  cc2.code = wc_tcvn5712_precompose(cc.code, cc2.code);
519
31.2k
  if (cc2.code) {
520
2.22k
      cc2.ccs = WC_CCS_TCVN_5712_3;
521
2.22k
      *p = q;
522
2.22k
      return cc2;
523
2.22k
  }
524
31.2k
    }
525
91.7k
#ifdef USE_UNICODE
526
91.7k
    else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_ISO_8859_1 ||
527
77.5k
  WC_CCS_IS_UNICODE(cc.ccs)) && WC_CCS_IS_UNICODE(cc2.ccs)) {
528
31.2k
  while (1) {
529
31.2k
      ucs = (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
530
31.2k
    ? wc_ucs_tag_to_ucs(cc.code) : cc.code;
531
31.2k
      ucs2 = (WC_CCS_SET(cc2.ccs) == WC_CCS_UCS_TAG)
532
31.2k
    ? wc_ucs_tag_to_ucs(cc2.code) : cc2.code;
533
31.2k
      ucs = wc_ucs_precompose(ucs, ucs2);
534
31.2k
      if (ucs == WC_C_UCS4_ERROR)
535
27.7k
    break;
536
3.52k
      if (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
537
2.02k
    cc.code = wc_ucs_to_ucs_tag(ucs, wc_ucs_tag_to_tag(cc.code));
538
1.50k
      else {
539
1.50k
    cc.ccs = wc_ucs_to_ccs(ucs);
540
1.50k
    cc.code = ucs;
541
1.50k
      }
542
3.52k
      *p = q;
543
3.52k
      if (! WTF_WIDTH_MAP[*q])
544
1.48k
    break;
545
2.04k
      cc2 = wtf_parse1(&q);
546
2.04k
      if (! WC_CCS_IS_UNICODE(cc2.ccs))
547
736
    break;
548
2.04k
  }
549
29.9k
    }
550
142k
#endif
551
142k
    return cc;
552
147k
}
553
554
wc_ccs
555
wtf_get_ccs(wc_uchar *p)
556
0
{
557
0
   return wtf_parse1(&p).ccs;
558
0
}
559
560
wc_uint32
561
wtf_get_code(wc_uchar *p)
562
0
{
563
0
   return wtf_parse1(&p).code;
564
0
}
565
566
wc_bool
567
wtf_is_hangul(wc_uchar *p)
568
0
{
569
0
    if (*p > 0xa0)
570
0
  return (wtf_gr_ccs == WC_CCS_KS_X_1001 || wtf_gr_ccs == WC_CCS_JOHAB_1);
571
0
    else if (*p == WTF_C_CS94W)
572
0
  return ((*(p + 1) & 0x7f) == WC_F_KS_X_1001);
573
0
    else if (*p == WTF_C_PCSW) {
574
0
  wc_uchar f = *(p + 1) & 0x7f;
575
0
  return (f == WC_F_JOHAB_1 || f == WC_F_JOHAB_2 || f == WC_F_JOHAB_3 ||
576
0
    f == WC_F_UHC_1 || f == WC_F_UHC_2);
577
0
    }
578
0
#ifdef USE_UNICODE
579
0
    else if (*p == WTF_C_WCS16W) {
580
0
  wc_uchar f = (*(++p) & 0x7f) >> 2;
581
0
  if (f == WC_F_UCS2)
582
0
      return wc_is_ucs_hangul(wtf_to_wcs16(p));
583
0
    } else if (*p == WTF_C_WCS32W) {
584
0
  wc_uchar f = (*(++p) & 0x7f) >> 4;
585
0
  if (f == WC_F_UCS_TAG)
586
0
      return wc_is_ucs_hangul(wc_ucs_tag_to_ucs(wtf_to_wcs32(p)));
587
0
    }
588
0
#endif
589
0
    return WC_FALSE;
590
0
}
591
592
char *
593
wtf_conv_fit(char *s, wc_ces ces)
594
0
{
595
0
    wc_uchar *p;
596
0
    Str os;
597
0
    wc_wchar_t cc;
598
0
    wc_ces major_ces;
599
0
    wc_bool pre_conv, ucs_conv;
600
601
0
    if (ces == WC_CES_WTF || ces == WC_CES_US_ASCII)
602
0
  return s;
603
604
0
    for (p = (wc_uchar *)s; *p && *p < 0x80; p++)
605
0
  ;
606
0
    if (! *p)
607
0
  return s;
608
609
0
    os = Strnew_size(strlen(s));
610
0
    if (p > (wc_uchar *)s)
611
0
  Strcopy_charp_n(os, s, (int)(p - (wc_uchar *)s));
612
613
0
    major_ces = wtf_major_ces;
614
0
    pre_conv = WcOption.pre_conv;
615
0
    ucs_conv = WcOption.ucs_conv;
616
0
    wtf_major_ces = ces;
617
0
    WcOption.pre_conv = WC_TRUE;
618
0
    WcOption.ucs_conv = WC_TRUE;
619
0
    while (*p) {
620
0
  cc = wtf_parse1(&p);
621
0
  wtf_push(os, cc.ccs, cc.code);
622
0
    }
623
0
    wtf_major_ces = major_ces;
624
0
    WcOption.pre_conv = pre_conv;
625
0
    WcOption.ucs_conv = ucs_conv;
626
0
    return os->ptr;
627
0
}