Coverage Report

Created: 2026-03-13 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/gb18030.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "gb18030.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
#include "map/gb18030_ucs.map"
10
11
#define C0 WC_GB18030_MAP_C0
12
#define GL WC_GB18030_MAP_GL
13
118k
#define C1 WC_GB18030_MAP_C1
14
6.87M
#define LB WC_GB18030_MAP_LB
15
6.98M
#define UB WC_GB18030_MAP_UB
16
673k
#define L4 WC_GB18030_MAP_L4
17
18
wc_uint8 WC_GB18030_MAP[ 0x100 ] = {
19
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
22
    L4, L4, L4, L4, L4, L4, L4, L4, L4, L4, GL, GL, GL, GL, GL, GL,
23
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
27
28
    LB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
29
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
36
};
37
38
wc_wchar_t
39
wc_gbk_ext_to_cs128w(wc_wchar_t cc)
40
1.64M
{
41
1.64M
    cc.code = WC_GBK_N(cc.code);
42
1.64M
    if (cc.code < 0x4000)
43
294k
  cc.ccs = WC_CCS_GBK_EXT_1;
44
1.34M
    else {
45
1.34M
  cc.ccs = WC_CCS_GBK_EXT_2;
46
1.34M
  cc.code -= 0x4000;
47
1.34M
    }
48
1.64M
    cc.code = WC_N_CS128W(cc.code);
49
1.64M
    return cc;
50
1.64M
}
51
52
wc_wchar_t
53
wc_cs128w_to_gbk_ext(wc_wchar_t cc)
54
1.60M
{
55
1.60M
    cc.code = WC_CS128W_N(cc.code);
56
1.60M
    if (cc.ccs == WC_CCS_GBK_EXT_2)
57
1.32M
  cc.code += 0x4000;
58
1.60M
    cc.ccs = WC_CCS_GBK_EXT;
59
1.60M
    cc.code = WC_N_GBK(cc.code);
60
1.60M
    return cc;
61
1.60M
}
62
63
static wc_ccs
64
6.27M
wc_gbk_or_gbk_ext(wc_uint16 code) {
65
6.27M
    return wc_map3_range_search(code,
66
6.27M
        gbk_ext_ucs_map, N_gbk_ext_ucs_map)
67
6.27M
        ? WC_CCS_GBK_EXT : WC_CCS_GBK;
68
6.27M
}
69
70
#ifdef USE_UNICODE
71
wc_uint32
72
wc_gb18030_to_ucs(wc_wchar_t cc)
73
676k
{
74
676k
    wc_map3 *map;
75
76
676k
    switch (WC_CCS_SET(cc.ccs)) {
77
0
    case WC_CCS_GBK_EXT_1:
78
0
    case WC_CCS_GBK_EXT_2:
79
0
  cc = wc_cs128w_to_gbk_ext(cc);
80
659k
    case WC_CCS_GBK_EXT:
81
659k
  map = wc_map3_range_search((wc_uint16)cc.code,
82
659k
    gbk_ext_ucs_map, N_gbk_ext_ucs_map);
83
659k
  if (map)
84
658k
      return map->code3 + WC_GBK_N(cc.code) - WC_GBK_N(map->code2);
85
234
  return WC_C_UCS4_ERROR;
86
17.5k
    case WC_CCS_GB18030:
87
17.5k
  break;
88
0
    default:
89
0
  return wc_any_to_ucs(cc);
90
676k
    }
91
17.5k
    if (cc.code >= WC_C_GB18030_UCS2 && cc.code <= WC_C_GB18030_UCS2_END) {
92
2.80k
  int i, min = 0, max = N_ucs_gb18030_map - 1;
93
94
2.80k
  cc.code = WC_GB18030_N(cc.code) - WC_GB18030_N(WC_C_GB18030_UCS2);
95
2.80k
  if (cc.code >= ucs_gb18030_map[max].code3)
96
589
      i = max;
97
2.21k
  else {
98
16.5k
      while(1) {
99
16.5k
    i = (min + max) / 2;
100
16.5k
    if (min == max)
101
1.03k
        break;
102
15.4k
    if (cc.code < ucs_gb18030_map[i].code3)
103
7.62k
        max = i - 1;
104
7.85k
    else if (cc.code >= ucs_gb18030_map[i+1].code3)
105
6.67k
        min = i + 1;
106
1.17k
    else
107
1.17k
        break;
108
15.4k
      }
109
2.21k
  }
110
2.80k
  return ucs_gb18030_map[i].code + cc.code - ucs_gb18030_map[i].code3;
111
2.80k
    }
112
14.7k
    if (cc.code >= WC_C_GB18030_UCS4 && cc.code <= WC_C_GB18030_UCS4_END)
113
5.88k
  return WC_GB18030_N(cc.code) - WC_GB18030_N(WC_C_GB18030_UCS4)
114
5.88k
    + 0x10000;
115
8.87k
    return WC_C_UCS4_ERROR;
116
14.7k
}
117
118
wc_wchar_t
119
wc_ucs_to_gb18030(wc_uint32 ucs)
120
16.0M
{
121
16.0M
    wc_wchar_t cc;
122
16.0M
    wc_map3 *map;
123
124
16.0M
    if (ucs <= WC_C_UCS2_END) {
125
16.0M
  map = wc_map3_range_search((wc_uint16)ucs,
126
16.0M
    ucs_gbk_ext_map, N_ucs_gbk_ext_map);
127
16.0M
  if (map) {
128
1.24k
      cc.code = WC_GBK_N(map->code3) + ucs - map->code;
129
1.24k
      cc.code = WC_N_GBK(cc.code);
130
1.24k
      cc.ccs = WC_CCS_GBK_EXT;
131
1.24k
      return cc;
132
1.24k
  }
133
16.0M
  map = wc_map3_range_search((wc_uint16)ucs,
134
16.0M
    ucs_gb18030_map, N_ucs_gb18030_map);
135
16.0M
  if (map) {
136
15.9M
      cc.code = map->code3 + ucs - map->code + WC_GB18030_N(WC_C_GB18030_UCS2);
137
15.9M
      cc.code = WC_N_GB18030(cc.code);
138
15.9M
      if (WcOption.gb18030_as_ucs)
139
0
    cc.ccs = WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET);
140
15.9M
      else
141
15.9M
    cc.ccs = WC_CCS_GB18030_W;
142
15.9M
      return cc;
143
15.9M
  }
144
16.0M
    } else if (ucs <= WC_C_UNICODE_END) {
145
856
  cc.code = ucs - 0x10000 + WC_GB18030_N(WC_C_GB18030_UCS4);
146
856
  cc.code = WC_N_GB18030(cc.code);
147
856
  if (WcOption.gb18030_as_ucs)
148
0
      cc.ccs = WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET);
149
856
  else
150
856
      cc.ccs = WC_CCS_GB18030_W;
151
856
  return cc;
152
856
    }
153
36.0k
    cc.ccs = WC_CCS_UNKNOWN;
154
36.0k
    cc.code = 0;
155
36.0k
    return cc;
156
16.0M
}
157
#endif
158
159
Str
160
wc_conv_from_gb18030(Str is, wc_ces ces)
161
1.54k
{
162
1.54k
    Str os;
163
1.54k
    wc_uchar *sp = (wc_uchar *)is->ptr;
164
1.54k
    wc_uchar *ep = sp + is->length;
165
1.54k
    wc_uchar *p;
166
1.54k
    int state = WC_GB18030_NOSTATE;
167
1.54k
    wc_uint32 gbk;
168
1.54k
    wc_wchar_t cc;
169
1.54k
#ifdef USE_UNICODE
170
1.54k
    wc_uint32 ucs;
171
1.54k
#endif
172
173
74.8k
    for (p = sp; p < ep && *p < 0x80; p++) 
174
73.2k
  ;
175
1.54k
    if (p == ep)
176
14
  return is;
177
1.52k
    os = Strnew_size(is->length);
178
1.52k
    if (p > sp)
179
215
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
180
181
21.4M
    for (; p < ep; p++) {
182
21.4M
  switch (state) {
183
14.4M
  case WC_GB18030_NOSTATE:
184
14.4M
      switch (WC_GB18030_MAP[*p]) {
185
6.87M
      case UB:
186
6.87M
    state = WC_GB18030_MBYTE1;
187
6.87M
    break;
188
118k
      case C1:
189
118k
    wtf_push_unknown(os, p, 1);
190
118k
    break;
191
7.42M
      default:
192
7.42M
    Strcat_char(os, (char)*p);
193
7.42M
    break;
194
14.4M
      }
195
14.4M
      break;
196
14.4M
  case WC_GB18030_MBYTE1:
197
6.87M
      if (WC_GB18030_MAP[*p] & LB) {
198
6.27M
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
199
6.27M
    if (wc_gbk_or_gbk_ext(gbk) == WC_CCS_GBK_EXT)
200
1.64M
        wtf_push(os, WC_CCS_GBK_EXT, gbk);
201
4.62M
    else if (*(p-1) >= 0xA1 && *p >= 0xA1)
202
1.59M
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
203
3.03M
    else
204
3.03M
        wtf_push(os, WC_CCS_GBK, gbk);
205
6.27M
      } else if (WC_GB18030_MAP[*p] == L4) {
206
109k
    state = WC_GB18030_MBYTE2;
207
109k
    break;
208
109k
      } else
209
499k
    wtf_push_unknown(os, p-1, 2);
210
6.76M
      state = WC_GB18030_NOSTATE;
211
6.76M
      break;
212
109k
  case WC_GB18030_MBYTE2:
213
109k
      if (WC_GB18030_MAP[*p] == UB) {
214
64.7k
    state = WC_GB18030_MBYTE3;
215
64.7k
    break;
216
64.7k
      } else
217
44.6k
    wtf_push_unknown(os, p-2, 3);
218
44.6k
      state = WC_GB18030_NOSTATE;
219
44.6k
      break;
220
64.7k
  case WC_GB18030_MBYTE3:
221
64.7k
      if (WC_GB18030_MAP[*p] == L4) {
222
22.9k
    cc.ccs = WC_CCS_GB18030_W;
223
22.9k
    cc.code = ((wc_uint32)*(p-3) << 24)
224
22.9k
            | ((wc_uint32)*(p-2) << 16)
225
22.9k
            | ((wc_uint32)*(p-1) << 8)
226
22.9k
            | *p;
227
22.9k
#ifdef USE_UNICODE
228
22.9k
    if (WcOption.gb18030_as_ucs &&
229
0
        (ucs = wc_gb18030_to_ucs(cc)) != WC_C_UCS4_ERROR)
230
0
        wtf_push(os, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), cc.code);
231
22.9k
    else
232
22.9k
#endif
233
22.9k
        wtf_push(os, cc.ccs, cc.code);
234
22.9k
      } else
235
41.7k
    wtf_push_unknown(os, p-3, 4);
236
64.7k
      state = WC_GB18030_NOSTATE;
237
64.7k
      break;
238
21.4M
  }
239
21.4M
    }
240
1.52k
    switch (state) {
241
532
    case WC_GB18030_MBYTE1:
242
532
  wtf_push_unknown(os, p-1, 1);
243
532
  break;
244
6
    case WC_GB18030_MBYTE2:
245
6
  wtf_push_unknown(os, p-2, 2);
246
6
  break;
247
12
    case WC_GB18030_MBYTE3:
248
12
  wtf_push_unknown(os, p-3, 3);
249
12
  break;
250
1.52k
    }
251
1.52k
    return os;
252
1.52k
}
253
254
void
255
wc_push_to_gb18030(Str os, wc_wchar_t cc, wc_status *st)
256
20.6M
{
257
38.9M
  while (1) {
258
38.9M
    switch (WC_CCS_SET(cc.ccs)) {
259
348k
    case WC_CCS_US_ASCII:
260
348k
  Strcat_char(os, (char)cc.code);
261
348k
  return;
262
1.54M
    case WC_CCS_GB_2312:
263
1.54M
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
264
1.54M
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
265
1.54M
  return;
266
23.8k
    case WC_CCS_GBK_1:
267
26.5k
    case WC_CCS_GBK_2:
268
26.5k
  cc = wc_cs128w_to_gbk(cc);
269
150k
    case WC_CCS_GBK:
270
150k
  Strcat_char(os, (char)(cc.code >> 8));
271
150k
  Strcat_char(os, (char)(cc.code & 0xff));
272
150k
  return;
273
303
    case WC_CCS_GBK_EXT_1:
274
519
    case WC_CCS_GBK_EXT_2:
275
519
  cc = wc_cs128w_to_gbk(cc);
276
949k
    case WC_CCS_GBK_EXT:
277
949k
  Strcat_char(os, (char)(cc.code >> 8));
278
949k
  Strcat_char(os, (char)(cc.code & 0xff));
279
949k
  return;
280
15.9M
    case WC_CCS_GB18030:
281
15.9M
  Strcat_char(os, (char)((cc.code >> 24) & 0xff));
282
15.9M
  Strcat_char(os, (char)((cc.code >> 16) & 0xff));
283
15.9M
  Strcat_char(os, (char)((cc.code >> 8)  & 0xff));
284
15.9M
  Strcat_char(os, (char)(cc.code & 0xff));
285
15.9M
  return;
286
3.41k
    case WC_CCS_UNKNOWN_W:
287
3.41k
  if (!WcOption.no_replace)
288
3.41k
      Strcat_charp(os, WC_REPLACE_W);
289
3.41k
  return;
290
1.64M
    case WC_CCS_UNKNOWN:
291
1.64M
  if (!WcOption.no_replace)
292
1.64M
      Strcat_charp(os, WC_REPLACE);
293
1.64M
  return;
294
18.3M
    default:
295
18.3M
#ifdef USE_UNICODE
296
18.3M
  if (WcOption.ucs_conv)
297
18.3M
      cc = wc_any_to_any_ces(cc, st);
298
0
  else
299
0
#endif
300
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
301
18.3M
  continue;
302
38.9M
    }
303
38.9M
  }
304
20.6M
}
305
306
Str
307
wc_char_conv_from_gb18030(wc_uchar c, wc_status *st)
308
0
{
309
0
    static Str os;
310
0
    static wc_uchar gb[4];
311
0
    wc_uint32 gbk;
312
0
    wc_wchar_t cc;
313
0
#ifdef USE_UNICODE
314
0
    wc_uint32 ucs;
315
0
#endif
316
317
0
    if (st->state == -1) {
318
0
  st->state = WC_GB18030_NOSTATE;
319
0
  os = Strnew_size(8);
320
0
    }
321
322
0
    switch (st->state) {
323
0
    case WC_GB18030_NOSTATE:
324
0
  switch (WC_GB18030_MAP[c]) {
325
0
  case UB:
326
0
      gb[0] = c;
327
0
      st->state = WC_GB18030_MBYTE1;
328
0
      return NULL;
329
0
  case C1:
330
0
      break;
331
0
  default:
332
0
      Strcat_char(os, (char)c);
333
0
      break;
334
0
  }
335
0
  break;
336
0
    case WC_GB18030_MBYTE1:
337
0
  if (WC_GB18030_MAP[c] & LB) {
338
0
      gbk = ((wc_uint32)gb[0] << 8) | c;
339
0
      if (wc_gbk_or_gbk_ext(gbk) == WC_CCS_GBK_EXT)
340
0
    wtf_push(os, WC_CCS_GBK_EXT, gbk);
341
0
      else if (gb[0] >= 0xA1 && c >= 0xA1)
342
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
343
0
      else
344
0
    wtf_push(os, WC_CCS_GBK, gbk);
345
0
  } else if (WC_GB18030_MAP[c] == L4) {
346
0
      gb[1] = c;
347
0
      st->state = WC_GB18030_MBYTE2;
348
0
      return NULL;
349
0
  }
350
0
  break;
351
0
    case WC_GB18030_MBYTE2:
352
0
  if (WC_GB18030_MAP[c] == UB) {
353
0
      gb[2] = c;
354
0
      st->state = WC_GB18030_MBYTE3;
355
0
      return NULL;
356
0
  }
357
0
  break;
358
0
    case WC_GB18030_MBYTE3:
359
0
  if (WC_GB18030_MAP[c] == L4) {
360
0
      cc.ccs = WC_CCS_GB18030_W;
361
0
      cc.code = ((wc_uint32)gb[0] << 24)
362
0
        | ((wc_uint32)gb[1] << 16)
363
0
        | ((wc_uint32)gb[2] << 8)
364
0
        | c;
365
0
#ifdef USE_UNICODE
366
0
      if (WcOption.gb18030_as_ucs &&
367
0
    (ucs = wc_gb18030_to_ucs(cc)) != WC_C_UCS4_ERROR)
368
0
    wtf_push(os, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), cc.code);
369
0
      else
370
0
#endif
371
0
          wtf_push(os, cc.ccs, cc.code);
372
0
  }
373
0
  break;
374
0
    }
375
0
    st->state = -1;
376
0
    return os;
377
0
}