Coverage Report

Created: 2025-11-18 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/gb18030.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "gb18030.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
#include "map/gb18030_ucs.map"
10
11
#define C0 WC_GB18030_MAP_C0
12
#define GL WC_GB18030_MAP_GL
13
546k
#define C1 WC_GB18030_MAP_C1
14
1.80M
#define LB WC_GB18030_MAP_LB
15
1.88M
#define UB WC_GB18030_MAP_UB
16
277k
#define L4 WC_GB18030_MAP_L4
17
18
wc_uint8 WC_GB18030_MAP[ 0x100 ] = {
19
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
22
    L4, L4, L4, L4, L4, L4, L4, L4, L4, L4, GL, GL, GL, GL, GL, GL,
23
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
27
28
    LB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
29
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
36
};
37
38
wc_wchar_t
39
wc_gbk_ext_to_cs128w(wc_wchar_t cc)
40
312k
{
41
312k
    cc.code = WC_GBK_N(cc.code);
42
312k
    if (cc.code < 0x4000)
43
235k
  cc.ccs = WC_CCS_GBK_EXT_1;
44
77.4k
    else {
45
77.4k
  cc.ccs = WC_CCS_GBK_EXT_2;
46
77.4k
  cc.code -= 0x4000;
47
77.4k
    }
48
312k
    cc.code = WC_N_CS128W(cc.code);
49
312k
    return cc;
50
312k
}
51
52
wc_wchar_t
53
wc_cs128w_to_gbk_ext(wc_wchar_t cc)
54
297k
{
55
297k
    cc.code = WC_CS128W_N(cc.code);
56
297k
    if (cc.ccs == WC_CCS_GBK_EXT_2)
57
68.8k
  cc.code += 0x4000;
58
297k
    cc.ccs = WC_CCS_GBK_EXT;
59
297k
    cc.code = WC_N_GBK(cc.code);
60
297k
    return cc;
61
297k
}
62
63
static wc_ccs
64
1.59M
wc_gbk_or_gbk_ext(wc_uint16 code) {
65
1.59M
    return wc_map3_range_search(code,
66
1.59M
        gbk_ext_ucs_map, N_gbk_ext_ucs_map)
67
1.59M
        ? WC_CCS_GBK_EXT : WC_CCS_GBK;
68
1.59M
}
69
70
#ifdef USE_UNICODE
71
wc_uint32
72
wc_gb18030_to_ucs(wc_wchar_t cc)
73
339k
{
74
339k
    wc_map3 *map;
75
76
339k
    switch (WC_CCS_SET(cc.ccs)) {
77
0
    case WC_CCS_GBK_EXT_1:
78
0
    case WC_CCS_GBK_EXT_2:
79
0
  cc = wc_cs128w_to_gbk_ext(cc);
80
277k
    case WC_CCS_GBK_EXT:
81
277k
  map = wc_map3_range_search((wc_uint16)cc.code,
82
277k
    gbk_ext_ucs_map, N_gbk_ext_ucs_map);
83
277k
  if (map)
84
277k
      return map->code3 + WC_GBK_N(cc.code) - WC_GBK_N(map->code2);
85
253
  return WC_C_UCS4_ERROR;
86
61.9k
    case WC_CCS_GB18030:
87
61.9k
  break;
88
0
    default:
89
0
  return wc_any_to_ucs(cc);
90
339k
    }
91
61.9k
    if (cc.code >= WC_C_GB18030_UCS2 && cc.code <= WC_C_GB18030_UCS2_END) {
92
8.63k
  int i, min = 0, max = N_ucs_gb18030_map - 1;
93
94
8.63k
  cc.code = WC_GB18030_N(cc.code) - WC_GB18030_N(WC_C_GB18030_UCS2);
95
8.63k
  if (cc.code >= ucs_gb18030_map[max].code3)
96
5.80k
      i = max;
97
2.82k
  else {
98
19.8k
      while(1) {
99
19.8k
    i = (min + max) / 2;
100
19.8k
    if (min == max)
101
195
        break;
102
19.6k
    if (cc.code < ucs_gb18030_map[i].code3)
103
13.5k
        max = i - 1;
104
6.09k
    else if (cc.code >= ucs_gb18030_map[i+1].code3)
105
3.46k
        min = i + 1;
106
2.63k
    else
107
2.63k
        break;
108
19.6k
      }
109
2.82k
  }
110
8.63k
  return ucs_gb18030_map[i].code + cc.code - ucs_gb18030_map[i].code3;
111
8.63k
    }
112
53.3k
    if (cc.code >= WC_C_GB18030_UCS4 && cc.code <= WC_C_GB18030_UCS4_END)
113
20.5k
  return WC_GB18030_N(cc.code) - WC_GB18030_N(WC_C_GB18030_UCS4)
114
20.5k
    + 0x10000;
115
32.7k
    return WC_C_UCS4_ERROR;
116
53.3k
}
117
118
wc_wchar_t
119
wc_ucs_to_gb18030(wc_uint32 ucs)
120
9.95M
{
121
9.95M
    wc_wchar_t cc;
122
9.95M
    wc_map3 *map;
123
124
9.95M
    if (ucs <= WC_C_UCS2_END) {
125
9.95M
  map = wc_map3_range_search((wc_uint16)ucs,
126
9.95M
    ucs_gbk_ext_map, N_ucs_gbk_ext_map);
127
9.95M
  if (map) {
128
1.35k
      cc.code = WC_GBK_N(map->code3) + ucs - map->code;
129
1.35k
      cc.code = WC_N_GBK(cc.code);
130
1.35k
      cc.ccs = WC_CCS_GBK_EXT;
131
1.35k
      return cc;
132
1.35k
  }
133
9.95M
  map = wc_map3_range_search((wc_uint16)ucs,
134
9.95M
    ucs_gb18030_map, N_ucs_gb18030_map);
135
9.95M
  if (map) {
136
9.95M
      cc.code = map->code3 + ucs - map->code + WC_GB18030_N(WC_C_GB18030_UCS2);
137
9.95M
      cc.code = WC_N_GB18030(cc.code);
138
9.95M
      if (WcOption.gb18030_as_ucs)
139
0
    cc.ccs = WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET);
140
9.95M
      else
141
9.95M
    cc.ccs = WC_CCS_GB18030_W;
142
9.95M
      return cc;
143
9.95M
  }
144
9.95M
    } else if (ucs <= WC_C_UNICODE_END) {
145
315
  cc.code = ucs - 0x10000 + WC_GB18030_N(WC_C_GB18030_UCS4);
146
315
  cc.code = WC_N_GB18030(cc.code);
147
315
  if (WcOption.gb18030_as_ucs)
148
0
      cc.ccs = WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET);
149
315
  else
150
315
      cc.ccs = WC_CCS_GB18030_W;
151
315
  return cc;
152
315
    }
153
4.27k
    cc.ccs = WC_CCS_UNKNOWN;
154
4.27k
    cc.code = 0;
155
4.27k
    return cc;
156
9.95M
}
157
#endif
158
159
Str
160
wc_conv_from_gb18030(Str is, wc_ces ces)
161
1.35k
{
162
1.35k
    Str os;
163
1.35k
    wc_uchar *sp = (wc_uchar *)is->ptr;
164
1.35k
    wc_uchar *ep = sp + is->length;
165
1.35k
    wc_uchar *p;
166
1.35k
    int state = WC_GB18030_NOSTATE;
167
1.35k
    wc_uint32 gbk;
168
1.35k
    wc_wchar_t cc;
169
1.35k
#ifdef USE_UNICODE
170
1.35k
    wc_uint32 ucs;
171
1.35k
#endif
172
173
2.02k
    for (p = sp; p < ep && *p < 0x80; p++) 
174
665
  ;
175
1.35k
    if (p == ep)
176
7
  return is;
177
1.35k
    os = Strnew_size(is->length);
178
1.35k
    if (p > sp)
179
82
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
180
181
6.07M
    for (; p < ep; p++) {
182
6.07M
  switch (state) {
183
4.13M
  case WC_GB18030_NOSTATE:
184
4.13M
      switch (WC_GB18030_MAP[*p]) {
185
1.81M
      case UB:
186
1.81M
    state = WC_GB18030_MBYTE1;
187
1.81M
    break;
188
546k
      case C1:
189
546k
    wtf_push_unknown(os, p, 1);
190
546k
    break;
191
1.77M
      default:
192
1.77M
    Strcat_char(os, (char)*p);
193
1.77M
    break;
194
4.13M
      }
195
4.13M
      break;
196
4.13M
  case WC_GB18030_MBYTE1:
197
1.80M
      if (WC_GB18030_MAP[*p] & LB) {
198
1.59M
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
199
1.59M
    if (wc_gbk_or_gbk_ext(gbk) == WC_CCS_GBK_EXT)
200
312k
        wtf_push(os, WC_CCS_GBK_EXT, gbk);
201
1.28M
    else if (*(p-1) >= 0xA1 && *p >= 0xA1)
202
850k
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
203
429k
    else
204
429k
        wtf_push(os, WC_CCS_GBK, gbk);
205
1.59M
      } else if (WC_GB18030_MAP[*p] == L4) {
206
70.3k
    state = WC_GB18030_MBYTE2;
207
70.3k
    break;
208
70.3k
      } else
209
145k
    wtf_push_unknown(os, p-1, 2);
210
1.73M
      state = WC_GB18030_NOSTATE;
211
1.73M
      break;
212
70.2k
  case WC_GB18030_MBYTE2:
213
70.2k
      if (WC_GB18030_MAP[*p] == UB) {
214
61.3k
    state = WC_GB18030_MBYTE3;
215
61.3k
    break;
216
61.3k
      } else
217
8.89k
    wtf_push_unknown(os, p-2, 3);
218
8.89k
      state = WC_GB18030_NOSTATE;
219
8.89k
      break;
220
61.3k
  case WC_GB18030_MBYTE3:
221
61.3k
      if (WC_GB18030_MAP[*p] == L4) {
222
53.1k
    cc.ccs = WC_CCS_GB18030_W;
223
53.1k
    cc.code = ((wc_uint32)*(p-3) << 24)
224
53.1k
            | ((wc_uint32)*(p-2) << 16)
225
53.1k
            | ((wc_uint32)*(p-1) << 8)
226
53.1k
            | *p;
227
53.1k
#ifdef USE_UNICODE
228
53.1k
    if (WcOption.gb18030_as_ucs &&
229
0
        (ucs = wc_gb18030_to_ucs(cc)) != WC_C_UCS4_ERROR)
230
0
        wtf_push(os, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), cc.code);
231
53.1k
    else
232
53.1k
#endif
233
53.1k
        wtf_push(os, cc.ccs, cc.code);
234
53.1k
      } else
235
8.24k
    wtf_push_unknown(os, p-3, 4);
236
61.3k
      state = WC_GB18030_NOSTATE;
237
61.3k
      break;
238
6.07M
  }
239
6.07M
    }
240
1.35k
    switch (state) {
241
590
    case WC_GB18030_MBYTE1:
242
590
  wtf_push_unknown(os, p-1, 1);
243
590
  break;
244
8
    case WC_GB18030_MBYTE2:
245
8
  wtf_push_unknown(os, p-2, 2);
246
8
  break;
247
5
    case WC_GB18030_MBYTE3:
248
5
  wtf_push_unknown(os, p-3, 3);
249
5
  break;
250
1.35k
    }
251
1.35k
    return os;
252
1.35k
}
253
254
void
255
wc_push_to_gb18030(Str os, wc_wchar_t cc, wc_status *st)
256
11.6M
{
257
22.6M
  while (1) {
258
22.6M
    switch (WC_CCS_SET(cc.ccs)) {
259
3.85k
    case WC_CCS_US_ASCII:
260
3.85k
  Strcat_char(os, (char)cc.code);
261
3.85k
  return;
262
884k
    case WC_CCS_GB_2312:
263
884k
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
264
884k
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
265
884k
  return;
266
123k
    case WC_CCS_GBK_1:
267
127k
    case WC_CCS_GBK_2:
268
127k
  cc = wc_cs128w_to_gbk(cc);
269
201k
    case WC_CCS_GBK:
270
201k
  Strcat_char(os, (char)(cc.code >> 8));
271
201k
  Strcat_char(os, (char)(cc.code & 0xff));
272
201k
  return;
273
207
    case WC_CCS_GBK_EXT_1:
274
701
    case WC_CCS_GBK_EXT_2:
275
701
  cc = wc_cs128w_to_gbk(cc);
276
22.5k
    case WC_CCS_GBK_EXT:
277
22.5k
  Strcat_char(os, (char)(cc.code >> 8));
278
22.5k
  Strcat_char(os, (char)(cc.code & 0xff));
279
22.5k
  return;
280
9.95M
    case WC_CCS_GB18030:
281
9.95M
  Strcat_char(os, (char)((cc.code >> 24) & 0xff));
282
9.95M
  Strcat_char(os, (char)((cc.code >> 16) & 0xff));
283
9.95M
  Strcat_char(os, (char)((cc.code >> 8)  & 0xff));
284
9.95M
  Strcat_char(os, (char)(cc.code & 0xff));
285
9.95M
  return;
286
7.56k
    case WC_CCS_UNKNOWN_W:
287
7.56k
  if (!WcOption.no_replace)
288
7.56k
      Strcat_charp(os, WC_REPLACE_W);
289
7.56k
  return;
290
629k
    case WC_CCS_UNKNOWN:
291
629k
  if (!WcOption.no_replace)
292
629k
      Strcat_charp(os, WC_REPLACE);
293
629k
  return;
294
10.9M
    default:
295
10.9M
#ifdef USE_UNICODE
296
10.9M
  if (WcOption.ucs_conv)
297
10.9M
      cc = wc_any_to_any_ces(cc, st);
298
0
  else
299
0
#endif
300
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
301
10.9M
  continue;
302
22.6M
    }
303
22.6M
  }
304
11.6M
}
305
306
Str
307
wc_char_conv_from_gb18030(wc_uchar c, wc_status *st)
308
0
{
309
0
    static Str os;
310
0
    static wc_uchar gb[4];
311
0
    wc_uint32 gbk;
312
0
    wc_wchar_t cc;
313
0
#ifdef USE_UNICODE
314
0
    wc_uint32 ucs;
315
0
#endif
316
317
0
    if (st->state == -1) {
318
0
  st->state = WC_GB18030_NOSTATE;
319
0
  os = Strnew_size(8);
320
0
    }
321
322
0
    switch (st->state) {
323
0
    case WC_GB18030_NOSTATE:
324
0
  switch (WC_GB18030_MAP[c]) {
325
0
  case UB:
326
0
      gb[0] = c;
327
0
      st->state = WC_GB18030_MBYTE1;
328
0
      return NULL;
329
0
  case C1:
330
0
      break;
331
0
  default:
332
0
      Strcat_char(os, (char)c);
333
0
      break;
334
0
  }
335
0
  break;
336
0
    case WC_GB18030_MBYTE1:
337
0
  if (WC_GB18030_MAP[c] & LB) {
338
0
      gbk = ((wc_uint32)gb[0] << 8) | c;
339
0
      if (wc_gbk_or_gbk_ext(gbk) == WC_CCS_GBK_EXT)
340
0
    wtf_push(os, WC_CCS_GBK_EXT, gbk);
341
0
      else if (gb[0] >= 0xA1 && c >= 0xA1)
342
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
343
0
      else
344
0
    wtf_push(os, WC_CCS_GBK, gbk);
345
0
  } else if (WC_GB18030_MAP[c] == L4) {
346
0
      gb[1] = c;
347
0
      st->state = WC_GB18030_MBYTE2;
348
0
      return NULL;
349
0
  }
350
0
  break;
351
0
    case WC_GB18030_MBYTE2:
352
0
  if (WC_GB18030_MAP[c] == UB) {
353
0
      gb[2] = c;
354
0
      st->state = WC_GB18030_MBYTE3;
355
0
      return NULL;
356
0
  }
357
0
  break;
358
0
    case WC_GB18030_MBYTE3:
359
0
  if (WC_GB18030_MAP[c] == L4) {
360
0
      cc.ccs = WC_CCS_GB18030_W;
361
0
      cc.code = ((wc_uint32)gb[0] << 24)
362
0
        | ((wc_uint32)gb[1] << 16)
363
0
        | ((wc_uint32)gb[2] << 8)
364
0
        | c;
365
0
#ifdef USE_UNICODE
366
0
      if (WcOption.gb18030_as_ucs &&
367
0
    (ucs = wc_gb18030_to_ucs(cc)) != WC_C_UCS4_ERROR)
368
0
    wtf_push(os, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), cc.code);
369
0
      else
370
0
#endif
371
0
          wtf_push(os, cc.ccs, cc.code);
372
0
  }
373
0
  break;
374
0
    }
375
0
    st->state = -1;
376
0
    return os;
377
0
}