Coverage Report

Created: 2026-04-12 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/gbk.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "gbk.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
10
#include "map/gb2312_gbk.map"
11
12
#define C0 WC_GBK_MAP_C0
13
#define GL WC_GBK_MAP_GL
14
124k
#define C1 WC_GBK_MAP_C1
15
2.32M
#define LB WC_GBK_MAP_LB
16
2.32M
#define UB WC_GBK_MAP_UB
17
246k
#define C80 WC_GBK_MAP_80
18
19
wc_uint8 WC_GBK_MAP[ 0x100 ] = {
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
22
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
23
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
27
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
28
29
    C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
36
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
37
};
38
39
wc_ccs
40
2.28M
wc_gb2312_or_gbk(wc_uint16 code) {
41
2.28M
    return wc_map_range_search(code,
42
2.28M
  gb2312_gbk_map, N_gb2312_gbk_map)
43
2.28M
  ? WC_CCS_GBK : WC_CCS_GB_2312;
44
2.28M
}
45
46
wc_wchar_t
47
wc_gbk_to_cs128w(wc_wchar_t cc)
48
8.55M
{
49
8.55M
    cc.code = WC_GBK_N(cc.code);
50
8.55M
    if (cc.code < 0x4000)
51
8.24M
  cc.ccs = WC_CCS_GBK_1;
52
307k
    else {
53
307k
  cc.ccs = WC_CCS_GBK_2;
54
307k
  cc.code -= 0x4000;
55
307k
    }
56
8.55M
    cc.code = WC_N_CS128W(cc.code);
57
8.55M
    return cc;
58
8.55M
}
59
60
wc_wchar_t
61
wc_cs128w_to_gbk(wc_wchar_t cc)
62
8.46M
{
63
8.46M
    cc.code = WC_CS128W_N(cc.code);
64
8.46M
    if (cc.ccs == WC_CCS_GBK_2)
65
302k
  cc.code += 0x4000;
66
8.46M
    cc.ccs = WC_CCS_GBK;
67
8.46M
    cc.code = WC_N_GBK(cc.code);
68
8.46M
    return cc;
69
8.46M
}
70
71
wc_uint32
72
wc_gbk_to_N(wc_uint32 c)
73
3.57M
{
74
3.57M
    if (c <= 0xA1A0)  /* 0x8140 - 0xA1A0 */
75
2.63M
  return WC_GBK_N(c);
76
933k
    if (c <= 0xA2AA)  /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
77
13.7k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
78
919k
    if (c <= 0xA6A0)  /* 0xA240 - 0xA6A0 */
79
1.24k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
80
918k
    if (c <= 0xA6F5)  /* 0xA6E0 - 0xA6F5 */
81
1.46k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
82
917k
    if (c <= 0xA8A0)  /* 0xA7A0 - 0xA8A0 */
83
5.26k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
84
911k
    if (c <= 0xA8C0)  /* 0xA8BB - 0xA8C0 */
85
1.10k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
86
      /* 0xA940 - 0xFEA0 */
87
910k
    return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
88
911k
}
89
90
Str
91
wc_conv_from_gbk(Str is, wc_ces ces)
92
272
{
93
272
    Str os;
94
272
    wc_uchar *sp = (wc_uchar *)is->ptr;
95
272
    wc_uchar *ep = sp + is->length;
96
272
    wc_uchar *p;
97
272
    int state = WC_GBK_NOSTATE;
98
272
    wc_uint32 gbk;
99
100
613
    for (p = sp; p < ep && *p < 0x80; p++) 
101
341
  ;
102
272
    if (p == ep)
103
25
  return is;
104
247
    os = Strnew_size(is->length);
105
247
    if (p > sp)
106
23
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
107
108
5.69M
    for (; p < ep; p++) {
109
5.69M
  switch (state) {
110
3.37M
  case WC_GBK_NOSTATE:
111
3.37M
      switch (WC_GBK_MAP[*p]) {
112
2.32M
      case UB:
113
2.32M
    state = WC_GBK_MBYTE1;
114
2.32M
    break;
115
246k
      case C80:
116
246k
    wtf_push(os, WC_CCS_GBK_80, *p);
117
246k
    break;
118
124k
      case C1:
119
124k
    wtf_push_unknown(os, p, 1);
120
124k
    break;
121
687k
      default:
122
687k
    Strcat_char(os, (char)*p);
123
687k
    break;
124
3.37M
      }
125
3.37M
      break;
126
3.37M
  case WC_GBK_MBYTE1:
127
2.32M
      if (WC_GBK_MAP[*p] & LB) {
128
2.26M
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
129
2.26M
    if (*(p-1) >= 0xA1 && *p >= 0xA1)
130
299k
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
131
1.96M
    else
132
1.96M
        wtf_push(os, WC_CCS_GBK, gbk);
133
2.26M
      } else
134
51.9k
    wtf_push_unknown(os, p-1, 2);
135
2.32M
      state = WC_GBK_NOSTATE;
136
2.32M
      break;
137
5.69M
  }
138
5.69M
    }
139
247
    switch (state) {
140
31
    case WC_GBK_MBYTE1:
141
31
  wtf_push_unknown(os, p-1, 1);
142
31
  break;
143
247
    }
144
247
    return os;
145
247
}
146
147
void
148
wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
149
12.4M
{
150
22.7M
  while (1) {
151
22.7M
    switch (cc.ccs) {
152
2.52M
    case WC_CCS_US_ASCII:
153
2.52M
  Strcat_char(os, (char)cc.code);
154
2.52M
  return;
155
2.78M
    case WC_CCS_GB_2312:
156
2.78M
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
157
2.78M
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
158
2.78M
  return;
159
2.23k
    case WC_CCS_GBK_80:
160
2.23k
  Strcat_char(os, (char)(cc.code | 0x80));
161
2.23k
  return;
162
4.33M
    case WC_CCS_GBK_1:
163
4.34M
    case WC_CCS_GBK_2:
164
4.34M
  cc = wc_cs128w_to_gbk(cc);
165
4.72M
    case WC_CCS_GBK:
166
4.72M
  Strcat_char(os, (char)(cc.code >> 8));
167
4.72M
  Strcat_char(os, (char)(cc.code & 0xff));
168
4.72M
  return;
169
369k
    case WC_CCS_UNKNOWN_W:
170
369k
  if (!WcOption.no_replace)
171
369k
      Strcat_charp(os, WC_REPLACE_W);
172
369k
  return;
173
2.03M
    case WC_CCS_UNKNOWN:
174
2.03M
  if (!WcOption.no_replace)
175
2.03M
      Strcat_charp(os, WC_REPLACE);
176
2.03M
  return;
177
10.2M
    default:
178
10.2M
#ifdef USE_UNICODE
179
10.2M
  if (WcOption.ucs_conv)
180
10.2M
      cc = wc_any_to_any_ces(cc, st);
181
0
  else
182
0
#endif
183
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
184
10.2M
  continue;
185
22.7M
    }
186
22.7M
  }
187
12.4M
}
188
189
Str
190
wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
191
0
{
192
0
    static Str os;
193
0
    static wc_uchar gbku;
194
0
    wc_uint32 gbk;
195
196
0
    if (st->state == -1) {
197
0
  st->state = WC_GBK_NOSTATE;
198
0
  os = Strnew_size(8);
199
0
    }
200
201
0
    switch (st->state) {
202
0
    case WC_GBK_NOSTATE:
203
0
  switch (WC_GBK_MAP[c]) {
204
0
  case UB:
205
0
      gbku = c;
206
0
      st->state = WC_GBK_MBYTE1;
207
0
      return NULL;
208
0
  case C80:
209
0
      wtf_push(os, WC_CCS_GBK_80, c);
210
0
      break;
211
0
  case C1:
212
0
      break;
213
0
  default:
214
0
      Strcat_char(os, (char)c);
215
0
      break;
216
0
  }
217
0
  break;
218
0
    case WC_GBK_MBYTE1:
219
0
  if (WC_GBK_MAP[c] & LB) {
220
0
      gbk = ((wc_uint32)gbku << 8) | c;
221
0
      if (gbku >= 0xA1 && c >= 0xA1)
222
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
223
0
      else
224
0
    wtf_push(os, WC_CCS_GBK, gbk);
225
0
  }
226
0
  break;
227
0
    }
228
0
    st->state = -1;
229
0
    return os;
230
0
}