Coverage Report

Created: 2026-03-13 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/gbk.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "gbk.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
10
#include "map/gb2312_gbk.map"
11
12
#define C0 WC_GBK_MAP_C0
13
#define GL WC_GBK_MAP_GL
14
131k
#define C1 WC_GBK_MAP_C1
15
2.95M
#define LB WC_GBK_MAP_LB
16
2.95M
#define UB WC_GBK_MAP_UB
17
465k
#define C80 WC_GBK_MAP_80
18
19
wc_uint8 WC_GBK_MAP[ 0x100 ] = {
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
22
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
23
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
27
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
28
29
    C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
36
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
37
};
38
39
wc_ccs
40
2.29M
wc_gb2312_or_gbk(wc_uint16 code) {
41
2.29M
    return wc_map_range_search(code,
42
2.29M
  gb2312_gbk_map, N_gb2312_gbk_map)
43
2.29M
  ? WC_CCS_GBK : WC_CCS_GB_2312;
44
2.29M
}
45
46
wc_wchar_t
47
wc_gbk_to_cs128w(wc_wchar_t cc)
48
8.14M
{
49
8.14M
    cc.code = WC_GBK_N(cc.code);
50
8.14M
    if (cc.code < 0x4000)
51
7.83M
  cc.ccs = WC_CCS_GBK_1;
52
309k
    else {
53
309k
  cc.ccs = WC_CCS_GBK_2;
54
309k
  cc.code -= 0x4000;
55
309k
    }
56
8.14M
    cc.code = WC_N_CS128W(cc.code);
57
8.14M
    return cc;
58
8.14M
}
59
60
wc_wchar_t
61
wc_cs128w_to_gbk(wc_wchar_t cc)
62
8.08M
{
63
8.08M
    cc.code = WC_CS128W_N(cc.code);
64
8.08M
    if (cc.ccs == WC_CCS_GBK_2)
65
304k
  cc.code += 0x4000;
66
8.08M
    cc.ccs = WC_CCS_GBK;
67
8.08M
    cc.code = WC_N_GBK(cc.code);
68
8.08M
    return cc;
69
8.08M
}
70
71
wc_uint32
72
wc_gbk_to_N(wc_uint32 c)
73
4.14M
{
74
4.14M
    if (c <= 0xA1A0)  /* 0x8140 - 0xA1A0 */
75
3.63M
  return WC_GBK_N(c);
76
508k
    if (c <= 0xA2AA)  /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
77
21.8k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
78
486k
    if (c <= 0xA6A0)  /* 0xA240 - 0xA6A0 */
79
884
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
80
485k
    if (c <= 0xA6F5)  /* 0xA6E0 - 0xA6F5 */
81
1.43k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
82
483k
    if (c <= 0xA8A0)  /* 0xA7A0 - 0xA8A0 */
83
5.26k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
84
478k
    if (c <= 0xA8C0)  /* 0xA8BB - 0xA8C0 */
85
721
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
86
      /* 0xA940 - 0xFEA0 */
87
477k
    return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
88
478k
}
89
90
Str
91
wc_conv_from_gbk(Str is, wc_ces ces)
92
244
{
93
244
    Str os;
94
244
    wc_uchar *sp = (wc_uchar *)is->ptr;
95
244
    wc_uchar *ep = sp + is->length;
96
244
    wc_uchar *p;
97
244
    int state = WC_GBK_NOSTATE;
98
244
    wc_uint32 gbk;
99
100
569
    for (p = sp; p < ep && *p < 0x80; p++) 
101
325
  ;
102
244
    if (p == ep)
103
22
  return is;
104
222
    os = Strnew_size(is->length);
105
222
    if (p > sp)
106
24
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
107
108
7.48M
    for (; p < ep; p++) {
109
7.48M
  switch (state) {
110
4.52M
  case WC_GBK_NOSTATE:
111
4.52M
      switch (WC_GBK_MAP[*p]) {
112
2.95M
      case UB:
113
2.95M
    state = WC_GBK_MBYTE1;
114
2.95M
    break;
115
465k
      case C80:
116
465k
    wtf_push(os, WC_CCS_GBK_80, *p);
117
465k
    break;
118
131k
      case C1:
119
131k
    wtf_push_unknown(os, p, 1);
120
131k
    break;
121
978k
      default:
122
978k
    Strcat_char(os, (char)*p);
123
978k
    break;
124
4.52M
      }
125
4.52M
      break;
126
4.52M
  case WC_GBK_MBYTE1:
127
2.95M
      if (WC_GBK_MAP[*p] & LB) {
128
2.88M
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
129
2.88M
    if (*(p-1) >= 0xA1 && *p >= 0xA1)
130
699k
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
131
2.18M
    else
132
2.18M
        wtf_push(os, WC_CCS_GBK, gbk);
133
2.88M
      } else
134
69.9k
    wtf_push_unknown(os, p-1, 2);
135
2.95M
      state = WC_GBK_NOSTATE;
136
2.95M
      break;
137
7.48M
  }
138
7.48M
    }
139
222
    switch (state) {
140
31
    case WC_GBK_MBYTE1:
141
31
  wtf_push_unknown(os, p-1, 1);
142
31
  break;
143
222
    }
144
222
    return os;
145
222
}
146
147
void
148
wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
149
12.4M
{
150
21.6M
  while (1) {
151
21.6M
    switch (cc.ccs) {
152
1.50M
    case WC_CCS_US_ASCII:
153
1.50M
  Strcat_char(os, (char)cc.code);
154
1.50M
  return;
155
3.43M
    case WC_CCS_GB_2312:
156
3.43M
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
157
3.43M
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
158
3.43M
  return;
159
14.6k
    case WC_CCS_GBK_80:
160
14.6k
  Strcat_char(os, (char)(cc.code | 0x80));
161
14.6k
  return;
162
2.86M
    case WC_CCS_GBK_1:
163
2.89M
    case WC_CCS_GBK_2:
164
2.89M
  cc = wc_cs128w_to_gbk(cc);
165
3.76M
    case WC_CCS_GBK:
166
3.76M
  Strcat_char(os, (char)(cc.code >> 8));
167
3.76M
  Strcat_char(os, (char)(cc.code & 0xff));
168
3.76M
  return;
169
903k
    case WC_CCS_UNKNOWN_W:
170
903k
  if (!WcOption.no_replace)
171
903k
      Strcat_charp(os, WC_REPLACE_W);
172
903k
  return;
173
2.83M
    case WC_CCS_UNKNOWN:
174
2.83M
  if (!WcOption.no_replace)
175
2.83M
      Strcat_charp(os, WC_REPLACE);
176
2.83M
  return;
177
9.19M
    default:
178
9.19M
#ifdef USE_UNICODE
179
9.19M
  if (WcOption.ucs_conv)
180
9.19M
      cc = wc_any_to_any_ces(cc, st);
181
0
  else
182
0
#endif
183
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
184
9.19M
  continue;
185
21.6M
    }
186
21.6M
  }
187
12.4M
}
188
189
Str
190
wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
191
0
{
192
0
    static Str os;
193
0
    static wc_uchar gbku;
194
0
    wc_uint32 gbk;
195
196
0
    if (st->state == -1) {
197
0
  st->state = WC_GBK_NOSTATE;
198
0
  os = Strnew_size(8);
199
0
    }
200
201
0
    switch (st->state) {
202
0
    case WC_GBK_NOSTATE:
203
0
  switch (WC_GBK_MAP[c]) {
204
0
  case UB:
205
0
      gbku = c;
206
0
      st->state = WC_GBK_MBYTE1;
207
0
      return NULL;
208
0
  case C80:
209
0
      wtf_push(os, WC_CCS_GBK_80, c);
210
0
      break;
211
0
  case C1:
212
0
      break;
213
0
  default:
214
0
      Strcat_char(os, (char)c);
215
0
      break;
216
0
  }
217
0
  break;
218
0
    case WC_GBK_MBYTE1:
219
0
  if (WC_GBK_MAP[c] & LB) {
220
0
      gbk = ((wc_uint32)gbku << 8) | c;
221
0
      if (gbku >= 0xA1 && c >= 0xA1)
222
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
223
0
      else
224
0
    wtf_push(os, WC_CCS_GBK, gbk);
225
0
  }
226
0
  break;
227
0
    }
228
0
    st->state = -1;
229
0
    return os;
230
0
}