Coverage Report

Created: 2025-11-24 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/gbk.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "gbk.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
10
#include "map/gb2312_gbk.map"
11
12
#define C0 WC_GBK_MAP_C0
13
#define GL WC_GBK_MAP_GL
14
75.0k
#define C1 WC_GBK_MAP_C1
15
1.45M
#define LB WC_GBK_MAP_LB
16
1.45M
#define UB WC_GBK_MAP_UB
17
449k
#define C80 WC_GBK_MAP_80
18
19
wc_uint8 WC_GBK_MAP[ 0x100 ] = {
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
22
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
23
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
27
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
28
29
    C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
36
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
37
};
38
39
wc_ccs
40
1.11M
wc_gb2312_or_gbk(wc_uint16 code) {
41
1.11M
    return wc_map_range_search(code,
42
1.11M
  gb2312_gbk_map, N_gb2312_gbk_map)
43
1.11M
  ? WC_CCS_GBK : WC_CCS_GB_2312;
44
1.11M
}
45
46
wc_wchar_t
47
wc_gbk_to_cs128w(wc_wchar_t cc)
48
3.95M
{
49
3.95M
    cc.code = WC_GBK_N(cc.code);
50
3.95M
    if (cc.code < 0x4000)
51
3.91M
  cc.ccs = WC_CCS_GBK_1;
52
43.2k
    else {
53
43.2k
  cc.ccs = WC_CCS_GBK_2;
54
43.2k
  cc.code -= 0x4000;
55
43.2k
    }
56
3.95M
    cc.code = WC_N_CS128W(cc.code);
57
3.95M
    return cc;
58
3.95M
}
59
60
wc_wchar_t
61
wc_cs128w_to_gbk(wc_wchar_t cc)
62
3.94M
{
63
3.94M
    cc.code = WC_CS128W_N(cc.code);
64
3.94M
    if (cc.ccs == WC_CCS_GBK_2)
65
42.1k
  cc.code += 0x4000;
66
3.94M
    cc.ccs = WC_CCS_GBK;
67
3.94M
    cc.code = WC_N_GBK(cc.code);
68
3.94M
    return cc;
69
3.94M
}
70
71
wc_uint32
72
wc_gbk_to_N(wc_uint32 c)
73
803k
{
74
803k
    if (c <= 0xA1A0)  /* 0x8140 - 0xA1A0 */
75
715k
  return WC_GBK_N(c);
76
87.9k
    if (c <= 0xA2AA)  /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
77
21.3k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
78
66.6k
    if (c <= 0xA6A0)  /* 0xA240 - 0xA6A0 */
79
1.20k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
80
65.4k
    if (c <= 0xA6F5)  /* 0xA6E0 - 0xA6F5 */
81
595
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
82
64.8k
    if (c <= 0xA8A0)  /* 0xA7A0 - 0xA8A0 */
83
722
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
84
64.1k
    if (c <= 0xA8C0)  /* 0xA8BB - 0xA8C0 */
85
418
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
86
      /* 0xA940 - 0xFEA0 */
87
63.6k
    return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
88
64.1k
}
89
90
Str
91
wc_conv_from_gbk(Str is, wc_ces ces)
92
285
{
93
285
    Str os;
94
285
    wc_uchar *sp = (wc_uchar *)is->ptr;
95
285
    wc_uchar *ep = sp + is->length;
96
285
    wc_uchar *p;
97
285
    int state = WC_GBK_NOSTATE;
98
285
    wc_uint32 gbk;
99
100
43.0k
    for (p = sp; p < ep && *p < 0x80; p++) 
101
42.7k
  ;
102
285
    if (p == ep)
103
27
  return is;
104
258
    os = Strnew_size(is->length);
105
258
    if (p > sp)
106
27
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
107
108
4.39M
    for (; p < ep; p++) {
109
4.39M
  switch (state) {
110
2.93M
  case WC_GBK_NOSTATE:
111
2.93M
      switch (WC_GBK_MAP[*p]) {
112
1.45M
      case UB:
113
1.45M
    state = WC_GBK_MBYTE1;
114
1.45M
    break;
115
449k
      case C80:
116
449k
    wtf_push(os, WC_CCS_GBK_80, *p);
117
449k
    break;
118
75.0k
      case C1:
119
75.0k
    wtf_push_unknown(os, p, 1);
120
75.0k
    break;
121
948k
      default:
122
948k
    Strcat_char(os, (char)*p);
123
948k
    break;
124
2.93M
      }
125
2.93M
      break;
126
2.93M
  case WC_GBK_MBYTE1:
127
1.45M
      if (WC_GBK_MAP[*p] & LB) {
128
973k
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
129
973k
    if (*(p-1) >= 0xA1 && *p >= 0xA1)
130
190k
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
131
783k
    else
132
783k
        wtf_push(os, WC_CCS_GBK, gbk);
133
973k
      } else
134
484k
    wtf_push_unknown(os, p-1, 2);
135
1.45M
      state = WC_GBK_NOSTATE;
136
1.45M
      break;
137
4.39M
  }
138
4.39M
    }
139
258
    switch (state) {
140
33
    case WC_GBK_MBYTE1:
141
33
  wtf_push_unknown(os, p-1, 1);
142
33
  break;
143
258
    }
144
258
    return os;
145
258
}
146
147
void
148
wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
149
6.51M
{
150
12.8M
  while (1) {
151
12.8M
    switch (cc.ccs) {
152
1.57M
    case WC_CCS_US_ASCII:
153
1.57M
  Strcat_char(os, (char)cc.code);
154
1.57M
  return;
155
1.31M
    case WC_CCS_GB_2312:
156
1.31M
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
157
1.31M
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
158
1.31M
  return;
159
5.13k
    case WC_CCS_GBK_80:
160
5.13k
  Strcat_char(os, (char)(cc.code | 0x80));
161
5.13k
  return;
162
2.91M
    case WC_CCS_GBK_1:
163
2.92M
    case WC_CCS_GBK_2:
164
2.92M
  cc = wc_cs128w_to_gbk(cc);
165
2.95M
    case WC_CCS_GBK:
166
2.95M
  Strcat_char(os, (char)(cc.code >> 8));
167
2.95M
  Strcat_char(os, (char)(cc.code & 0xff));
168
2.95M
  return;
169
487k
    case WC_CCS_UNKNOWN_W:
170
487k
  if (!WcOption.no_replace)
171
487k
      Strcat_charp(os, WC_REPLACE_W);
172
487k
  return;
173
182k
    case WC_CCS_UNKNOWN:
174
182k
  if (!WcOption.no_replace)
175
182k
      Strcat_charp(os, WC_REPLACE);
176
182k
  return;
177
6.31M
    default:
178
6.31M
#ifdef USE_UNICODE
179
6.31M
  if (WcOption.ucs_conv)
180
6.31M
      cc = wc_any_to_any_ces(cc, st);
181
0
  else
182
0
#endif
183
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
184
6.31M
  continue;
185
12.8M
    }
186
12.8M
  }
187
6.51M
}
188
189
Str
190
wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
191
0
{
192
0
    static Str os;
193
0
    static wc_uchar gbku;
194
0
    wc_uint32 gbk;
195
196
0
    if (st->state == -1) {
197
0
  st->state = WC_GBK_NOSTATE;
198
0
  os = Strnew_size(8);
199
0
    }
200
201
0
    switch (st->state) {
202
0
    case WC_GBK_NOSTATE:
203
0
  switch (WC_GBK_MAP[c]) {
204
0
  case UB:
205
0
      gbku = c;
206
0
      st->state = WC_GBK_MBYTE1;
207
0
      return NULL;
208
0
  case C80:
209
0
      wtf_push(os, WC_CCS_GBK_80, c);
210
0
      break;
211
0
  case C1:
212
0
      break;
213
0
  default:
214
0
      Strcat_char(os, (char)c);
215
0
      break;
216
0
  }
217
0
  break;
218
0
    case WC_GBK_MBYTE1:
219
0
  if (WC_GBK_MAP[c] & LB) {
220
0
      gbk = ((wc_uint32)gbku << 8) | c;
221
0
      if (gbku >= 0xA1 && c >= 0xA1)
222
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
223
0
      else
224
0
    wtf_push(os, WC_CCS_GBK, gbk);
225
0
  }
226
0
  break;
227
0
    }
228
0
    st->state = -1;
229
0
    return os;
230
0
}