Coverage Report

Created: 2025-07-09 06:28

/src/w3m/libwc/gbk.c
Line
Count
Source (jump to first uncovered line)
1
2
#include "wc.h"
3
#include "gbk.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
10
#include "map/gb2312_gbk.map"
11
12
#define C0 WC_GBK_MAP_C0
13
#define GL WC_GBK_MAP_GL
14
86.3k
#define C1 WC_GBK_MAP_C1
15
1.25M
#define LB WC_GBK_MAP_LB
16
1.25M
#define UB WC_GBK_MAP_UB
17
61.3k
#define C80 WC_GBK_MAP_80
18
19
wc_uint8 WC_GBK_MAP[ 0x100 ] = {
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
22
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
23
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
27
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
28
29
    C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
36
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
37
};
38
39
wc_ccs
40
979k
wc_gb2312_or_gbk(wc_uint16 code) {
41
979k
    return wc_map_range_search(code,
42
979k
  gb2312_gbk_map, N_gb2312_gbk_map)
43
979k
  ? WC_CCS_GBK : WC_CCS_GB_2312;
44
979k
}
45
46
wc_wchar_t
47
wc_gbk_to_cs128w(wc_wchar_t cc)
48
1.08M
{
49
1.08M
    cc.code = WC_GBK_N(cc.code);
50
1.08M
    if (cc.code < 0x4000)
51
1.04M
  cc.ccs = WC_CCS_GBK_1;
52
40.8k
    else {
53
40.8k
  cc.ccs = WC_CCS_GBK_2;
54
40.8k
  cc.code -= 0x4000;
55
40.8k
    }
56
1.08M
    cc.code = WC_N_CS128W(cc.code);
57
1.08M
    return cc;
58
1.08M
}
59
60
wc_wchar_t
61
wc_cs128w_to_gbk(wc_wchar_t cc)
62
1.07M
{
63
1.07M
    cc.code = WC_CS128W_N(cc.code);
64
1.07M
    if (cc.ccs == WC_CCS_GBK_2)
65
39.6k
  cc.code += 0x4000;
66
1.07M
    cc.ccs = WC_CCS_GBK;
67
1.07M
    cc.code = WC_N_GBK(cc.code);
68
1.07M
    return cc;
69
1.07M
}
70
71
wc_uint32
72
wc_gbk_to_N(wc_uint32 c)
73
102k
{
74
102k
    if (c <= 0xA1A0)  /* 0x8140 - 0xA1A0 */
75
49.9k
  return WC_GBK_N(c);
76
52.8k
    if (c <= 0xA2AA)  /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
77
1.06k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
78
51.7k
    if (c <= 0xA6A0)  /* 0xA240 - 0xA6A0 */
79
1.78k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
80
49.9k
    if (c <= 0xA6F5)  /* 0xA6E0 - 0xA6F5 */
81
1.06k
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
82
48.9k
    if (c <= 0xA8A0)  /* 0xA7A0 - 0xA8A0 */
83
944
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
84
47.9k
    if (c <= 0xA8C0)  /* 0xA8BB - 0xA8C0 */
85
363
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
86
      /* 0xA940 - 0xFEA0 */
87
47.6k
    return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
88
47.9k
}
89
90
Str
91
wc_conv_from_gbk(Str is, wc_ces ces)
92
256
{
93
256
    Str os;
94
256
    wc_uchar *sp = (wc_uchar *)is->ptr;
95
256
    wc_uchar *ep = sp + is->length;
96
256
    wc_uchar *p;
97
256
    int state = WC_GBK_NOSTATE;
98
256
    wc_uint32 gbk;
99
100
18.0k
    for (p = sp; p < ep && *p < 0x80; p++) 
101
17.7k
  ;
102
256
    if (p == ep)
103
25
  return is;
104
231
    os = Strnew_size(is->length);
105
231
    if (p > sp)
106
20
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
107
108
3.20M
    for (; p < ep; p++) {
109
3.20M
  switch (state) {
110
1.94M
  case WC_GBK_NOSTATE:
111
1.94M
      switch (WC_GBK_MAP[*p]) {
112
1.25M
      case UB:
113
1.25M
    state = WC_GBK_MBYTE1;
114
1.25M
    break;
115
61.3k
      case C80:
116
61.3k
    wtf_push(os, WC_CCS_GBK_80, *p);
117
61.3k
    break;
118
86.3k
      case C1:
119
86.3k
    wtf_push_unknown(os, p, 1);
120
86.3k
    break;
121
541k
      default:
122
541k
    Strcat_char(os, (char)*p);
123
541k
    break;
124
1.94M
      }
125
1.94M
      break;
126
1.94M
  case WC_GBK_MBYTE1:
127
1.25M
      if (WC_GBK_MAP[*p] & LB) {
128
992k
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
129
992k
    if (*(p-1) >= 0xA1 && *p >= 0xA1)
130
857k
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
131
134k
    else
132
134k
        wtf_push(os, WC_CCS_GBK, gbk);
133
992k
      } else
134
263k
    wtf_push_unknown(os, p-1, 2);
135
1.25M
      state = WC_GBK_NOSTATE;
136
1.25M
      break;
137
3.20M
  }
138
3.20M
    }
139
231
    switch (state) {
140
26
    case WC_GBK_MBYTE1:
141
26
  wtf_push_unknown(os, p-1, 1);
142
26
  break;
143
231
    }
144
231
    return os;
145
231
}
146
147
void
148
wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
149
7.52M
{
150
14.4M
  while (1) {
151
14.4M
    switch (cc.ccs) {
152
9.80k
    case WC_CCS_US_ASCII:
153
9.80k
  Strcat_char(os, (char)cc.code);
154
9.80k
  return;
155
1.97M
    case WC_CCS_GB_2312:
156
1.97M
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
157
1.97M
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
158
1.97M
  return;
159
166k
    case WC_CCS_GBK_80:
160
166k
  Strcat_char(os, (char)(cc.code | 0x80));
161
166k
  return;
162
853k
    case WC_CCS_GBK_1:
163
858k
    case WC_CCS_GBK_2:
164
858k
  cc = wc_cs128w_to_gbk(cc);
165
906k
    case WC_CCS_GBK:
166
906k
  Strcat_char(os, (char)(cc.code >> 8));
167
906k
  Strcat_char(os, (char)(cc.code & 0xff));
168
906k
  return;
169
3.69M
    case WC_CCS_UNKNOWN_W:
170
3.69M
  if (!WcOption.no_replace)
171
3.69M
      Strcat_charp(os, WC_REPLACE_W);
172
3.69M
  return;
173
779k
    case WC_CCS_UNKNOWN:
174
779k
  if (!WcOption.no_replace)
175
779k
      Strcat_charp(os, WC_REPLACE);
176
779k
  return;
177
6.88M
    default:
178
6.88M
#ifdef USE_UNICODE
179
6.88M
  if (WcOption.ucs_conv)
180
6.88M
      cc = wc_any_to_any_ces(cc, st);
181
0
  else
182
0
#endif
183
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
184
6.88M
  continue;
185
14.4M
    }
186
14.4M
  }
187
7.52M
}
188
189
Str
190
wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
191
0
{
192
0
    static Str os;
193
0
    static wc_uchar gbku;
194
0
    wc_uint32 gbk;
195
196
0
    if (st->state == -1) {
197
0
  st->state = WC_GBK_NOSTATE;
198
0
  os = Strnew_size(8);
199
0
    }
200
201
0
    switch (st->state) {
202
0
    case WC_GBK_NOSTATE:
203
0
  switch (WC_GBK_MAP[c]) {
204
0
  case UB:
205
0
      gbku = c;
206
0
      st->state = WC_GBK_MBYTE1;
207
0
      return NULL;
208
0
  case C80:
209
0
      wtf_push(os, WC_CCS_GBK_80, c);
210
0
      break;
211
0
  case C1:
212
0
      break;
213
0
  default:
214
0
      Strcat_char(os, (char)c);
215
0
      break;
216
0
  }
217
0
  break;
218
0
    case WC_GBK_MBYTE1:
219
0
  if (WC_GBK_MAP[c] & LB) {
220
0
      gbk = ((wc_uint32)gbku << 8) | c;
221
0
      if (gbku >= 0xA1 && c >= 0xA1)
222
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
223
0
      else
224
0
    wtf_push(os, WC_CCS_GBK, gbk);
225
0
  }
226
0
  break;
227
0
    }
228
0
    st->state = -1;
229
0
    return os;
230
0
}