Coverage Report

Created: 2023-09-25 06:56

/src/w3m/libwc/gbk.c
Line
Count
Source (jump to first uncovered line)
1
2
#include "wc.h"
3
#include "gbk.h"
4
#include "search.h"
5
#include "wtf.h"
6
#ifdef USE_UNICODE
7
#include "ucs.h"
8
#endif
9
10
#include "map/gb2312_gbk.map"
11
12
#define C0 WC_GBK_MAP_C0
13
#define GL WC_GBK_MAP_GL
14
0
#define C1 WC_GBK_MAP_C1
15
0
#define LB WC_GBK_MAP_LB
16
0
#define UB WC_GBK_MAP_UB
17
0
#define C80 WC_GBK_MAP_80
18
19
wc_uint8 WC_GBK_MAP[ 0x100 ] = {
20
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
21
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
22
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
23
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
24
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
25
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
26
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
27
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
28
29
    C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
30
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
31
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
32
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
33
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
34
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
35
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
36
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
37
};
38
39
wc_ccs
40
0
wc_gb2312_or_gbk(wc_uint16 code) {
41
0
    return wc_map_range_search(code,
42
0
  gb2312_gbk_map, N_gb2312_gbk_map)
43
0
  ? WC_CCS_GBK : WC_CCS_GB_2312;
44
0
}
45
46
wc_wchar_t
47
wc_gbk_to_cs128w(wc_wchar_t cc)
48
437
{
49
437
    cc.code = WC_GBK_N(cc.code);
50
437
    if (cc.code < 0x4000)
51
437
  cc.ccs = WC_CCS_GBK_1;
52
0
    else {
53
0
  cc.ccs = WC_CCS_GBK_2;
54
0
  cc.code -= 0x4000;
55
0
    }
56
437
    cc.code = WC_N_CS128W(cc.code);
57
437
    return cc;
58
437
}
59
60
wc_wchar_t
61
wc_cs128w_to_gbk(wc_wchar_t cc)
62
437
{
63
437
    cc.code = WC_CS128W_N(cc.code);
64
437
    if (cc.ccs == WC_CCS_GBK_2)
65
0
  cc.code += 0x4000;
66
437
    cc.ccs = WC_CCS_GBK;
67
437
    cc.code = WC_N_GBK(cc.code);
68
437
    return cc;
69
437
}
70
71
wc_uint32
72
wc_gbk_to_N(wc_uint32 c)
73
0
{
74
0
    if (c <= 0xA1A0) /* 0x8140 - 0xA1A0 */
75
0
  return WC_GBK_N(c);
76
0
    if (c <= 0xA2AA) /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
77
0
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
78
0
    if (c <= 0xA6A0) /* 0xA240 - 0xA6A0 */
79
0
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
80
0
    if (c <= 0xA6F5) /* 0xA6E0 - 0xA6F5 */
81
0
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
82
0
    if (c <= 0xA8A0) /* 0xA7A0 - 0xA8A0 */
83
0
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
84
0
    if (c <= 0xA8C0) /* 0xA8BB - 0xA8C0 */
85
0
  return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
86
      /* 0xA940 - 0xFEA0 */
87
0
    return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
88
0
}
89
90
Str
91
wc_conv_from_gbk(Str is, wc_ces ces)
92
0
{
93
0
    Str os;
94
0
    wc_uchar *sp = (wc_uchar *)is->ptr;
95
0
    wc_uchar *ep = sp + is->length;
96
0
    wc_uchar *p;
97
0
    int state = WC_GBK_NOSTATE;
98
0
    wc_uint32 gbk;
99
100
0
    for (p = sp; p < ep && *p < 0x80; p++) 
101
0
  ;
102
0
    if (p == ep)
103
0
  return is;
104
0
    os = Strnew_size(is->length);
105
0
    if (p > sp)
106
0
  Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
107
108
0
    for (; p < ep; p++) {
109
0
  switch (state) {
110
0
  case WC_GBK_NOSTATE:
111
0
      switch (WC_GBK_MAP[*p]) {
112
0
      case UB:
113
0
    state = WC_GBK_MBYTE1;
114
0
    break;
115
0
      case C80:
116
0
    wtf_push(os, WC_CCS_GBK_80, *p);
117
0
    break;
118
0
      case C1:
119
0
    wtf_push_unknown(os, p, 1);
120
0
    break;
121
0
      default:
122
0
    Strcat_char(os, (char)*p);
123
0
    break;
124
0
      }
125
0
      break;
126
0
  case WC_GBK_MBYTE1:
127
0
      if (WC_GBK_MAP[*p] & LB) {
128
0
    gbk = ((wc_uint32)*(p-1) << 8) | *p;
129
0
    if (*(p-1) >= 0xA1 && *p >= 0xA1)
130
0
        wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
131
0
    else
132
0
        wtf_push(os, WC_CCS_GBK, gbk);
133
0
      } else
134
0
    wtf_push_unknown(os, p-1, 2);
135
0
      state = WC_GBK_NOSTATE;
136
0
      break;
137
0
  }
138
0
    }
139
0
    switch (state) {
140
0
    case WC_GBK_MBYTE1:
141
0
  wtf_push_unknown(os, p-1, 1);
142
0
  break;
143
0
    }
144
0
    return os;
145
0
}
146
147
void
148
wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
149
928k
{
150
1.85M
  while (1) {
151
1.85M
    switch (cc.ccs) {
152
16
    case WC_CCS_US_ASCII:
153
16
  Strcat_char(os, (char)cc.code);
154
16
  return;
155
9.69k
    case WC_CCS_GB_2312:
156
9.69k
  Strcat_char(os, (char)((cc.code >> 8) | 0x80));
157
9.69k
  Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
158
9.69k
  return;
159
0
    case WC_CCS_GBK_80:
160
0
  Strcat_char(os, (char)(cc.code | 0x80));
161
0
  return;
162
437
    case WC_CCS_GBK_1:
163
437
    case WC_CCS_GBK_2:
164
437
  cc = wc_cs128w_to_gbk(cc);
165
437
    case WC_CCS_GBK:
166
437
  Strcat_char(os, (char)(cc.code >> 8));
167
437
  Strcat_char(os, (char)(cc.code & 0xff));
168
437
  return;
169
869k
    case WC_CCS_UNKNOWN_W:
170
869k
  if (!WcOption.no_replace)
171
869k
      Strcat_charp(os, WC_REPLACE_W);
172
869k
  return;
173
48.2k
    case WC_CCS_UNKNOWN:
174
48.2k
  if (!WcOption.no_replace)
175
48.2k
      Strcat_charp(os, WC_REPLACE);
176
48.2k
  return;
177
927k
    default:
178
927k
#ifdef USE_UNICODE
179
927k
  if (WcOption.ucs_conv)
180
927k
      cc = wc_any_to_any_ces(cc, st);
181
0
  else
182
0
#endif
183
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
184
927k
  continue;
185
1.85M
    }
186
1.85M
  }
187
928k
}
188
189
Str
190
wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
191
0
{
192
0
    static Str os;
193
0
    static wc_uchar gbku;
194
0
    wc_uint32 gbk;
195
196
0
    if (st->state == -1) {
197
0
  st->state = WC_GBK_NOSTATE;
198
0
  os = Strnew_size(8);
199
0
    }
200
201
0
    switch (st->state) {
202
0
    case WC_GBK_NOSTATE:
203
0
  switch (WC_GBK_MAP[c]) {
204
0
  case UB:
205
0
      gbku = c;
206
0
      st->state = WC_GBK_MBYTE1;
207
0
      return NULL;
208
0
  case C80:
209
0
      wtf_push(os, WC_CCS_GBK_80, c);
210
0
      break;
211
0
  case C1:
212
0
      break;
213
0
  default:
214
0
      Strcat_char(os, (char)c);
215
0
      break;
216
0
  }
217
0
  break;
218
0
    case WC_GBK_MBYTE1:
219
0
  if (WC_GBK_MAP[c] & LB) {
220
0
      gbk = ((wc_uint32)gbku << 8) | c;
221
0
      if (gbku >= 0xA1 && c >= 0xA1)
222
0
    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
223
0
      else
224
0
    wtf_push(os, WC_CCS_GBK, gbk);
225
0
  }
226
0
  break;
227
0
    }
228
0
    st->state = -1;
229
0
    return os;
230
0
}