Coverage Report

Created: 2025-10-10 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/w3m/libwc/johab.c
Line
Count
Source
1
2
#include "wc.h"
3
#include "johab.h"
4
#include "wtf.h"
5
#ifdef USE_UNICODE
6
#include "ucs.h"
7
#endif
8
9
/*
 * Two-letter aliases for the WC_JOHAB_MAP_* byte classes.  They keep the
 * rows of the WC_JOHAB_MAP initializer in this file short.  The byte ranges
 * noted beside each alias are the positions where that alias appears in
 * the table.
 */
#define C0 WC_JOHAB_MAP_C0	/* 00-1F, 7F */
#define GL WC_JOHAB_MAP_GL	/* 20-30 */
#define C1 WC_JOHAB_MAP_C1	/* 80, FF */
#define GH WC_JOHAB_MAP_GH	/* 31-40 */
#define GB WC_JOHAB_MAP_GB	/* 41-7E */
#define JJ WC_JOHAB_MAP_JJ	/* 84-90 */
#define JB WC_JOHAB_MAP_JB	/* 91-D3 */
#define HB WC_JOHAB_MAP_HB	/* D8-DE, E0-F9 */
#define CJ WC_JOHAB_MAP_CJ	/* 81-83 */
#define CB WC_JOHAB_MAP_CB	/* D4-D7, DF, FA-FE */
19
20
/*
21
  00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
22
  C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
23
  -     -     J     B     -  -  J     J     B     B     B     B  B     B     -
24
25
  C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1 
26
*/
27
28
wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
29
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
30
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
31
/*  20 */
32
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
33
/*  30  31 */
34
    GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
35
/*  40  41 */
36
    GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
37
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
38
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
39
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,
40
41
/*  80          83  84 */
42
    C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
43
/*  90  91 */
44
    JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
45
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
46
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
47
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
48
/*              D3  D4          D7  D8                          DF */
49
    JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, 
50
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
51
/*                                      F9  FA              FE  FF */
52
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
53
};
54
55
/*
 * Decoding tables used by wc_johab1_to_N().  Row 0 is indexed by bits
 * 10-14 of the two-byte code, row 1 by bits 5-9 and row 2 by bits 0-4.
 * A non-zero entry is the 1-based number of that component; 0 marks a bit
 * pattern for which wc_johab1_to_N() reports WC_C_JOHAB_ERROR.
 * (Presumably the three components are the initial, medial and final jamo
 * of a Hangul syllable -- confirm against the Johab specification.)
 *
 * The table is only ever read, so it is const.
 */
static const wc_uint8 johab1_N_map[ 3 ][ 32 ] = {
    /* row 0: bits 10-14 */
    {  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
      15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row 1: bits 5-9 */
    {  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,  6,  7,  8,  9, 10, 11,
       0,  0, 12, 13, 14, 15, 16, 17,  0,  0, 18, 19, 20, 21,  0,  0 },
    /* row 2: bits 0-4 */
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
      16, 17,  0, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,  0,  0 }
};
63
64
/*
 * Encoding tables used by wc_N_to_johab1().  For a linear index n, row 0
 * is indexed by (n / 28) / 21, row 1 by (n / 28) % 21 and row 2 by n % 28;
 * each entry is the 5-bit field placed at bits 10-14, 5-9 and 0-4 of the
 * resulting code respectively.  Trailing zeros only pad every row to 32
 * entries so that an index masked with 0x1F stays inside the array.
 *
 * The table is only ever read, so it is const.
 */
static const wc_uint8 N_johab1_map[ 3 ][ 32 ] = {
    /* row 0: field for bits 10-14 */
    {  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
      18, 19, 20,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row 1: field for bits 5-9 */
    {  3,  4,  5,  6,  7, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22,
      23, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row 2: field for bits 0-4 */
    {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
      17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0 }
};
72
73
wc_wchar_t
74
wc_johab_to_ksx1001(wc_wchar_t cc)
75
3.97k
{
76
3.97k
#ifdef USE_UNICODE
77
3.97k
    static wc_table *t = NULL;
78
3.97k
#endif
79
80
3.97k
    switch (cc.ccs) {
81
1.98k
    case WC_CCS_JOHAB:
82
1.98k
  return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc));
83
859
    case WC_CCS_JOHAB_1:
84
1.39k
    case WC_CCS_JOHAB_2:
85
1.39k
#ifdef USE_UNICODE
86
1.39k
  if (WcOption.ucs_conv) {
87
1.39k
      if (t == NULL)
88
1
    t = wc_get_ucs_table(WC_CCS_KS_X_1001);
89
1.39k
      cc = wc_any_to_any(cc, t);
90
1.39k
  } else
91
0
#endif
92
0
      cc.ccs = WC_CCS_UNKNOWN_W;
93
1.39k
  break;
94
593
    case WC_CCS_JOHAB_3:
95
593
  if (cc.code >= 0x2121)
96
307
      cc.ccs = WC_CCS_KS_X_1001;
97
286
  else
98
286
      cc.ccs = WC_CCS_UNKNOWN_W;
99
593
  break;
100
3.97k
    }
101
1.98k
    return cc;
102
3.97k
}
103
104
wc_wchar_t
105
wc_ksx1001_to_johab(wc_wchar_t cc)
106
28.0k
{
107
28.0k
    cc.code &= 0x7f7f;
108
28.0k
    if ((cc.code >= 0x2121 && cc.code <  0x2421) ||
109
18.0k
  (cc.code >  0x2453 && cc.code <= 0x2C7E) ||
110
23.9k
  (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) {
111
23.9k
  cc.ccs = WC_CCS_JOHAB_3;
112
23.9k
  return cc;
113
23.9k
    }
114
4.08k
#ifdef USE_UNICODE
115
4.08k
    if (WcOption.ucs_conv)
116
4.08k
  cc = wc_ucs_to_johab(wc_any_to_ucs(cc));
117
0
    else
118
0
#endif
119
0
  cc.ccs = WC_CCS_UNKNOWN_W;
120
4.08k
    return cc;
121
28.0k
}
122
123
#ifdef USE_UNICODE
/*
 * Convert a UCS code point to Johab.
 *
 * Code points in WC_C_UCS2_HANGUL..WC_C_UCS2_HANGUL_END are converted
 * arithmetically with WC_N_JOHAB1() and tagged WC_CCS_JOHAB.
 * 0x3131-0x3163 is looked up in the WC_CCS_JOHAB_2 table, and every other
 * code point in the WC_CCS_JOHAB_3 table.
 */
wc_wchar_t
wc_ucs_to_johab(wc_uint32 ucs)
{
    wc_wchar_t result;

    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
        /* Offset from the start of the Hangul range. */
        ucs -= WC_C_UCS2_HANGUL;
        result.code = WC_N_JOHAB1(ucs);
        result.ccs = WC_CCS_JOHAB;
        return result;
    }

    if (ucs >= 0x3131 && ucs <= 0x3163)
        return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_2));

    return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_3));
}
#endif
144
145
wc_uint32
146
wc_johab1_to_N(wc_uint32 code)
147
765k
{
148
765k
    wc_uint32 a, b, c;
149
150
765k
    a = johab1_N_map[0][(code >> 10) & 0x1F];
151
765k
    b = johab1_N_map[1][(code >> 5)  & 0x1F];
152
765k
    c = johab1_N_map[2][ code        & 0x1F];
153
765k
    if (a && b && c)
154
753k
  return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1);
155
12.5k
    return WC_C_JOHAB_ERROR;
156
765k
}
157
158
wc_uint32
159
wc_N_to_johab1(wc_uint32 code)
160
388k
{
161
388k
    wc_uint32 a, b, c;
162
163
388k
    a = N_johab1_map[0][(code / 28) / 21 & 0x1F];
164
388k
    b = N_johab1_map[1][(code / 28) % 21 & 0x1F];
165
388k
    c = N_johab1_map[2][ code % 28       & 0x1F];
166
388k
    return 0x8000 | (a << 10) | (b << 5) | c;
167
388k
}
168
169
/*
 * johab3_to_ksx1001(ub, lb): rewrite, in place, the upper byte `ub` and
 * lower byte `lb` of a two-byte Johab code into the two 7-bit bytes used
 * for WC_CCS_JOHAB_3.
 *
 * Resulting codes: 0x1F21 - 0x2C7E, 0x4A21 - 0x7D7E
 * (0x1F21 - 0x207E are not in KS X 1001).
 * The upper bound is 0x7D7E: upper byte 0xF9 gives 0x7C, plus one for a
 * lower byte >= 0xA1; wc_ksx1001_to_johab() checks the same bound.
 *
 * Upper bytes below 0xE0 are rebased from 0xD8 onto 0x1F, the others from
 * 0xE0 onto 0x4A; every upper byte covers two result rows.  Lower bytes
 * below 0xA1 stay in the first row (0x31.. -> 0x21.., 0x91.. -> 0x6F..),
 * lower bytes from 0xA1 select the second row.
 *
 * Both arguments must be modifiable lvalues and are evaluated more than
 * once.  The body is wrapped in do { } while (0) so the macro behaves as
 * a single statement, including inside an unbraced if / else.
 */
#define johab3_to_ksx1001(ub, lb) \
do { \
    if ((ub) < 0xe0) { \
	(ub) = (((ub) - 0xd8) << 1) + 0x1f; \
    } else { \
	(ub) = (((ub) - 0xe0) << 1) + 0x4a; \
    } \
    if ((lb) < 0xa1) { \
	(lb) -= ((lb) < 0x91) ? 0x10 : 0x22; \
    } else { \
	(ub)++; \
	(lb) -= 0x80; \
    } \
} while (0)
185
186
212k
/*
 * ksx1001_to_johab3(ub, lb): rewrite, in place, the two 7-bit bytes of a
 * WC_CCS_JOHAB_3 code into the upper byte `ub` and lower byte `lb` of the
 * two-byte Johab form.
 *
 * Rows below 0x4A are rebased from 0x1F onto upper byte 0xD8, the others
 * from 0x4A onto upper byte 0xE0; two consecutive rows share one upper
 * byte.  The odd row of a pair gets lower bytes 0x80 higher; the even row
 * is shifted by 0x10 (cells below 0x6F) or 0x22 (cells from 0x6F).
 *
 * Both arguments must be modifiable lvalues and are evaluated more than
 * once.  The body is wrapped in do { } while (0) so the macro behaves as
 * a single statement, including inside an unbraced if / else.
 */
#define ksx1001_to_johab3(ub, lb) \
do { \
    if ((ub) < 0x4a) { \
	(ub) -= 0x1f; \
	(lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
	(ub) = ((ub) >> 1) + 0xd8; \
    } else { \
	(ub) -= 0x4a; \
	(lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
	(ub) = ((ub) >> 1) + 0xe0; \
    } \
} while (0)
198
199
wc_wchar_t
200
wc_johab_to_cs128w(wc_wchar_t cc)
201
779k
{
202
779k
    wc_uint32 n;
203
779k
    wc_uchar ub, lb;
204
205
779k
    if (cc.code < 0xD800) {
206
765k
  n = WC_JOHAB1_N(cc.code);
207
765k
  if (n != WC_C_JOHAB_ERROR) {
208
753k
      cc.code = WC_N_CS94x128(n);
209
753k
      cc.ccs = WC_CCS_JOHAB_1;
210
753k
  } else {
211
12.5k
      n = WC_JOHAB2_N(cc.code);
212
12.5k
      cc.code = WC_N_CS128W(n);
213
12.5k
      cc.ccs = WC_CCS_JOHAB_2;
214
12.5k
  }
215
765k
    } else {
216
13.9k
  ub = cc.code >> 8;
217
13.9k
  lb = cc.code & 0xff;
218
13.9k
  johab3_to_ksx1001(ub, lb);
219
13.9k
  cc.code = ((wc_uint32)ub << 8) | lb;
220
13.9k
  cc.ccs = WC_CCS_JOHAB_3;
221
13.9k
    }
222
779k
    return cc;
223
779k
}
224
225
wc_wchar_t
226
wc_cs128w_to_johab(wc_wchar_t cc)
227
595k
{
228
595k
    wc_uint32 n;
229
595k
    wc_uchar ub, lb;
230
231
595k
    switch (cc.ccs) {
232
376k
    case WC_CCS_JOHAB_1:
233
376k
  n = WC_CS94x128_N(cc.code);
234
376k
  cc.code = WC_N_JOHAB1(n);
235
376k
  break;
236
6.05k
    case WC_CCS_JOHAB_2:
237
6.05k
  n = WC_CS128W_N(cc.code);
238
6.05k
  cc.code = WC_N_JOHAB2(n);
239
6.05k
  break;
240
212k
    case WC_CCS_JOHAB_3:
241
212k
  ub = (cc.code >> 8) & 0x7f;
242
212k
  lb = cc.code & 0x7f;
243
212k
  ksx1001_to_johab3(ub, lb);
244
212k
  cc.code = ((wc_uint32)ub << 8) | lb;
245
595k
    }
246
595k
    cc.ccs = WC_CCS_JOHAB;
247
595k
    return cc;
248
595k
}
249
250
Str
251
wc_conv_from_johab(Str is, wc_ces ces)
252
336
{
253
336
    Str os;
254
336
    wc_uchar *sp = (wc_uchar *)is->ptr;
255
336
    wc_uchar *ep = sp + is->length;
256
336
    wc_uchar *p;
257
336
    int state = WC_JOHAB_NOSTATE;
258
259
707
    for (p = sp; p < ep && *p < 0x80; p++)
260
371
        ;
261
336
    if (p == ep)
262
28
  return is;
263
308
    os = Strnew_size(is->length);
264
308
    if (p > sp)
265
16
  Strcat_charp_n(os, is->ptr, (int)(p - sp));
266
267
937k
    for (; p < ep; p++) {
268
936k
  switch (state) {
269
535k
  case WC_JOHAB_NOSTATE:
270
535k
      switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) {
271
392k
      case WC_JOHAB_MAP_UJ:
272
392k
    state = WC_JOHAB_HANGUL1;
273
392k
    break;
274
8.35k
      case WC_JOHAB_MAP_UH:
275
8.35k
    state = WC_JOHAB_HANJA1;
276
8.35k
    break;
277
19.1k
      case WC_JOHAB_MAP_C1:
278
19.1k
    wtf_push_unknown(os, p, 1);
279
19.1k
    break;
280
115k
      default:
281
115k
    Strcat_char(os, (char)*p);
282
115k
    break;
283
535k
      }
284
535k
      break;
285
535k
  case WC_JOHAB_HANGUL1:
286
392k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) 
287
381k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
288
11.0k
      else
289
11.0k
    wtf_push_unknown(os, p-1, 2);
290
392k
      state = WC_JOHAB_NOSTATE;
291
392k
      break;
292
8.34k
  case WC_JOHAB_HANJA1:
293
8.34k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH)
294
6.85k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
295
1.49k
      else
296
1.49k
    wtf_push_unknown(os, p-1, 2);
297
8.34k
      state = WC_JOHAB_NOSTATE;
298
8.34k
      break;
299
936k
  }
300
936k
    }
301
308
    switch (state) {
302
20
    case WC_JOHAB_HANGUL1:
303
26
    case WC_JOHAB_HANJA1:
304
26
  wtf_push_unknown(os, p-1, 1);
305
26
  break;
306
308
    }
307
308
    return os;
308
308
}
309
310
void
311
wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st)
312
1.74M
{
313
3.27M
  while (1) {
314
3.27M
    switch (cc.ccs) {
315
1.23M
    case WC_CCS_US_ASCII:
316
1.23M
  Strcat_char(os, (char)cc.code);
317
1.23M
  return;
318
0
    case WC_CCS_JOHAB_1:
319
266
    case WC_CCS_JOHAB_2:
320
205k
    case WC_CCS_JOHAB_3:
321
205k
  cc = wc_cs128w_to_johab(cc);
322
217k
    case WC_CCS_JOHAB:
323
217k
  Strcat_char(os, (char)(cc.code >> 8));
324
217k
  Strcat_char(os, (char)(cc.code & 0xff));
325
217k
  return;
326
28.0k
    case WC_CCS_KS_X_1001:
327
28.0k
  cc = wc_ksx1001_to_johab(cc);
328
28.0k
  continue;
329
51.7k
    case WC_CCS_UNKNOWN_W:
330
51.7k
  if (!WcOption.no_replace)
331
51.7k
      Strcat_charp(os, WC_REPLACE_W);
332
51.7k
  return;
333
237k
    case WC_CCS_UNKNOWN:
334
237k
  if (!WcOption.no_replace)
335
237k
      Strcat_charp(os, WC_REPLACE);
336
237k
  return;
337
1.50M
    default:
338
1.50M
#ifdef USE_UNICODE
339
1.50M
  if (WcOption.ucs_conv)
340
1.50M
      cc = wc_any_to_any_ces(cc, st);
341
0
  else
342
0
#endif
343
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
344
1.50M
  continue;
345
3.27M
    }
346
3.27M
  }
347
1.74M
}
348
349
Str
350
wc_char_conv_from_johab(wc_uchar c, wc_status *st)
351
0
{
352
0
    static Str os;
353
0
    static wc_uchar johabu;
354
355
0
    if (st->state == -1) {
356
0
  st->state = WC_JOHAB_NOSTATE;
357
0
  os = Strnew_size(8);
358
0
    }
359
360
0
    switch (st->state) {
361
0
    case WC_JOHAB_NOSTATE:
362
0
  switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) {
363
0
  case WC_JOHAB_MAP_UJ:
364
0
      johabu = c;
365
0
      st->state = WC_JOHAB_HANGUL1;
366
0
      return NULL;
367
0
  case WC_JOHAB_MAP_UH:
368
0
      johabu = c;
369
0
      st->state = WC_JOHAB_HANJA1;
370
0
      return NULL;
371
0
  case WC_JOHAB_MAP_C1:
372
0
      break;
373
0
  default:
374
0
      Strcat_char(os, (char)c);
375
0
      break;
376
0
  }
377
0
  break;
378
0
    case WC_JOHAB_HANGUL1:
379
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ)
380
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
381
0
  break;
382
0
    case WC_JOHAB_HANJA1:
383
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH)
384
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
385
0
  break;
386
0
    }
387
0
    st->state = -1;
388
0
    return os;
389
0
}