Coverage Report

Created: 2025-07-09 06:28

/src/w3m/libwc/johab.c
Line
Count
Source (jump to first uncovered line)
1
2
#include "wc.h"
3
#include "johab.h"
4
#include "wtf.h"
5
#ifdef USE_UNICODE
6
#include "ucs.h"
7
#endif
8
9
#define C0 WC_JOHAB_MAP_C0
10
#define GL WC_JOHAB_MAP_GL
11
#define C1 WC_JOHAB_MAP_C1
12
#define GH WC_JOHAB_MAP_GH
13
#define GB WC_JOHAB_MAP_GB
14
#define JJ WC_JOHAB_MAP_JJ
15
#define JB WC_JOHAB_MAP_JB
16
#define HB WC_JOHAB_MAP_HB
17
#define CJ WC_JOHAB_MAP_CJ
18
#define CB WC_JOHAB_MAP_CB
19
20
/*
21
  00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
22
  C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
23
  -     -     J     B     -  -  J     J     B     B     B     B  B     B     -
24
25
  C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1 
26
*/
27
28
wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
29
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
30
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
31
/*  20 */
32
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
33
/*  30  31 */
34
    GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
35
/*  40  41 */
36
    GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
37
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
38
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
39
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,
40
41
/*  80          83  84 */
42
    C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
43
/*  90  91 */
44
    JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
45
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
46
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
47
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
48
/*              D3  D4          D7  D8                          DF */
49
    JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, 
50
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
51
/*                                      F9  FA              FE  FF */
52
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
53
};
54
55
/*
 * Decoding tables for the three 5-bit fields of a 16-bit Johab Hangul code
 * (bits 14-10, 9-5 and 4-0, in that order).  Each row maps a raw field
 * value to a 1-based index; 0 marks a field value that is not valid.
 * Row 0 yields 1..19, row 1 yields 1..21, row 2 yields 1..28.
 *
 * The table is only ever read, so it is const.
 */
static const wc_uint8 johab1_N_map[ 3 ][ 32 ] = {
  { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
   15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11,
    0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 },
  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
   16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 }
};
63
64
/*
 * Encoding tables, the inverse of the Johab field decoding: each row maps
 * a 0-based index to the raw 5-bit field value stored in a Johab Hangul
 * code.  Row 0 has 19 meaningful entries, row 1 has 21, row 2 has 28;
 * the remaining slots are 0 padding up to 32 entries.
 *
 * The table is only ever read, so it is const.
 */
static const wc_uint8 N_johab1_map[ 3 ][ 32 ] = {
  { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,
   18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22,
   23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,
   17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 }
};
72
73
wc_wchar_t
74
wc_johab_to_ksx1001(wc_wchar_t cc)
75
4.37k
{
76
4.37k
#ifdef USE_UNICODE
77
4.37k
    static wc_table *t = NULL;
78
4.37k
#endif
79
80
4.37k
    switch (cc.ccs) {
81
2.18k
    case WC_CCS_JOHAB:
82
2.18k
  return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc));
83
1.09k
    case WC_CCS_JOHAB_1:
84
1.59k
    case WC_CCS_JOHAB_2:
85
1.59k
#ifdef USE_UNICODE
86
1.59k
  if (WcOption.ucs_conv) {
87
1.59k
      if (t == NULL)
88
1
    t = wc_get_ucs_table(WC_CCS_KS_X_1001);
89
1.59k
      cc = wc_any_to_any(cc, t);
90
1.59k
  } else
91
0
#endif
92
0
      cc.ccs = WC_CCS_UNKNOWN_W;
93
1.59k
  break;
94
595
    case WC_CCS_JOHAB_3:
95
595
  if (cc.code >= 0x2121)
96
244
      cc.ccs = WC_CCS_KS_X_1001;
97
351
  else
98
351
      cc.ccs = WC_CCS_UNKNOWN_W;
99
595
  break;
100
4.37k
    }
101
2.18k
    return cc;
102
4.37k
}
103
104
wc_wchar_t
105
wc_ksx1001_to_johab(wc_wchar_t cc)
106
1.34k
{
107
1.34k
    cc.code &= 0x7f7f;
108
1.34k
    if ((cc.code >= 0x2121 && cc.code <  0x2421) ||
109
1.34k
  (cc.code >  0x2453 && cc.code <= 0x2C7E) ||
110
1.34k
  (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) {
111
673
  cc.ccs = WC_CCS_JOHAB_3;
112
673
  return cc;
113
673
    }
114
675
#ifdef USE_UNICODE
115
675
    if (WcOption.ucs_conv)
116
675
  cc = wc_ucs_to_johab(wc_any_to_ucs(cc));
117
0
    else
118
0
#endif
119
0
  cc.ccs = WC_CCS_UNKNOWN_W;
120
675
    return cc;
121
1.34k
}
122
123
#ifdef USE_UNICODE
/*
 * Convert a UCS code point to Johab.
 *
 * Code points in WC_C_UCS2_HANGUL..WC_C_UCS2_HANGUL_END are encoded
 * arithmetically via WC_N_JOHAB1 and tagged WC_CCS_JOHAB.  Code points
 * 0x3131-0x3163 are looked up in the WC_CCS_JOHAB_2 UCS table, and all
 * remaining code points in the WC_CCS_JOHAB_3 UCS table.
 */
wc_wchar_t
wc_ucs_to_johab(wc_uint32 ucs)
{
    wc_wchar_t cc;
    wc_uint32 index;

    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
        index = ucs - WC_C_UCS2_HANGUL;
        cc.code = WC_N_JOHAB1(index);
        cc.ccs = WC_CCS_JOHAB;
        return cc;
    }

    if (ucs >= 0x3131 && ucs <= 0x3163)
        return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_2));

    return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_3));
}
#endif
144
145
wc_uint32
146
wc_johab1_to_N(wc_uint32 code)
147
1.23M
{
148
1.23M
    wc_uint32 a, b, c;
149
150
1.23M
    a = johab1_N_map[0][(code >> 10) & 0x1F];
151
1.23M
    b = johab1_N_map[1][(code >> 5)  & 0x1F];
152
1.23M
    c = johab1_N_map[2][ code        & 0x1F];
153
1.23M
    if (a && b && c)
154
1.00M
  return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1);
155
232k
    return WC_C_JOHAB_ERROR;
156
1.23M
}
157
158
wc_uint32
159
wc_N_to_johab1(wc_uint32 code)
160
794k
{
161
794k
    wc_uint32 a, b, c;
162
163
794k
    a = N_johab1_map[0][(code / 28) / 21 & 0x1F];
164
794k
    b = N_johab1_map[1][(code / 28) % 21 & 0x1F];
165
794k
    c = N_johab1_map[2][ code % 28       & 0x1F];
166
794k
    return 0x8000 | (a << 10) | (b << 5) | c;
167
794k
}
168
169
/*
 * johab3_to_ksx1001(ub, lb): convert the two bytes of a Johab code with
 * lead byte 0xD8 or above into a 7-bit row/column pair, in place.
 *
 * Result ranges: 0x1F21 - 0x2C7E (lead bytes below 0xE0) and
 * 0x4A21 - 0x7D7E (lead bytes 0xE0 and above).
 * (0x1F21 - 0x207E are not in KS X 1001)
 *
 * Each Johab lead byte covers two output rows: trail bytes below 0xA1
 * stay in the first row, trail bytes from 0xA1 upward move to the next.
 *
 * Both arguments must be modifiable byte-sized lvalues; they are
 * evaluated more than once.  The body is wrapped in do { } while (0) so
 * the macro behaves as a single statement, including in front of `else`.
 */
#define johab3_to_ksx1001(ub, lb) \
do { \
    if ((ub) < 0xe0) { \
        (ub) = (((ub) - 0xd8) << 1) + 0x1f; \
    } else { \
        (ub) = (((ub) - 0xe0) << 1) + 0x4a; \
    } \
    if ((lb) < 0xa1) { \
        (lb) -= ((lb) < 0x91) ? 0x10 : 0x22; \
    } else { \
        (ub)++; \
        (lb) -= 0x80; \
    } \
} while (0)
185
186
2.83M
/*
 * ksx1001_to_johab3(ub, lb): convert a 7-bit row/column pair into the two
 * bytes of a Johab code, in place.
 *
 * Rows below 0x4A are counted from 0x1F and produce lead bytes from 0xD8;
 * rows from 0x4A upward are counted from 0x4A and produce lead bytes from
 * 0xE0.  Two consecutive rows share one lead byte: the even offset takes
 * trail bytes 0x31.. / 0x91.. (columns below 0x6F get +0x10, the rest
 * +0x22) and the odd offset takes trail bytes from 0xA1 (+0x80).
 *
 * Both arguments must be modifiable byte-sized lvalues; they are
 * evaluated more than once.  The body is wrapped in do { } while (0) so
 * the macro behaves as a single statement, including in front of `else`.
 */
#define ksx1001_to_johab3(ub, lb) \
do { \
    if ((ub) < 0x4a) { \
        (ub) -= 0x1f; \
        (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
        (ub) = ((ub) >> 1) + 0xd8; \
    } else { \
        (ub) -= 0x4a; \
        (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
        (ub) = ((ub) >> 1) + 0xe0; \
    } \
} while (0)
198
199
wc_wchar_t
200
wc_johab_to_cs128w(wc_wchar_t cc)
201
1.30M
{
202
1.30M
    wc_uint32 n;
203
1.30M
    wc_uchar ub, lb;
204
205
1.30M
    if (cc.code < 0xD800) {
206
1.23M
  n = WC_JOHAB1_N(cc.code);
207
1.23M
  if (n != WC_C_JOHAB_ERROR) {
208
1.00M
      cc.code = WC_N_CS94x128(n);
209
1.00M
      cc.ccs = WC_CCS_JOHAB_1;
210
1.00M
  } else {
211
232k
      n = WC_JOHAB2_N(cc.code);
212
232k
      cc.code = WC_N_CS128W(n);
213
232k
      cc.ccs = WC_CCS_JOHAB_2;
214
232k
  }
215
1.23M
    } else {
216
61.6k
  ub = cc.code >> 8;
217
61.6k
  lb = cc.code & 0xff;
218
61.6k
  johab3_to_ksx1001(ub, lb);
219
61.6k
  cc.code = ((wc_uint32)ub << 8) | lb;
220
61.6k
  cc.ccs = WC_CCS_JOHAB_3;
221
61.6k
    }
222
1.30M
    return cc;
223
1.30M
}
224
225
wc_wchar_t
226
wc_cs128w_to_johab(wc_wchar_t cc)
227
3.76M
{
228
3.76M
    wc_uint32 n;
229
3.76M
    wc_uchar ub, lb;
230
231
3.76M
    switch (cc.ccs) {
232
794k
    case WC_CCS_JOHAB_1:
233
794k
  n = WC_CS94x128_N(cc.code);
234
794k
  cc.code = WC_N_JOHAB1(n);
235
794k
  break;
236
129k
    case WC_CCS_JOHAB_2:
237
129k
  n = WC_CS128W_N(cc.code);
238
129k
  cc.code = WC_N_JOHAB2(n);
239
129k
  break;
240
2.83M
    case WC_CCS_JOHAB_3:
241
2.83M
  ub = (cc.code >> 8) & 0x7f;
242
2.83M
  lb = cc.code & 0x7f;
243
2.83M
  ksx1001_to_johab3(ub, lb);
244
2.83M
  cc.code = ((wc_uint32)ub << 8) | lb;
245
3.76M
    }
246
3.76M
    cc.ccs = WC_CCS_JOHAB;
247
3.76M
    return cc;
248
3.76M
}
249
250
Str
251
wc_conv_from_johab(Str is, wc_ces ces)
252
361
{
253
361
    Str os;
254
361
    wc_uchar *sp = (wc_uchar *)is->ptr;
255
361
    wc_uchar *ep = sp + is->length;
256
361
    wc_uchar *p;
257
361
    int state = WC_JOHAB_NOSTATE;
258
259
707
    for (p = sp; p < ep && *p < 0x80; p++)
260
346
        ;
261
361
    if (p == ep)
262
30
  return is;
263
331
    os = Strnew_size(is->length);
264
331
    if (p > sp)
265
20
  Strcat_charp_n(os, is->ptr, (int)(p - sp));
266
267
2.43M
    for (; p < ep; p++) {
268
2.43M
  switch (state) {
269
1.42M
  case WC_JOHAB_NOSTATE:
270
1.42M
      switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) {
271
953k
      case WC_JOHAB_MAP_UJ:
272
953k
    state = WC_JOHAB_HANGUL1;
273
953k
    break;
274
56.4k
      case WC_JOHAB_MAP_UH:
275
56.4k
    state = WC_JOHAB_HANJA1;
276
56.4k
    break;
277
201k
      case WC_JOHAB_MAP_C1:
278
201k
    wtf_push_unknown(os, p, 1);
279
201k
    break;
280
215k
      default:
281
215k
    Strcat_char(os, (char)*p);
282
215k
    break;
283
1.42M
      }
284
1.42M
      break;
285
1.42M
  case WC_JOHAB_HANGUL1:
286
953k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) 
287
924k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
288
29.2k
      else
289
29.2k
    wtf_push_unknown(os, p-1, 2);
290
953k
      state = WC_JOHAB_NOSTATE;
291
953k
      break;
292
56.4k
  case WC_JOHAB_HANJA1:
293
56.4k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH)
294
52.3k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
295
4.09k
      else
296
4.09k
    wtf_push_unknown(os, p-1, 2);
297
56.4k
      state = WC_JOHAB_NOSTATE;
298
56.4k
      break;
299
2.43M
  }
300
2.43M
    }
301
331
    switch (state) {
302
13
    case WC_JOHAB_HANGUL1:
303
21
    case WC_JOHAB_HANJA1:
304
21
  wtf_push_unknown(os, p-1, 1);
305
21
  break;
306
331
    }
307
331
    return os;
308
331
}
309
310
void
311
wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st)
312
7.79M
{
313
14.5M
  while (1) {
314
14.5M
    switch (cc.ccs) {
315
1.26M
    case WC_CCS_US_ASCII:
316
1.26M
  Strcat_char(os, (char)cc.code);
317
1.26M
  return;
318
0
    case WC_CCS_JOHAB_1:
319
204
    case WC_CCS_JOHAB_2:
320
2.78M
    case WC_CCS_JOHAB_3:
321
2.78M
  cc = wc_cs128w_to_johab(cc);
322
3.43M
    case WC_CCS_JOHAB:
323
3.43M
  Strcat_char(os, (char)(cc.code >> 8));
324
3.43M
  Strcat_char(os, (char)(cc.code & 0xff));
325
3.43M
  return;
326
1.34k
    case WC_CCS_KS_X_1001:
327
1.34k
  cc = wc_ksx1001_to_johab(cc);
328
1.34k
  continue;
329
2.72M
    case WC_CCS_UNKNOWN_W:
330
2.72M
  if (!WcOption.no_replace)
331
2.72M
      Strcat_charp(os, WC_REPLACE_W);
332
2.72M
  return;
333
375k
    case WC_CCS_UNKNOWN:
334
375k
  if (!WcOption.no_replace)
335
375k
      Strcat_charp(os, WC_REPLACE);
336
375k
  return;
337
6.78M
    default:
338
6.78M
#ifdef USE_UNICODE
339
6.78M
  if (WcOption.ucs_conv)
340
6.78M
      cc = wc_any_to_any_ces(cc, st);
341
0
  else
342
0
#endif
343
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
344
6.78M
  continue;
345
14.5M
    }
346
14.5M
  }
347
7.79M
}
348
349
Str
350
wc_char_conv_from_johab(wc_uchar c, wc_status *st)
351
0
{
352
0
    static Str os;
353
0
    static wc_uchar johabu;
354
355
0
    if (st->state == -1) {
356
0
  st->state = WC_JOHAB_NOSTATE;
357
0
  os = Strnew_size(8);
358
0
    }
359
360
0
    switch (st->state) {
361
0
    case WC_JOHAB_NOSTATE:
362
0
  switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) {
363
0
  case WC_JOHAB_MAP_UJ:
364
0
      johabu = c;
365
0
      st->state = WC_JOHAB_HANGUL1;
366
0
      return NULL;
367
0
  case WC_JOHAB_MAP_UH:
368
0
      johabu = c;
369
0
      st->state = WC_JOHAB_HANJA1;
370
0
      return NULL;
371
0
  case WC_JOHAB_MAP_C1:
372
0
      break;
373
0
  default:
374
0
      Strcat_char(os, (char)c);
375
0
      break;
376
0
  }
377
0
  break;
378
0
    case WC_JOHAB_HANGUL1:
379
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ)
380
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
381
0
  break;
382
0
    case WC_JOHAB_HANJA1:
383
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH)
384
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
385
0
  break;
386
0
    }
387
0
    st->state = -1;
388
0
    return os;
389
0
}