Coverage Report

Created: 2025-06-13 06:37

/src/w3m/libwc/johab.c
Line
Count
Source (jump to first uncovered line)
1
2
#include "wc.h"
3
#include "johab.h"
4
#include "wtf.h"
5
#ifdef USE_UNICODE
6
#include "ucs.h"
7
#endif
8
9
#define C0 WC_JOHAB_MAP_C0
10
#define GL WC_JOHAB_MAP_GL
11
#define C1 WC_JOHAB_MAP_C1
12
#define GH WC_JOHAB_MAP_GH
13
#define GB WC_JOHAB_MAP_GB
14
#define JJ WC_JOHAB_MAP_JJ
15
#define JB WC_JOHAB_MAP_JB
16
#define HB WC_JOHAB_MAP_HB
17
#define CJ WC_JOHAB_MAP_CJ
18
#define CB WC_JOHAB_MAP_CB
19
20
/*
21
  00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
22
  C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
23
  -     -     J     B     -  -  J     J     B     B     B     B  B     B     -
24
25
  C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1 
26
*/
27
28
wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
29
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
30
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
31
/*  20 */
32
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
33
/*  30  31 */
34
    GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
35
/*  40  41 */
36
    GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
37
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
38
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
39
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,
40
41
/*  80          83  84 */
42
    C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
43
/*  90  91 */
44
    JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
45
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
46
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
47
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
48
/*              D3  D4          D7  D8                          DF */
49
    JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, 
50
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
51
/*                                      F9  FA              FE  FF */
52
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
53
};
54
55
/*
 * Lookup used by wc_johab1_to_N(): one row per 5-bit field of a JOHAB code
 * (row 0: bits 10-14, row 1: bits 5-9, row 2: bits 0-4).  Each entry is the
 * 1-based ordinal of that field value; 0 marks a field value with no mapping.
 */
static wc_uint8 johab1_N_map[ 3 ][ 32 ] = {
    /* bits 10-14 */
    {
         0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    },
    /* bits 5-9 */
    {
         0,  0,  0,  1,  2,  3,  4,  5,  0,  0,  6,  7,  8,  9, 10, 11,
         0,  0, 12, 13, 14, 15, 16, 17,  0,  0, 18, 19, 20, 21,  0,  0
    },
    /* bits 0-4 */
    {
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17,  0, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,  0,  0
    }
};
63
64
/*
 * Lookup used by wc_N_to_johab1(): maps a 0-based ordinal back to the 5-bit
 * field value placed in a JOHAB code (row 0: bits 10-14, row 1: bits 5-9,
 * row 2: bits 0-4).  Slots past the last ordinal of a row hold 0.
 */
static wc_uint8 N_johab1_map[ 3 ][ 32 ] = {
    /* bits 10-14 */
    {
         2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    },
    /* bits 5-9 */
    {
         3,  4,  5,  6,  7, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22,
        23, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    },
    /* bits 0-4 */
    {
         1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0
    }
};
72
73
wc_wchar_t
74
wc_johab_to_ksx1001(wc_wchar_t cc)
75
432k
{
76
432k
#ifdef USE_UNICODE
77
432k
    static wc_table *t = NULL;
78
432k
#endif
79
80
432k
    switch (cc.ccs) {
81
216k
    case WC_CCS_JOHAB:
82
216k
  return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc));
83
193k
    case WC_CCS_JOHAB_1:
84
202k
    case WC_CCS_JOHAB_2:
85
202k
#ifdef USE_UNICODE
86
202k
  if (WcOption.ucs_conv) {
87
202k
      if (t == NULL)
88
1
    t = wc_get_ucs_table(WC_CCS_KS_X_1001);
89
202k
      cc = wc_any_to_any(cc, t);
90
202k
  } else
91
0
#endif
92
0
      cc.ccs = WC_CCS_UNKNOWN_W;
93
202k
  break;
94
14.0k
    case WC_CCS_JOHAB_3:
95
14.0k
  if (cc.code >= 0x2121)
96
13.5k
      cc.ccs = WC_CCS_KS_X_1001;
97
495
  else
98
495
      cc.ccs = WC_CCS_UNKNOWN_W;
99
14.0k
  break;
100
432k
    }
101
216k
    return cc;
102
432k
}
103
104
wc_wchar_t
105
wc_ksx1001_to_johab(wc_wchar_t cc)
106
2.67k
{
107
2.67k
    cc.code &= 0x7f7f;
108
2.67k
    if ((cc.code >= 0x2121 && cc.code <  0x2421) ||
109
2.67k
  (cc.code >  0x2453 && cc.code <= 0x2C7E) ||
110
2.67k
  (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) {
111
1.58k
  cc.ccs = WC_CCS_JOHAB_3;
112
1.58k
  return cc;
113
1.58k
    }
114
1.09k
#ifdef USE_UNICODE
115
1.09k
    if (WcOption.ucs_conv)
116
1.09k
  cc = wc_ucs_to_johab(wc_any_to_ucs(cc));
117
0
    else
118
0
#endif
119
0
  cc.ccs = WC_CCS_UNKNOWN_W;
120
1.09k
    return cc;
121
2.67k
}
122
123
#ifdef USE_UNICODE
124
wc_wchar_t
125
wc_ucs_to_johab(wc_uint32 ucs)
126
6.45M
{
127
6.45M
    wc_table *t;
128
6.45M
    wc_wchar_t cc;
129
130
6.45M
    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
131
1.53k
  ucs -= WC_C_UCS2_HANGUL;
132
1.53k
  cc.code = WC_N_JOHAB1(ucs);
133
1.53k
  cc.ccs = WC_CCS_JOHAB;
134
6.45M
    } else if (ucs >= 0x3131 && ucs <= 0x3163) {
135
209
  t = wc_get_ucs_table(WC_CCS_JOHAB_2);
136
209
  cc = wc_ucs_to_any(ucs, t);
137
6.45M
    } else {
138
6.45M
  t = wc_get_ucs_table(WC_CCS_JOHAB_3);
139
6.45M
  cc = wc_ucs_to_any(ucs, t);
140
6.45M
    }
141
6.45M
    return cc;
142
6.45M
}
143
#endif
144
145
wc_uint32
146
wc_johab1_to_N(wc_uint32 code)
147
895k
{
148
895k
    wc_uint32 a, b, c;
149
150
895k
    a = johab1_N_map[0][(code >> 10) & 0x1F];
151
895k
    b = johab1_N_map[1][(code >> 5)  & 0x1F];
152
895k
    c = johab1_N_map[2][ code        & 0x1F];
153
895k
    if (a && b && c)
154
849k
  return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1);
155
46.0k
    return WC_C_JOHAB_ERROR;
156
895k
}
157
158
wc_uint32
159
wc_N_to_johab1(wc_uint32 code)
160
352k
{
161
352k
    wc_uint32 a, b, c;
162
163
352k
    a = N_johab1_map[0][(code / 28) / 21 & 0x1F];
164
352k
    b = N_johab1_map[1][(code / 28) % 21 & 0x1F];
165
352k
    c = N_johab1_map[2][ code % 28       & 0x1F];
166
352k
    return 0x8000 | (a << 10) | (b << 5) | c;
167
352k
}
168
169
/*
 * johab3_to_ksx1001(ub, lb): rewrite, in place, the upper byte `ub` and the
 * lower byte `lb` of a JOHAB double-byte code (upper byte 0xD8 and above)
 * into the two bytes of a 94x94 row/cell code.
 *
 * Resulting codes: 0x1F21 - 0x2C7E, 0x4A21 - 0x7D7E
 *  (0x1F21 - 0x207E are not in KS X 1001)
 *
 * Each JOHAB upper byte covers two rows: lower bytes below 0xA1 fall in the
 * first row, lower bytes from 0xA1 in the second.  Both arguments must be
 * modifiable lvalues and are evaluated more than once.  The body is wrapped
 * in do { } while (0) so the macro behaves as a single statement, including
 * in an unbraced if/else.
 */
#define johab3_to_ksx1001(ub, lb) \
    do { \
        if ((ub) < 0xe0) { \
            (ub) = (((ub) - 0xd8) << 1) + 0x1f; \
        } else { \
            (ub) = (((ub) - 0xe0) << 1) + 0x4a; \
        } \
        if ((lb) < 0xa1) { \
            (lb) -= ((lb) < 0x91) ? 0x10 : 0x22; \
        } else { \
            (ub)++; \
            (lb) -= 0x80; \
        } \
    } while (0)
185
186
3.26M
/*
 * ksx1001_to_johab3(ub, lb): rewrite, in place, the row byte `ub` and the
 * cell byte `lb` of a 7-bit 94x94 code into the upper and lower byte of a
 * JOHAB double-byte code.
 *
 * Rows below 0x4A are counted from 0x1F and land on upper bytes from 0xD8;
 * rows from 0x4A land on upper bytes from 0xE0.  Two consecutive rows share
 * one upper byte: the even row offset uses lower bytes 0x31-0x7E and
 * 0x91-0xA0, the odd row offset uses 0xA1-0xFE.  Both arguments must be
 * modifiable lvalues and are evaluated more than once.  The body is wrapped
 * in do { } while (0) so the macro behaves as a single statement, including
 * in an unbraced if/else.
 */
#define ksx1001_to_johab3(ub, lb) \
    do { \
        if ((ub) < 0x4a) { \
            (ub) -= 0x1f; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xd8; \
        } else { \
            (ub) -= 0x4a; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xe0; \
        } \
    } while (0)
198
199
wc_wchar_t
200
wc_johab_to_cs128w(wc_wchar_t cc)
201
944k
{
202
944k
    wc_uint32 n;
203
944k
    wc_uchar ub, lb;
204
205
944k
    if (cc.code < 0xD800) {
206
895k
  n = WC_JOHAB1_N(cc.code);
207
895k
  if (n != WC_C_JOHAB_ERROR) {
208
849k
      cc.code = WC_N_CS94x128(n);
209
849k
      cc.ccs = WC_CCS_JOHAB_1;
210
849k
  } else {
211
46.0k
      n = WC_JOHAB2_N(cc.code);
212
46.0k
      cc.code = WC_N_CS128W(n);
213
46.0k
      cc.ccs = WC_CCS_JOHAB_2;
214
46.0k
  }
215
895k
    } else {
216
48.9k
  ub = cc.code >> 8;
217
48.9k
  lb = cc.code & 0xff;
218
48.9k
  johab3_to_ksx1001(ub, lb);
219
48.9k
  cc.code = ((wc_uint32)ub << 8) | lb;
220
48.9k
  cc.ccs = WC_CCS_JOHAB_3;
221
48.9k
    }
222
944k
    return cc;
223
944k
}
224
225
wc_wchar_t
226
wc_cs128w_to_johab(wc_wchar_t cc)
227
3.63M
{
228
3.63M
    wc_uint32 n;
229
3.63M
    wc_uchar ub, lb;
230
231
3.63M
    switch (cc.ccs) {
232
350k
    case WC_CCS_JOHAB_1:
233
350k
  n = WC_CS94x128_N(cc.code);
234
350k
  cc.code = WC_N_JOHAB1(n);
235
350k
  break;
236
18.6k
    case WC_CCS_JOHAB_2:
237
18.6k
  n = WC_CS128W_N(cc.code);
238
18.6k
  cc.code = WC_N_JOHAB2(n);
239
18.6k
  break;
240
3.26M
    case WC_CCS_JOHAB_3:
241
3.26M
  ub = (cc.code >> 8) & 0x7f;
242
3.26M
  lb = cc.code & 0x7f;
243
3.26M
  ksx1001_to_johab3(ub, lb);
244
3.26M
  cc.code = ((wc_uint32)ub << 8) | lb;
245
3.63M
    }
246
3.63M
    cc.ccs = WC_CCS_JOHAB;
247
3.63M
    return cc;
248
3.63M
}
249
250
Str
251
wc_conv_from_johab(Str is, wc_ces ces)
252
344
{
253
344
    Str os;
254
344
    wc_uchar *sp = (wc_uchar *)is->ptr;
255
344
    wc_uchar *ep = sp + is->length;
256
344
    wc_uchar *p;
257
344
    int state = WC_JOHAB_NOSTATE;
258
259
612
    for (p = sp; p < ep && *p < 0x80; p++)
260
268
        ;
261
344
    if (p == ep)
262
26
  return is;
263
318
    os = Strnew_size(is->length);
264
318
    if (p > sp)
265
11
  Strcat_charp_n(os, is->ptr, (int)(p - sp));
266
267
1.18M
    for (; p < ep; p++) {
268
1.18M
  switch (state) {
269
756k
  case WC_JOHAB_NOSTATE:
270
756k
      switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) {
271
402k
      case WC_JOHAB_MAP_UJ:
272
402k
    state = WC_JOHAB_HANGUL1;
273
402k
    break;
274
30.5k
      case WC_JOHAB_MAP_UH:
275
30.5k
    state = WC_JOHAB_HANJA1;
276
30.5k
    break;
277
98.3k
      case WC_JOHAB_MAP_C1:
278
98.3k
    wtf_push_unknown(os, p, 1);
279
98.3k
    break;
280
224k
      default:
281
224k
    Strcat_char(os, (char)*p);
282
224k
    break;
283
756k
      }
284
756k
      break;
285
756k
  case WC_JOHAB_HANGUL1:
286
402k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) 
287
368k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
288
33.6k
      else
289
33.6k
    wtf_push_unknown(os, p-1, 2);
290
402k
      state = WC_JOHAB_NOSTATE;
291
402k
      break;
292
30.5k
  case WC_JOHAB_HANJA1:
293
30.5k
      if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH)
294
24.2k
    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
295
6.30k
      else
296
6.30k
    wtf_push_unknown(os, p-1, 2);
297
30.5k
      state = WC_JOHAB_NOSTATE;
298
30.5k
      break;
299
1.18M
  }
300
1.18M
    }
301
318
    switch (state) {
302
14
    case WC_JOHAB_HANGUL1:
303
25
    case WC_JOHAB_HANJA1:
304
25
  wtf_push_unknown(os, p-1, 1);
305
25
  break;
306
318
    }
307
318
    return os;
308
318
}
309
310
void
311
wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st)
312
8.75M
{
313
17.2M
  while (1) {
314
17.2M
    switch (cc.ccs) {
315
2.02M
    case WC_CCS_US_ASCII:
316
2.02M
  Strcat_char(os, (char)cc.code);
317
2.02M
  return;
318
0
    case WC_CCS_JOHAB_1:
319
209
    case WC_CCS_JOHAB_2:
320
3.24M
    case WC_CCS_JOHAB_3:
321
3.24M
  cc = wc_cs128w_to_johab(cc);
322
3.24M
    case WC_CCS_JOHAB:
323
3.24M
  Strcat_char(os, (char)(cc.code >> 8));
324
3.24M
  Strcat_char(os, (char)(cc.code & 0xff));
325
3.24M
  return;
326
2.67k
    case WC_CCS_KS_X_1001:
327
2.67k
  cc = wc_ksx1001_to_johab(cc);
328
2.67k
  continue;
329
3.20M
    case WC_CCS_UNKNOWN_W:
330
3.20M
  if (!WcOption.no_replace)
331
3.20M
      Strcat_charp(os, WC_REPLACE_W);
332
3.20M
  return;
333
283k
    case WC_CCS_UNKNOWN:
334
283k
  if (!WcOption.no_replace)
335
283k
      Strcat_charp(os, WC_REPLACE);
336
283k
  return;
337
8.48M
    default:
338
8.48M
#ifdef USE_UNICODE
339
8.48M
  if (WcOption.ucs_conv)
340
8.48M
      cc = wc_any_to_any_ces(cc, st);
341
0
  else
342
0
#endif
343
0
      cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
344
8.48M
  continue;
345
17.2M
    }
346
17.2M
  }
347
8.75M
}
348
349
Str
350
wc_char_conv_from_johab(wc_uchar c, wc_status *st)
351
0
{
352
0
    static Str os;
353
0
    static wc_uchar johabu;
354
355
0
    if (st->state == -1) {
356
0
  st->state = WC_JOHAB_NOSTATE;
357
0
  os = Strnew_size(8);
358
0
    }
359
360
0
    switch (st->state) {
361
0
    case WC_JOHAB_NOSTATE:
362
0
  switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) {
363
0
  case WC_JOHAB_MAP_UJ:
364
0
      johabu = c;
365
0
      st->state = WC_JOHAB_HANGUL1;
366
0
      return NULL;
367
0
  case WC_JOHAB_MAP_UH:
368
0
      johabu = c;
369
0
      st->state = WC_JOHAB_HANJA1;
370
0
      return NULL;
371
0
  case WC_JOHAB_MAP_C1:
372
0
      break;
373
0
  default:
374
0
      Strcat_char(os, (char)c);
375
0
      break;
376
0
  }
377
0
  break;
378
0
    case WC_JOHAB_HANGUL1:
379
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ)
380
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
381
0
  break;
382
0
    case WC_JOHAB_HANJA1:
383
0
  if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH)
384
0
      wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
385
0
  break;
386
0
    }
387
0
    st->state = -1;
388
0
    return os;
389
0
}