Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include "wc.h" |
3 | | #include "wtf.h" |
4 | | #include "iso2022.h" |
5 | | #include "hz.h" |
6 | | #ifdef USE_UNICODE |
7 | | #include "ucs.h" |
8 | | #include "utf8.h" |
9 | | #include "utf7.h" |
10 | | #endif |
11 | | |
12 | | char *WcReplace = "?"; /* NOTE(review): presumably the replacement text for a character that cannot be converted; not used in this listing, confirm against callers */ |
13 | | char *WcReplaceW = "??"; /* NOTE(review): presumably the double-width counterpart of WcReplace; confirm against callers */ |
14 | | |
15 | | static Str wc_conv_to_ces(Str is, wc_ces ces); /* file-local helper defined further down; declared here because wc_Str_conv calls it first */ |
16 | | |
17 | | Str /* Convert `is` from encoding f_ces to encoding t_ces. Passing WC_CES_WTF for either side skips that half of the conversion. Returns the resulting Str, which is `is` itself when nothing had to be done. */ |
18 | | wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces) |
19 | 10.6k | { |
20 | 10.6k | if (f_ces != WC_CES_WTF) /* source is not WTF: run the conv_from handler registered for f_ces in WcCesInfo */ |
21 | 10.6k | is = (*WcCesInfo[WC_CES_INDEX(f_ces)].conv_from)(is, f_ces); |
22 | 10.6k | if (t_ces != WC_CES_WTF) /* target is not WTF: encode through wc_conv_to_ces */ |
23 | 10.6k | return wc_conv_to_ces(is, t_ces); |
24 | 0 | else |
25 | 0 | return is; /* target is WTF: hand back the string as it stands after the first step */ |
26 | 10.6k | } |
27 | | |
28 | | Str /* Same conversion as wc_Str_conv, but run with three fields of the global WcOption temporarily overridden; the previous option values are restored before returning. */ |
29 | | wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces) |
30 | 0 | { |
31 | 0 | Str os; |
32 | 0 | wc_option opt = WcOption; /* snapshot the global options so they can be put back below */ |
33 |

34 | 0 | WcOption.strict_iso2022 = WC_TRUE; |
35 | 0 | WcOption.no_replace = WC_TRUE; |
36 | 0 | WcOption.fix_width_conv = WC_FALSE; |
37 | 0 | os = wc_Str_conv(is, f_ces, t_ces); /* the conversion reads the overridden global options */ |
38 | 0 | WcOption = opt; /* restore the options that were in effect on entry */ |
39 | 0 | return os; |
40 | 0 | } |
41 | | |
42 | | static Str /* Encode `is` into `ces`. Returns `is` itself when the initial scan finds nothing that needs conversion; otherwise returns a newly built Str. */ |
43 | | wc_conv_to_ces(Str is, wc_ces ces) |
44 | 10.6k | { |
45 | 10.6k | Str os; |
46 | 10.6k | wc_uchar *sp = (wc_uchar *)is->ptr; /* start of the input bytes */ |
47 | 10.6k | wc_uchar *ep = sp + is->length; /* one past the last input byte */ |
48 | 10.6k | wc_uchar *p; |
49 | 10.6k | wc_status st; |
50 | | |
51 | 10.6k | switch (ces) { /* advance p over the leading bytes that can be passed through unchanged for this target encoding */ |
52 | 371 | case WC_CES_HZ_GB_2312: |
53 | 1.45k | for (p = sp; p < ep && *p != '~' && *p < 0x80; p++) /* stop at '~' (the HZ escape character per RFC 1843) or at any byte >= 0x80 */ |
54 | 1.07k | ; |
55 | 371 | break; |
56 | 64 | case WC_CES_TCVN_5712: |
57 | 270 | case WC_CES_VISCII_11: |
58 | 311 | case WC_CES_VPS: |
59 | 15.5k | for (p = sp; p < ep && 0x20 <= *p && *p < 0x80; p++) /* for these three encodings bytes below 0x20 also end the pass-through run */ |
60 | 15.1k | ; |
61 | 311 | break; |
62 | 9.97k | default: |
63 | 1.37M | for (p = sp; p < ep && *p < 0x80; p++) /* stop at the first byte >= 0x80 */ |
64 | 1.36M | ; |
65 | 9.97k | break; |
66 | 10.6k | } |
67 | 10.6k | if (p == ep) /* the scan reached the end: no conversion needed, return the input object itself */ |
68 | 2.05k | return is; |
69 | | |
70 | 8.60k | os = Strnew_size(is->length); /* output string; NOTE(review): the argument is presumably an initial capacity hint, confirm against Strnew_size */ |
71 | 8.60k | if (p > sp) |
72 | 2.08k | p--; /* for precompose: back up one byte so the last pass-through byte is handled by the loop below rather than copied with the prefix */ |
73 | 8.60k | if (p > sp) |
74 | 1.29k | Strcat_charp_n(os, is->ptr, (int)(p - sp)); /* copy the remaining pass-through prefix verbatim */ |
75 | | |
76 | 8.60k | wc_output_init(ces, &st); /* set up the output state st for ces before anything is pushed */ |
77 | | |
78 | 8.60k | switch (ces) { /* the listed encodings send every parsed character through push_to; all others use the shortcut in the default branch */ |
79 | 116 | case WC_CES_ISO_2022_JP: |
80 | 207 | case WC_CES_ISO_2022_JP_2: |
81 | 209 | case WC_CES_ISO_2022_JP_3: |
82 | 481 | case WC_CES_ISO_2022_CN: |
83 | 703 | case WC_CES_ISO_2022_KR: |
84 | 1.05k | case WC_CES_HZ_GB_2312: |
85 | 1.10k | case WC_CES_TCVN_5712: |
86 | 1.26k | case WC_CES_VISCII_11: |
87 | 1.30k | case WC_CES_VPS: |
88 | 1.30k | #ifdef USE_UNICODE |
89 | 1.76k | case WC_CES_UTF_8: |
90 | 2.39k | case WC_CES_UTF_7: |
91 | 2.39k | #endif |
92 | 83.4M | while (p < ep) |
93 | 83.4M | (*st.ces_info->push_to)(os, wtf_parse(&p), &st); /* wtf_parse receives &p, so it is what moves p forward here */ |
94 | 2.39k | break; |
95 | 6.21k | default: |
96 | 94.0M | while (p < ep) { |
97 | 94.0M | if (*p < 0x80 && wtf_width(p + 1)) { /* byte < 0x80 whose follower has non-zero wtf_width: copy it directly. NOTE(review): presumably a zero-width follower is a combining character that needs push_to for precomposition; confirm */ |
98 | 9.55M | Strcat_char(os, (char)*p); |
99 | 9.55M | p++; |
100 | 9.55M | } else |
101 | 84.4M | (*st.ces_info->push_to)(os, wtf_parse(&p), &st); /* everything else goes through the encoder's push_to handler */ |
102 | 94.0M | } |
103 | 6.21k | break; |
104 | 8.60k | } |
105 | | |
106 | 8.60k | wc_push_end(os, &st); /* run the encoding-specific end-of-output step, if one applies */ |
107 | | |
108 | 8.60k | return os; |
109 | 8.60k | } |
110 | | |
111 | | Str /* Convert `is` to t_ces, picking the source encoding via wc_auto_detect unless detection is bypassed. *f_ces is in/out: it may be overwritten with the encoding that was chosen. The conversion itself always uses the local `detect` value. */ |
112 | | wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces) |
113 | 10.6k | { |
114 | 10.6k | wc_ces detect; |
115 | | |
116 | 10.6k | if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) { /* either input says WTF: treat the data as WTF and skip detection */ |
117 | 0 | *f_ces = WC_CES_WTF; |
118 | 0 | detect = WC_CES_WTF; |
119 | 10.6k | } else if (WcOption.auto_detect == WC_OPT_DETECT_OFF) { /* detection disabled: the hint is used for both the reported and the applied encoding */ |
120 | 0 | *f_ces = hint; |
121 | 0 | detect = hint; |
122 | 10.6k | } else { |
123 | 10.6k | if (*f_ces & WC_CES_T_8BIT) /* a caller-supplied encoding with the 8BIT type flag replaces the hint given to the detector */ |
124 | 7.43k | hint = *f_ces; |
125 | 10.6k | detect = wc_auto_detect(is->ptr, is->length, hint); |
126 | 10.6k | if (WcOption.auto_detect == WC_OPT_DETECT_ON) { |
127 | 10.6k | if ((detect & WC_CES_T_8BIT) || /* report the detected encoding when it has the 8BIT flag, or has the NASCII flag while *f_ces lacks the 8BIT flag */ |
128 | 10.6k | ((detect & WC_CES_T_NASCII) && ! (*f_ces & WC_CES_T_8BIT))) |
129 | 9.51k | *f_ces = detect; |
130 | 10.6k | } else { /* any other auto_detect setting: only an ISO_2022-flagged result may replace a *f_ces that lacks the 8BIT flag */ |
131 | 0 | if ((detect & WC_CES_T_ISO_2022) && ! (*f_ces & WC_CES_T_8BIT)) |
132 | 0 | *f_ces = detect; |
133 | 0 | } |
134 | 10.6k | } |
135 | 10.6k | return wc_Str_conv(is, detect, t_ces); /* converts from `detect`, which can differ from *f_ces when the update conditions above were not met */ |
136 | 10.6k | } |
137 | | |
138 | | void /* Call the encoding-specific end routine for the encoding recorded in st->ces_info->id; does nothing for encodings not listed below. */ |
139 | | wc_push_end(Str os, wc_status *st) |
140 | 8.60k | { |
141 | 8.60k | if (st->ces_info->id & WC_CES_T_ISO_2022) /* bit test on the type flag, so it matches every id carrying WC_CES_T_ISO_2022 */ |
142 | 703 | wc_push_to_iso2022_end(os, st); |
143 | 7.90k | else if (st->ces_info->id == WC_CES_HZ_GB_2312) /* exact id comparison from here on */ |
144 | 347 | wc_push_to_hz_end(os, st); |
145 | 7.55k | #ifdef USE_UNICODE |
146 | 7.55k | else if (st->ces_info->id == WC_CES_UTF_8) /* the UTF-8 and UTF-7 branches are compiled only when USE_UNICODE is defined */ |
147 | 465 | wc_push_to_utf8_end(os, st); |
148 | 7.08k | else if (st->ces_info->id == WC_CES_UTF_7) |
149 | 623 | wc_push_to_utf7_end(os, st); |
150 | 8.60k | #endif |
151 | 8.60k | } |
152 | | |