Line | Count | Source |
1 | | //! Convert a string in IBM codepage 437 to UTF-8 |
2 | | |
/// Conversion from IBM codepage 437 encoded data into a text type.
pub trait FromCp437 {
    /// The type produced by the conversion.
    type Target;

    /// Consumes `self`, treats it as cp437 data and returns the converted value.
    ///
    /// Implementations generally avoid allocating when all of the data lies in the
    /// ASCII range.
    // The method takes `self` by value even though its name starts with `from_`,
    // which is what clippy's `wrong_self_convention` lint complains about; the lint
    // is silenced here instead of renaming the public method.
    #[allow(clippy::wrong_self_convention)]
    fn from_cp437(self) -> Self::Target;
}
13 | | |
14 | | impl<'a> FromCp437 for &'a [u8] { |
15 | | type Target = ::std::borrow::Cow<'a, str>; |
16 | | |
17 | 0 | fn from_cp437(self) -> Self::Target { |
18 | 0 | if self.iter().all(|c| *c < 0x80) { |
19 | 0 | ::std::str::from_utf8(self).unwrap().into() |
20 | | } else { |
21 | 0 | self.iter().map(|c| to_char(*c)).collect::<String>().into() |
22 | | } |
23 | 0 | } |
24 | | } |
25 | | |
26 | | impl FromCp437 for Vec<u8> { |
27 | | type Target = String; |
28 | | |
29 | 418k | fn from_cp437(self) -> Self::Target { |
30 | 14.6M | if self.iter().all(|c| *c < 0x80) { |
31 | 297k | String::from_utf8(self).unwrap() |
32 | | } else { |
33 | 121k | self.into_iter().map(to_char).collect() |
34 | | } |
35 | 418k | } |
36 | | } |
37 | | |
/// Maps a single cp437 byte to its Unicode character.
///
/// Bytes `0x00..=0x7f` map to the character with the same code point; bytes
/// `0x80..=0xff` are looked up in a fixed 128-entry table.
fn to_char(input: u8) -> char {
    // Character for each byte in `0x80..=0xff`, indexed by `byte - 0x80`.
    const HIGH_HALF: [char; 128] = [
        // 0x80..=0x8f
        '\u{00c7}', '\u{00fc}', '\u{00e9}', '\u{00e2}',
        '\u{00e4}', '\u{00e0}', '\u{00e5}', '\u{00e7}',
        '\u{00ea}', '\u{00eb}', '\u{00e8}', '\u{00ef}',
        '\u{00ee}', '\u{00ec}', '\u{00c4}', '\u{00c5}',
        // 0x90..=0x9f
        '\u{00c9}', '\u{00e6}', '\u{00c6}', '\u{00f4}',
        '\u{00f6}', '\u{00f2}', '\u{00fb}', '\u{00f9}',
        '\u{00ff}', '\u{00d6}', '\u{00dc}', '\u{00a2}',
        '\u{00a3}', '\u{00a5}', '\u{20a7}', '\u{0192}',
        // 0xa0..=0xaf
        '\u{00e1}', '\u{00ed}', '\u{00f3}', '\u{00fa}',
        '\u{00f1}', '\u{00d1}', '\u{00aa}', '\u{00ba}',
        '\u{00bf}', '\u{2310}', '\u{00ac}', '\u{00bd}',
        '\u{00bc}', '\u{00a1}', '\u{00ab}', '\u{00bb}',
        // 0xb0..=0xbf
        '\u{2591}', '\u{2592}', '\u{2593}', '\u{2502}',
        '\u{2524}', '\u{2561}', '\u{2562}', '\u{2556}',
        '\u{2555}', '\u{2563}', '\u{2551}', '\u{2557}',
        '\u{255d}', '\u{255c}', '\u{255b}', '\u{2510}',
        // 0xc0..=0xcf
        '\u{2514}', '\u{2534}', '\u{252c}', '\u{251c}',
        '\u{2500}', '\u{253c}', '\u{255e}', '\u{255f}',
        '\u{255a}', '\u{2554}', '\u{2569}', '\u{2566}',
        '\u{2560}', '\u{2550}', '\u{256c}', '\u{2567}',
        // 0xd0..=0xdf
        '\u{2568}', '\u{2564}', '\u{2565}', '\u{2559}',
        '\u{2558}', '\u{2552}', '\u{2553}', '\u{256b}',
        '\u{256a}', '\u{2518}', '\u{250c}', '\u{2588}',
        '\u{2584}', '\u{258c}', '\u{2590}', '\u{2580}',
        // 0xe0..=0xef
        '\u{03b1}', '\u{00df}', '\u{0393}', '\u{03c0}',
        '\u{03a3}', '\u{03c3}', '\u{00b5}', '\u{03c4}',
        '\u{03a6}', '\u{0398}', '\u{03a9}', '\u{03b4}',
        '\u{221e}', '\u{03c6}', '\u{03b5}', '\u{2229}',
        // 0xf0..=0xff
        '\u{2261}', '\u{00b1}', '\u{2265}', '\u{2264}',
        '\u{2320}', '\u{2321}', '\u{00f7}', '\u{2248}',
        '\u{00b0}', '\u{2219}', '\u{00b7}', '\u{221a}',
        '\u{207f}', '\u{00b2}', '\u{25a0}', '\u{00a0}',
    ];

    match input.checked_sub(0x80) {
        // `offset` is at most 0x7f, so it always lands inside the 128-entry table.
        Some(offset) => HIGH_HALF[usize::from(offset)],
        // The subtraction underflowed, i.e. the byte is plain ASCII.
        None => char::from(input),
    }
}
172 | | |
#[cfg(test)]
mod test {
    use super::{to_char, FromCp437};

    /// Every possible byte value must convert without panicking.
    #[test]
    fn to_char_valid() {
        for byte in 0x00_u8..=0xff {
            to_char(byte);
        }
    }

    /// Bytes in the ASCII range map to the character with the same code point.
    #[test]
    fn ascii() {
        (0x00_u8..0x80).for_each(|byte| assert_eq!(to_char(byte), char::from(byte)));
    }

    /// A slice containing a non-ASCII byte is not valid UTF-8 but decodes as cp437.
    #[test]
    fn example_slice() {
        let raw: &[u8] = b"Cura\x87ao";
        assert!(::std::str::from_utf8(raw).is_err());

        let decoded = raw.from_cp437();
        assert_eq!(decoded, "Curaçao");
    }

    /// An owned buffer of box-drawing bytes is not valid UTF-8 but decodes as cp437.
    #[test]
    fn example_vec() {
        let bytes: Vec<u8> = vec![0xCC, 0xCD, 0xCD, 0xB9];
        let decoded = bytes.clone().from_cp437();

        assert!(String::from_utf8(bytes).is_err());
        assert_eq!(decoded, "╠══╣");
    }
}
204 | | } |