Line | Count | Source |
1 | | //! Convert a string in IBM codepage 437 to UTF-8 |
2 | | |
3 | | /// Trait to convert IBM codepage 437 to the target type |
4 | | pub trait FromCp437 { |
5 | | /// Target type |
6 | | type Target; |
7 | | |
8 | | /// Function that does the conversion from cp437. |
9 | | /// Generally allocations will be avoided if all data falls into the ASCII range. |
10 | | #[allow(clippy::wrong_self_convention)] |
11 | | fn from_cp437(self) -> Self::Target; |
12 | | } |
13 | | |
14 | | impl<'a> FromCp437 for &'a [u8] { |
15 | | type Target = ::std::borrow::Cow<'a, str>; |
16 | | |
17 | 0 | fn from_cp437(self) -> Self::Target { |
18 | 0 | if self.iter().all(|c| *c < 0x80) { |
19 | 0 | ::std::str::from_utf8(self).unwrap().into() |
20 | | } else { |
21 | 0 | self.iter().map(|c| to_char(*c)).collect::<String>().into() |
22 | | } |
23 | 0 | } |
24 | | } |
25 | | |
26 | | impl FromCp437 for Vec<u8> { |
27 | | type Target = String; |
28 | | |
29 | 357k | fn from_cp437(self) -> Self::Target { |
30 | 14.8M | if self.iter().all(|c| *c < 0x80) { |
31 | 260k | String::from_utf8(self).unwrap() |
32 | | } else { |
33 | 97.5k | self.into_iter().map(to_char).collect() |
34 | | } |
35 | 357k | } |
36 | | } |
37 | | |
38 | 3.91M | fn to_char(input: u8) -> char { |
39 | 3.91M | let output = match input { |
40 | 3.91M | 0x00..=0x7f => input as u32, |
41 | 10.9k | 0x80 => 0x00c7, |
42 | 27.8k | 0x81 => 0x00fc, |
43 | 2.50k | 0x82 => 0x00e9, |
44 | 360 | 0x83 => 0x00e2, |
45 | 3.46k | 0x84 => 0x00e4, |
46 | 10.2k | 0x85 => 0x00e0, |
47 | 3.78k | 0x86 => 0x00e5, |
48 | 6.50k | 0x87 => 0x00e7, |
49 | 452 | 0x88 => 0x00ea, |
50 | 1.95k | 0x89 => 0x00eb, |
51 | 5.02k | 0x8a => 0x00e8, |
52 | 538 | 0x8b => 0x00ef, |
53 | 3.87k | 0x8c => 0x00ee, |
54 | 1.12k | 0x8d => 0x00ec, |
55 | 730 | 0x8e => 0x00c4, |
56 | 562 | 0x8f => 0x00c5, |
57 | 1.19k | 0x90 => 0x00c9, |
58 | 799 | 0x91 => 0x00e6, |
59 | 247 | 0x92 => 0x00c6, |
60 | 755 | 0x93 => 0x00f4, |
61 | 591 | 0x94 => 0x00f6, |
62 | 2.21k | 0x95 => 0x00f2, |
63 | 3.95k | 0x96 => 0x00fb, |
64 | 837 | 0x97 => 0x00f9, |
65 | 893 | 0x98 => 0x00ff, |
66 | 2.37k | 0x99 => 0x00d6, |
67 | 15.3k | 0x9a => 0x00dc, |
68 | 1.17k | 0x9b => 0x00a2, |
69 | 2.43k | 0x9c => 0x00a3, |
70 | 1.34k | 0x9d => 0x00a5, |
71 | 1.49k | 0x9e => 0x20a7, |
72 | 582 | 0x9f => 0x0192, |
73 | 5.31k | 0xa0 => 0x00e1, |
74 | 477 | 0xa1 => 0x00ed, |
75 | 12.0k | 0xa2 => 0x00f3, |
76 | 593 | 0xa3 => 0x00fa, |
77 | 5.06k | 0xa4 => 0x00f1, |
78 | 854 | 0xa5 => 0x00d1, |
79 | 2.60k | 0xa6 => 0x00aa, |
80 | 2.26k | 0xa7 => 0x00ba, |
81 | 602 | 0xa8 => 0x00bf, |
82 | 3.52k | 0xa9 => 0x2310, |
83 | 2.67k | 0xaa => 0x00ac, |
84 | 1.74k | 0xab => 0x00bd, |
85 | 7.71k | 0xac => 0x00bc, |
86 | 1.41k | 0xad => 0x00a1, |
87 | 4.59k | 0xae => 0x00ab, |
88 | 5.47k | 0xaf => 0x00bb, |
89 | 2.59k | 0xb0 => 0x2591, |
90 | 2.34k | 0xb1 => 0x2592, |
91 | 2.84k | 0xb2 => 0x2593, |
92 | 1.36k | 0xb3 => 0x2502, |
93 | 7.98k | 0xb4 => 0x2524, |
94 | 3.08k | 0xb5 => 0x2561, |
95 | 2.01k | 0xb6 => 0x2562, |
96 | 1.20k | 0xb7 => 0x2556, |
97 | 505 | 0xb8 => 0x2555, |
98 | 88.0k | 0xb9 => 0x2563, |
99 | 526 | 0xba => 0x2551, |
100 | 1.12k | 0xbb => 0x2557, |
101 | 30.3k | 0xbc => 0x255d, |
102 | 9.16k | 0xbd => 0x255c, |
103 | 847 | 0xbe => 0x255b, |
104 | 9.51k | 0xbf => 0x2510, |
105 | 2.77k | 0xc0 => 0x2514, |
106 | 2.70k | 0xc1 => 0x2534, |
107 | 4.52k | 0xc2 => 0x252c, |
108 | 13.5k | 0xc3 => 0x251c, |
109 | 9.00k | 0xc4 => 0x2500, |
110 | 1.09k | 0xc5 => 0x253c, |
111 | 150k | 0xc6 => 0x255e, |
112 | 534 | 0xc7 => 0x255f, |
113 | 782 | 0xc8 => 0x255a, |
114 | 1.23k | 0xc9 => 0x2554, |
115 | 804 | 0xca => 0x2569, |
116 | 4.03k | 0xcb => 0x2566, |
117 | 1.38k | 0xcc => 0x2560, |
118 | 8.73k | 0xcd => 0x2550, |
119 | 6.37k | 0xce => 0x256c, |
120 | 2.20k | 0xcf => 0x2567, |
121 | 2.01k | 0xd0 => 0x2568, |
122 | 427k | 0xd1 => 0x2564, |
123 | 1.15k | 0xd2 => 0x2565, |
124 | 3.28k | 0xd3 => 0x2559, |
125 | 4.83k | 0xd4 => 0x2558, |
126 | 1.58k | 0xd5 => 0x2552, |
127 | 1.39k | 0xd6 => 0x2553, |
128 | 999 | 0xd7 => 0x256b, |
129 | 421 | 0xd8 => 0x256a, |
130 | 1.80k | 0xd9 => 0x2518, |
131 | 2.90k | 0xda => 0x250c, |
132 | 2.41k | 0xdb => 0x2588, |
133 | 788 | 0xdc => 0x2584, |
134 | 718 | 0xdd => 0x258c, |
135 | 489 | 0xde => 0x2590, |
136 | 1.57k | 0xdf => 0x2580, |
137 | 4.49k | 0xe0 => 0x03b1, |
138 | 615 | 0xe1 => 0x00df, |
139 | 3.42k | 0xe2 => 0x0393, |
140 | 343 | 0xe3 => 0x03c0, |
141 | 557 | 0xe4 => 0x03a3, |
142 | 1.08k | 0xe5 => 0x03c3, |
143 | 6.93k | 0xe6 => 0x00b5, |
144 | 399 | 0xe7 => 0x03c4, |
145 | 2.46k | 0xe8 => 0x03a6, |
146 | 1.18k | 0xe9 => 0x0398, |
147 | 8.10k | 0xea => 0x03a9, |
148 | 409 | 0xeb => 0x03b4, |
149 | 498 | 0xec => 0x221e, |
150 | 922 | 0xed => 0x03c6, |
151 | 628 | 0xee => 0x03b5, |
152 | 8.44k | 0xef => 0x2229, |
153 | 781 | 0xf0 => 0x2261, |
154 | 4.69k | 0xf1 => 0x00b1, |
155 | 1.15k | 0xf2 => 0x2265, |
156 | 3.22k | 0xf3 => 0x2264, |
157 | 9.97k | 0xf4 => 0x2320, |
158 | 4.08k | 0xf5 => 0x2321, |
159 | 2.94k | 0xf6 => 0x00f7, |
160 | 7.60k | 0xf7 => 0x2248, |
161 | 6.46k | 0xf8 => 0x00b0, |
162 | 7.57k | 0xf9 => 0x2219, |
163 | 8.42k | 0xfa => 0x00b7, |
164 | 7.84k | 0xfb => 0x221a, |
165 | 4.99k | 0xfc => 0x207f, |
166 | 10.3k | 0xfd => 0x00b2, |
167 | 16.6k | 0xfe => 0x25a0, |
168 | 359k | 0xff => 0x00a0, |
169 | | }; |
170 | 3.91M | ::std::char::from_u32(output).unwrap() |
171 | 3.91M | } |
172 | | |
173 | | #[cfg(test)] |
174 | | mod test { |
175 | | #[test] |
176 | | fn to_char_valid() { |
177 | | for i in 0x00_u32..0x100 { |
178 | | super::to_char(i as u8); |
179 | | } |
180 | | } |
181 | | |
182 | | #[test] |
183 | | fn ascii() { |
184 | | for i in 0x00..0x80 { |
185 | | assert_eq!(super::to_char(i), i as char); |
186 | | } |
187 | | } |
188 | | |
189 | | #[test] |
190 | | fn example_slice() { |
191 | | use super::FromCp437; |
192 | | let data = b"Cura\x87ao"; |
193 | | assert!(::std::str::from_utf8(data).is_err()); |
194 | | assert_eq!(data.from_cp437(), "Curaçao"); |
195 | | } |
196 | | |
197 | | #[test] |
198 | | fn example_vec() { |
199 | | use super::FromCp437; |
200 | | let data = vec![0xCC, 0xCD, 0xCD, 0xB9]; |
201 | | assert!(String::from_utf8(data.clone()).is_err()); |
202 | | assert_eq!(&data.from_cp437(), "╠══╣"); |
203 | | } |
204 | | } |