/src/u-boot/lib/efi_loader/efi_unicode_collation.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: GPL-2.0+ |
2 | | /* |
3 | | * EFI Unicode collation protocol |
4 | | * |
5 | | * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de> |
6 | | */ |
7 | | |
8 | | #define LOG_CATEGORY LOGC_EFI |
9 | | |
10 | | #include <charset.h> |
11 | | #include <cp1250.h> |
12 | | #include <cp437.h> |
13 | | #include <efi_loader.h> |
14 | | |
15 | | /* Characters that may not be used in FAT 8.3 file names */ |
16 | | static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f"; |
17 | | |
18 | | /* |
19 | | * EDK2 assumes codepage 1250 when creating FAT 8.3 file names. |
20 | | * Linux defaults to codepage 437 for FAT 8.3 file names. |
21 | | */ |
22 | | #if CONFIG_FAT_DEFAULT_CODEPAGE == 1250 |
23 | | /* Unicode code points for code page 1250 characters 0x80 - 0xff */ |
24 | | static const u16 codepage[] = CP1250; |
25 | | #else |
26 | | /* Unicode code points for code page 437 characters 0x80 - 0xff */ |
27 | | static const u16 *codepage = codepage_437; |
28 | | #endif |
29 | | |
30 | | /* GUID of the EFI_UNICODE_COLLATION_PROTOCOL2 */ |
31 | | const efi_guid_t efi_guid_unicode_collation_protocol2 = |
32 | | EFI_UNICODE_COLLATION_PROTOCOL2_GUID; |
33 | | |
34 | | /** |
35 | | * efi_stri_coll() - compare utf-16 strings case-insenitively |
36 | | * |
37 | | * @this: unicode collation protocol instance |
38 | | * @s1: first string |
39 | | * @s2: second string |
40 | | * |
41 | | * This function implements the StriColl() service of the |
42 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
43 | | * |
44 | | * See the Unified Extensible Firmware Interface (UEFI) specification for |
45 | | * details. |
46 | | * |
47 | | * Return: 0: s1 == s2, > 0: s1 > s2, < 0: s1 < s2 |
48 | | */ |
49 | | static efi_intn_t EFIAPI efi_stri_coll( |
50 | | struct efi_unicode_collation_protocol *this, u16 *s1, u16 *s2) |
51 | | { |
52 | | s32 c1, c2; |
53 | | efi_intn_t ret = 0; |
54 | | |
55 | | EFI_ENTRY("%p, %ls, %ls", this, s1, s2); |
56 | | for (; *s1 | *s2; ++s1, ++s2) { |
57 | | c1 = utf_to_upper(*s1); |
58 | | c2 = utf_to_upper(*s2); |
59 | | if (c1 < c2) { |
60 | | ret = -1; |
61 | | goto out; |
62 | | } else if (c1 > c2) { |
63 | | ret = 1; |
64 | | goto out; |
65 | | } |
66 | | } |
67 | | out: |
68 | | EFI_EXIT(EFI_SUCCESS); |
69 | | return ret; |
70 | | } |
71 | | |
72 | | /** |
73 | | * next_lower() - get next codepoint converted to lower case |
74 | | * |
75 | | * @string: pointer to u16 string, on return advanced by one codepoint |
76 | | * Return: first codepoint of string converted to lower case |
77 | | */ |
78 | | static s32 next_lower(const u16 **string) |
79 | 0 | { |
80 | 0 | return utf_to_lower(utf16_get(string)); |
81 | 0 | } |
82 | | |
83 | | /** |
84 | | * metai_match() - compare utf-16 string with a pattern string case-insenitively |
85 | | * |
86 | | * @string: string to compare |
87 | | * @pattern: pattern string |
88 | | * |
89 | | * The pattern string may use these: |
90 | | * - * matches >= 0 characters |
91 | | * - ? matches 1 character |
92 | | * - [<char1><char2>...<charN>] match any character in the set |
93 | | * - [<char1>-<char2>] matches any character in the range |
94 | | * |
95 | | * This function is called my efi_metai_match(). |
96 | | * |
97 | | * For '*' pattern searches this function calls itself recursively. |
98 | | * Performance-wise this is suboptimal, especially for multiple '*' wildcards. |
99 | | * But it results in simple code. |
100 | | * |
101 | | * Return: true if the string is matched. |
102 | | */ |
103 | | static bool metai_match(const u16 *string, const u16 *pattern) |
104 | 0 | { |
105 | 0 | s32 first, s, p; |
106 | |
|
107 | 0 | for (; *string && *pattern;) { |
108 | 0 | const u16 *string_old = string; |
109 | |
|
110 | 0 | s = next_lower(&string); |
111 | 0 | p = next_lower(&pattern); |
112 | |
|
113 | 0 | switch (p) { |
114 | 0 | case '*': |
115 | | /* Match 0 or more characters */ |
116 | 0 | for (;; s = next_lower(&string)) { |
117 | 0 | if (metai_match(string_old, pattern)) |
118 | 0 | return true; |
119 | 0 | if (!s) |
120 | 0 | return false; |
121 | 0 | string_old = string; |
122 | 0 | } |
123 | 0 | case '?': |
124 | | /* Match any one character */ |
125 | 0 | break; |
126 | 0 | case '[': |
127 | | /* Match any character in the set */ |
128 | 0 | p = next_lower(&pattern); |
129 | 0 | first = p; |
130 | 0 | if (first == ']') |
131 | | /* Empty set */ |
132 | 0 | return false; |
133 | 0 | p = next_lower(&pattern); |
134 | 0 | if (p == '-') { |
135 | | /* Range */ |
136 | 0 | p = next_lower(&pattern); |
137 | 0 | if (s < first || s > p) |
138 | 0 | return false; |
139 | 0 | p = next_lower(&pattern); |
140 | 0 | if (p != ']') |
141 | 0 | return false; |
142 | 0 | } else { |
143 | | /* Set */ |
144 | 0 | bool hit = false; |
145 | |
|
146 | 0 | if (s == first) |
147 | 0 | hit = true; |
148 | 0 | for (; p && p != ']'; |
149 | 0 | p = next_lower(&pattern)) { |
150 | 0 | if (p == s) |
151 | 0 | hit = true; |
152 | 0 | } |
153 | 0 | if (!hit || p != ']') |
154 | 0 | return false; |
155 | 0 | } |
156 | 0 | break; |
157 | 0 | default: |
158 | | /* Match one character */ |
159 | 0 | if (p != s) |
160 | 0 | return false; |
161 | 0 | } |
162 | 0 | } |
163 | 0 | if (!*pattern && !*string) |
164 | 0 | return true; |
165 | 0 | return false; |
166 | 0 | } |
167 | | |
168 | | /** |
169 | | * efi_metai_match() - compare utf-16 string with a pattern string |
170 | | * case-insenitively |
171 | | * |
172 | | * @this: unicode collation protocol instance |
173 | | * @string: string to compare |
174 | | * @pattern: pattern string |
175 | | * |
176 | | * The pattern string may use these: |
177 | | * - * matches >= 0 characters |
178 | | * - ? matches 1 character |
179 | | * - [<char1><char2>...<charN>] match any character in the set |
180 | | * - [<char1>-<char2>] matches any character in the range |
181 | | * |
182 | | * This function implements the MetaMatch() service of the |
183 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
184 | | * |
185 | | * Return: true if the string is matched. |
186 | | */ |
187 | | static bool EFIAPI efi_metai_match(struct efi_unicode_collation_protocol *this, |
188 | | const u16 *string, const u16 *pattern) |
189 | | { |
190 | | bool ret; |
191 | | |
192 | | EFI_ENTRY("%p, %ls, %ls", this, string, pattern); |
193 | | ret = metai_match(string, pattern); |
194 | | EFI_EXIT(EFI_SUCCESS); |
195 | | return ret; |
196 | | } |
197 | | |
198 | | /** |
199 | | * efi_str_lwr() - convert to lower case |
200 | | * |
201 | | * @this: unicode collation protocol instance |
202 | | * @string: string to convert |
203 | | * |
204 | | * The conversion is done in place. As long as upper and lower letters use the |
205 | | * same number of words this does not pose a problem. |
206 | | * |
207 | | * This function implements the StrLwr() service of the |
208 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
209 | | */ |
210 | | static void EFIAPI efi_str_lwr(struct efi_unicode_collation_protocol *this, |
211 | | u16 *string) |
212 | | { |
213 | | EFI_ENTRY("%p, %ls", this, string); |
214 | | for (; *string; ++string) |
215 | | *string = utf_to_lower(*string); |
216 | | EFI_EXIT(EFI_SUCCESS); |
217 | | } |
218 | | |
219 | | /** |
220 | | * efi_str_upr() - convert to upper case |
221 | | * |
222 | | * @this: unicode collation protocol instance |
223 | | * @string: string to convert |
224 | | * |
225 | | * The conversion is done in place. As long as upper and lower letters use the |
226 | | * same number of words this does not pose a problem. |
227 | | * |
228 | | * This function implements the StrUpr() service of the |
229 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
230 | | */ |
231 | | static void EFIAPI efi_str_upr(struct efi_unicode_collation_protocol *this, |
232 | | u16 *string) |
233 | | { |
234 | | EFI_ENTRY("%p, %ls", this, string); |
235 | | for (; *string; ++string) |
236 | | *string = utf_to_upper(*string); |
237 | | EFI_EXIT(EFI_SUCCESS); |
238 | | } |
239 | | |
240 | | /** |
241 | | * efi_fat_to_str() - convert an 8.3 file name from an OEM codepage to Unicode |
242 | | * |
243 | | * @this: unicode collation protocol instance |
244 | | * @fat_size: size of the string to convert |
245 | | * @fat: string to convert |
246 | | * @string: converted string |
247 | | * |
248 | | * This function implements the FatToStr() service of the |
249 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
250 | | */ |
251 | | static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this, |
252 | | efi_uintn_t fat_size, char *fat, u16 *string) |
253 | | { |
254 | | efi_uintn_t i; |
255 | | u16 c; |
256 | | |
257 | | EFI_ENTRY("%p, %zu, %s, %p", this, fat_size, fat, string); |
258 | | for (i = 0; i < fat_size; ++i) { |
259 | | c = (unsigned char)fat[i]; |
260 | | if (c > 0x80) |
261 | | c = codepage[c - 0x60]; |
262 | | string[i] = c; |
263 | | if (!c) |
264 | | break; |
265 | | } |
266 | | string[i] = 0; |
267 | | EFI_EXIT(EFI_SUCCESS); |
268 | | } |
269 | | |
270 | | /** |
271 | | * efi_str_to_fat() - convert a utf-16 string to legal characters for a FAT |
272 | | * file name in an OEM code page |
273 | | * |
274 | | * @this: unicode collation protocol instance |
275 | | * @string: Unicode string to convert |
276 | | * @fat_size: size of the target buffer |
277 | | * @fat: converted string |
278 | | * |
279 | | * This function implements the StrToFat() service of the |
280 | | * EFI_UNICODE_COLLATION_PROTOCOL2. |
281 | | * |
282 | | * Return: true if an illegal character was substituted by '_'. |
283 | | */ |
284 | | static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this, |
285 | | const u16 *string, efi_uintn_t fat_size, |
286 | | char *fat) |
287 | | { |
288 | | efi_uintn_t i; |
289 | | s32 c; |
290 | | bool ret = false; |
291 | | |
292 | | EFI_ENTRY("%p, %ls, %zu, %p", this, string, fat_size, fat); |
293 | | for (i = 0; i < fat_size;) { |
294 | | c = utf16_get(&string); |
295 | | switch (c) { |
296 | | /* Ignore period and space */ |
297 | | case '.': |
298 | | case ' ': |
299 | | continue; |
300 | | case 0: |
301 | | break; |
302 | | } |
303 | | c = utf_to_upper(c); |
304 | | if (utf_to_cp(&c, codepage) || |
305 | | (c && (c < 0x20 || strchr(illegal, c)))) { |
306 | | ret = true; |
307 | | c = '_'; |
308 | | } |
309 | | |
310 | | fat[i] = c; |
311 | | if (!c) |
312 | | break; |
313 | | ++i; |
314 | | } |
315 | | EFI_EXIT(EFI_SUCCESS); |
316 | | return ret; |
317 | | } |
318 | | |
319 | | const struct efi_unicode_collation_protocol efi_unicode_collation_protocol2 = { |
320 | | .stri_coll = efi_stri_coll, |
321 | | .metai_match = efi_metai_match, |
322 | | .str_lwr = efi_str_lwr, |
323 | | .str_upr = efi_str_upr, |
324 | | .fat_to_str = efi_fat_to_str, |
325 | | .str_to_fat = efi_str_to_fat, |
326 | | .supported_languages = "en", |
327 | | }; |