Coverage Report

Created: 2026-03-11 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/u-boot/lib/efi_loader/efi_unicode_collation.c
Line
Count
Source
1
// SPDX-License-Identifier: GPL-2.0+
2
/*
3
 * EFI Unicode collation protocol
4
 *
5
 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
6
 */
7
8
#define LOG_CATEGORY LOGC_EFI
9
10
#include <charset.h>
11
#include <cp1250.h>
12
#include <cp437.h>
13
#include <efi_loader.h>
14
15
/*
 * Characters that may not be used in FAT 8.3 file names.
 *
 * The list is consulted via strchr() in efi_str_to_fat(); control characters
 * below 0x20 are rejected there by a separate range check, so only DEL (0x7f)
 * is listed explicitly here.
 */
16
static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f";
17
18
/*
19
 * EDK2 assumes codepage 1250 when creating FAT 8.3 file names.
20
 * Linux defaults to codepage 437 for FAT 8.3 file names.
21
 */
22
/*
 * Select the OEM code page table used by efi_fat_to_str() and
 * efi_str_to_fat(). Depending on the configuration 'codepage' is either a
 * local array or a pointer to codepage_437; both forms are only ever indexed
 * or passed on as 'const u16 *' in this file.
 */
#if CONFIG_FAT_DEFAULT_CODEPAGE == 1250
23
/* Unicode code points for code page 1250 characters 0x80 - 0xff */
24
static const u16 codepage[] = CP1250;
25
#else
26
/* Unicode code points for code page 437 characters 0x80 - 0xff */
27
static const u16 *codepage = codepage_437;
28
#endif
29
30
/*
 * GUID of the EFI_UNICODE_COLLATION_PROTOCOL2.
 *
 * Defined without 'static', i.e. with external linkage, so it can be
 * referenced from other translation units.
 */
31
const efi_guid_t efi_guid_unicode_collation_protocol2 =
32
  EFI_UNICODE_COLLATION_PROTOCOL2_GUID;
33
34
/**
35
 * efi_stri_coll() - compare utf-16 strings case-insenitively
36
 *
37
 * @this: unicode collation protocol instance
38
 * @s1:   first string
39
 * @s2:   second string
40
 *
41
 * This function implements the StriColl() service of the
42
 * EFI_UNICODE_COLLATION_PROTOCOL2.
43
 *
44
 * See the Unified Extensible Firmware Interface (UEFI) specification for
45
 * details.
46
 *
47
 * Return:  0: s1 == s2, > 0: s1 > s2, < 0: s1 < s2
48
 */
49
static efi_intn_t EFIAPI efi_stri_coll(
50
    struct efi_unicode_collation_protocol *this, u16 *s1, u16 *s2)
51
{
52
  s32 c1, c2;
53
  efi_intn_t ret = 0;
54
55
  EFI_ENTRY("%p, %ls, %ls", this, s1, s2);
56
  for (; *s1 | *s2; ++s1, ++s2) {
57
    c1 = utf_to_upper(*s1);
58
    c2 = utf_to_upper(*s2);
59
    if (c1 < c2) {
60
      ret = -1;
61
      goto out;
62
    } else if (c1 > c2) {
63
      ret = 1;
64
      goto out;
65
    }
66
  }
67
out:
68
  EFI_EXIT(EFI_SUCCESS);
69
  return ret;
70
}
71
72
/**
73
 * next_lower() - get next codepoint converted to lower case
74
 *
75
 * @string: pointer to u16 string, on return advanced by one codepoint
76
 * Return:  first codepoint of string converted to lower case
77
 */
78
static s32 next_lower(const u16 **string)
79
0
{
80
0
  return utf_to_lower(utf16_get(string));
81
0
}
82
83
/**
84
 * metai_match() - compare utf-16 string with a pattern string case-insenitively
85
 *
86
 * @string: string to compare
87
 * @pattern:  pattern string
88
 *
89
 * The pattern string may use these:
90
 *  - * matches >= 0 characters
91
 *  - ? matches 1 character
92
 *  - [<char1><char2>...<charN>] match any character in the set
93
 *  - [<char1>-<char2>] matches any character in the range
94
 *
95
 * This function is called my efi_metai_match().
96
 *
97
 * For '*' pattern searches this function calls itself recursively.
98
 * Performance-wise this is suboptimal, especially for multiple '*' wildcards.
99
 * But it results in simple code.
100
 *
101
 * Return:  true if the string is matched.
102
 */
103
static bool metai_match(const u16 *string, const u16 *pattern)
104
0
{
105
0
  s32 first, s, p;
106
107
0
  for (; *string && *pattern;) {
108
0
    const u16 *string_old = string;
109
110
0
    s = next_lower(&string);
111
0
    p = next_lower(&pattern);
112
113
0
    switch (p) {
114
0
    case '*':
115
      /* Match 0 or more characters */
116
0
      for (;; s = next_lower(&string)) {
117
0
        if (metai_match(string_old, pattern))
118
0
          return true;
119
0
        if (!s)
120
0
          return false;
121
0
        string_old = string;
122
0
      }
123
0
    case '?':
124
      /* Match any one character */
125
0
      break;
126
0
    case '[':
127
      /* Match any character in the set */
128
0
      p = next_lower(&pattern);
129
0
      first = p;
130
0
      if (first == ']')
131
        /* Empty set */
132
0
        return false;
133
0
      p = next_lower(&pattern);
134
0
      if (p == '-') {
135
        /* Range */
136
0
        p = next_lower(&pattern);
137
0
        if (s < first || s > p)
138
0
          return false;
139
0
        p = next_lower(&pattern);
140
0
        if (p != ']')
141
0
          return false;
142
0
      } else {
143
        /* Set */
144
0
        bool hit = false;
145
146
0
        if (s == first)
147
0
          hit = true;
148
0
        for (; p && p != ']';
149
0
             p = next_lower(&pattern)) {
150
0
          if (p == s)
151
0
            hit = true;
152
0
        }
153
0
        if (!hit || p != ']')
154
0
          return false;
155
0
      }
156
0
      break;
157
0
    default:
158
      /* Match one character */
159
0
      if (p != s)
160
0
        return false;
161
0
    }
162
0
  }
163
0
  if (!*pattern && !*string)
164
0
    return true;
165
0
  return false;
166
0
}
167
168
/**
169
 * efi_metai_match() - compare utf-16 string with a pattern string
170
 *           case-insenitively
171
 *
172
 * @this: unicode collation protocol instance
173
 * @string: string to compare
174
 * @pattern:  pattern string
175
 *
176
 * The pattern string may use these:
177
 *  - * matches >= 0 characters
178
 *  - ? matches 1 character
179
 *  - [<char1><char2>...<charN>] match any character in the set
180
 *  - [<char1>-<char2>] matches any character in the range
181
 *
182
 * This function implements the MetaMatch() service of the
183
 * EFI_UNICODE_COLLATION_PROTOCOL2.
184
 *
185
 * Return:  true if the string is matched.
186
 */
187
static bool EFIAPI efi_metai_match(struct efi_unicode_collation_protocol *this,
188
           const u16 *string, const u16 *pattern)
189
{
190
  bool ret;
191
192
  EFI_ENTRY("%p, %ls, %ls", this, string, pattern);
193
  ret =  metai_match(string, pattern);
194
  EFI_EXIT(EFI_SUCCESS);
195
  return ret;
196
}
197
198
/**
199
 * efi_str_lwr() - convert to lower case
200
 *
201
 * @this: unicode collation protocol instance
202
 * @string: string to convert
203
 *
204
 * The conversion is done in place. As long as upper and lower letters use the
205
 * same number of words this does not pose a problem.
206
 *
207
 * This function implements the StrLwr() service of the
208
 * EFI_UNICODE_COLLATION_PROTOCOL2.
209
 */
210
static void EFIAPI efi_str_lwr(struct efi_unicode_collation_protocol *this,
211
             u16 *string)
212
{
213
  EFI_ENTRY("%p, %ls", this, string);
214
  for (; *string; ++string)
215
    *string = utf_to_lower(*string);
216
  EFI_EXIT(EFI_SUCCESS);
217
}
218
219
/**
220
 * efi_str_upr() - convert to upper case
221
 *
222
 * @this: unicode collation protocol instance
223
 * @string: string to convert
224
 *
225
 * The conversion is done in place. As long as upper and lower letters use the
226
 * same number of words this does not pose a problem.
227
 *
228
 * This function implements the StrUpr() service of the
229
 * EFI_UNICODE_COLLATION_PROTOCOL2.
230
 */
231
static void EFIAPI efi_str_upr(struct efi_unicode_collation_protocol *this,
232
             u16 *string)
233
{
234
  EFI_ENTRY("%p, %ls", this, string);
235
  for (; *string; ++string)
236
    *string = utf_to_upper(*string);
237
  EFI_EXIT(EFI_SUCCESS);
238
}
239
240
/**
241
 * efi_fat_to_str() - convert an 8.3 file name from an OEM codepage to Unicode
242
 *
243
 * @this: unicode collation protocol instance
244
 * @fat_size: size of the string to convert
245
 * @fat:  string to convert
246
 * @string: converted string
247
 *
248
 * This function implements the FatToStr() service of the
249
 * EFI_UNICODE_COLLATION_PROTOCOL2.
250
 */
251
static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this,
252
          efi_uintn_t fat_size, char *fat, u16 *string)
253
{
254
  efi_uintn_t i;
255
  u16 c;
256
257
  EFI_ENTRY("%p, %zu, %s, %p", this, fat_size, fat, string);
258
  for (i = 0; i < fat_size; ++i) {
259
    c = (unsigned char)fat[i];
260
    if (c > 0x80)
261
      c = codepage[c - 0x60];
262
    string[i] = c;
263
    if (!c)
264
      break;
265
  }
266
  string[i] = 0;
267
  EFI_EXIT(EFI_SUCCESS);
268
}
269
270
/**
271
 * efi_str_to_fat() - convert a utf-16 string to legal characters for a FAT
272
 *                    file name in an OEM code page
273
 *
274
 * @this: unicode collation protocol instance
275
 * @string: Unicode string to convert
276
 * @fat_size: size of the target buffer
277
 * @fat:  converted string
278
 *
279
 * This function implements the StrToFat() service of the
280
 * EFI_UNICODE_COLLATION_PROTOCOL2.
281
 *
282
 * Return:  true if an illegal character was substituted by '_'.
283
 */
284
static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this,
285
          const u16 *string, efi_uintn_t fat_size,
286
          char *fat)
287
{
288
  efi_uintn_t i;
289
  s32 c;
290
  bool ret = false;
291
292
  EFI_ENTRY("%p, %ls, %zu, %p", this, string, fat_size, fat);
293
  for (i = 0; i < fat_size;) {
294
    c = utf16_get(&string);
295
    switch (c) {
296
    /* Ignore period and space */
297
    case '.':
298
    case ' ':
299
      continue;
300
    case 0:
301
      break;
302
    }
303
    c = utf_to_upper(c);
304
    if (utf_to_cp(&c, codepage) ||
305
        (c && (c < 0x20 || strchr(illegal, c)))) {
306
      ret = true;
307
      c = '_';
308
    }
309
310
    fat[i] = c;
311
    if (!c)
312
      break;
313
    ++i;
314
  }
315
  EFI_EXIT(EFI_SUCCESS);
316
  return ret;
317
}
318
319
const struct efi_unicode_collation_protocol efi_unicode_collation_protocol2 = {
320
  .stri_coll = efi_stri_coll,
321
  .metai_match = efi_metai_match,
322
  .str_lwr = efi_str_lwr,
323
  .str_upr = efi_str_upr,
324
  .fat_to_str = efi_fat_to_str,
325
  .str_to_fat = efi_str_to_fat,
326
  .supported_languages = "en",
327
};