Coverage Report

Created: 2025-06-22 06:55

/src/libxslt/libxslt/xsltlocale.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * xsltlocale.c: locale handling
3
 *
4
 * Reference:
5
 * RFC 3066: Tags for the Identification of Languages
6
 * http://www.ietf.org/rfc/rfc3066.txt
7
 * ISO 639-1, ISO 3166-1
8
 *
9
 * Author: Nick Wellnhofer
10
 * winapi port: Roumen Petrov
11
 */
12
13
#define IN_LIBXSLT
14
#include "libxslt.h"
15
16
#include <string.h>
17
#include <libxml/xmlmemory.h>
18
#include <libxml/threads.h>
19
20
#include "xsltlocale.h"
21
#include "xsltutils.h"
22
23
#ifdef HAVE_STRXFRM_L
24
25
  #define XSLT_LOCALE_POSIX
26
27
  #ifdef HAVE_LOCALE_H
28
    #include <locale.h>
29
  #endif
30
  #ifdef HAVE_XLOCALE_H
31
    #include <xlocale.h>
32
  #endif
33
34
#elif defined(_WIN32)
35
36
  #define XSLT_LOCALE_WINAPI
37
38
  #include <windows.h>
39
  #include <winnls.h>
40
41
#else
42
43
  #define XSLT_LOCALE_NONE
44
45
#endif
46
47
0
/*
 * ASCII-only character helpers. They work by clearing or setting bit 0x20,
 * so TOUPPER/TOLOWER are only meaningful for letters, and ISALPHA accepts
 * exactly 'A'-'Z' and 'a'-'z'. The argument is parenthesized so that
 * expressions containing lower-precedence operators expand correctly.
 * The argument of each macro is evaluated exactly once.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/* Maximum subtag lengths, without terminating null character */
#define XSLTMAX_ISO639LANGLEN   8
#define XSLTMAX_ISO3166CNTRYLEN   8
          /* <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN    (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
56
57
static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
58
59
#ifdef XSLT_LOCALE_WINAPI
60
xmlRMutexPtr xsltLocaleMutex = NULL;
61
62
struct xsltRFC1766Info_s {
63
      /*note typedef unsigned char xmlChar !*/
64
    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
65
    LCID       lcid;
66
};
67
typedef struct xsltRFC1766Info_s xsltRFC1766Info;
68
69
static int xsltLocaleListSize = 0;
70
static xsltRFC1766Info *xsltLocaleList = NULL;
71
72
73
static void *
74
xslt_locale_WINAPI(const xmlChar *languageTag) {
75
    int k;
76
    xsltRFC1766Info *p = xsltLocaleList;
77
78
    for (k=0; k<xsltLocaleListSize; k++, p++)
79
  if (xmlStrcmp(p->tag, languageTag) == 0)
80
            return(&p->lcid);
81
    return(NULL);
82
}
83
84
static void xsltEnumSupportedLocales(void);
85
#endif
86
87
/**
 * xsltFreeLocales:
 *
 * Cleanup function for the locale support on shutdown. Only the WINAPI
 * build has anything to release: the table of supported locales.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xsltRFC1766Info *stale;

    xmlRMutexLock(xsltLocaleMutex);
    /*
     * NOTE(review): xsltLocaleListSize is left untouched here, so the
     * table is not rebuilt by a later xsltEnumSupportedLocales call.
     */
    stale = xsltLocaleList;
    xsltLocaleList = NULL;
    xmlFree(stale);
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
101
102
/**
103
 * xsltNewLocale:
104
 * @languageTag: RFC 3066 language tag
105
 *
106
 * Creates a new locale of an opaque system dependent type based on the
107
 * language tag.
108
 *
109
 * Returns the locale or NULL on error or if no matching locale was found
110
 */
111
void *
112
0
xsltNewLocale(const xmlChar *languageTag, int lowerFirst ATTRIBUTE_UNUSED) {
113
0
#ifdef XSLT_LOCALE_POSIX
114
0
    locale_t locale;
115
0
    char localeName[XSLTMAX_LANGTAGLEN+7]; /* 7 chars for ".UTF-8\0" */
116
0
    const xmlChar *p = languageTag;
117
0
    const char *region = NULL;
118
0
    char *q = localeName;
119
0
    int i, llen;
120
121
    /* Convert something like "pt-br" to "pt_BR.UTF-8" */
122
123
0
    if (languageTag == NULL)
124
0
  return(NULL);
125
126
0
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
127
0
  *q++ = TOLOWER(*p++);
128
129
0
    if (i == 0)
130
0
  return(NULL);
131
132
0
    llen = i;
133
134
0
    if (*p) {
135
0
  if (*p++ != '-')
136
0
      return(NULL);
137
0
        *q++ = '_';
138
139
0
  for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
140
0
      *q++ = TOUPPER(*p++);
141
142
0
  if (i == 0 || *p)
143
0
      return(NULL);
144
145
0
        memcpy(q, ".UTF-8", 7);
146
0
        locale = newlocale(LC_ALL_MASK, localeName, NULL);
147
0
        if (locale != NULL)
148
0
            return(locale);
149
150
        /* Continue without using country code */
151
152
0
        q = localeName + llen;
153
0
    }
154
155
    /* Try locale without territory, e.g. for Esperanto (eo) */
156
157
0
    memcpy(q, ".UTF-8", 7);
158
0
    locale = newlocale(LC_ALL_MASK, localeName, NULL);
159
0
    if (locale != NULL)
160
0
        return(locale);
161
162
    /* Try to find most common country for language */
163
164
0
    if (llen != 2)
165
0
        return(NULL);
166
167
0
    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
168
0
    if (region == NULL)
169
0
        return(NULL);
170
171
0
    q = localeName + llen;
172
0
    *q++ = '_';
173
0
    *q++ = region[0];
174
0
    *q++ = region[1];
175
0
    memcpy(q, ".UTF-8", 7);
176
0
    locale = newlocale(LC_ALL_MASK, localeName, NULL);
177
178
0
    return(locale);
179
0
#endif
180
181
#ifdef XSLT_LOCALE_WINAPI
182
{
183
    void          *locale = NULL;
184
    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
185
    xmlChar       *q = localeName;
186
    const xmlChar *p = languageTag;
187
    int           i, llen;
188
    const xmlChar *region = NULL;
189
190
    if (languageTag == NULL) goto end;
191
192
    xsltEnumSupportedLocales();
193
194
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
195
  *q++ = TOLOWER(*p++);
196
    if (i == 0) goto end;
197
198
    llen = i;
199
    *q++ = '-';
200
    if (*p) { /*if country tag is given*/
201
  if (*p++ != '-') goto end;
202
203
  for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
204
      *q++ = TOUPPER(*p++);
205
  if (i == 0 || *p) goto end;
206
207
  *q = '\0';
208
  locale = xslt_locale_WINAPI(localeName);
209
  if (locale != (xsltLocale)0) goto end;
210
    }
211
    /* Try to find most common country for language */
212
    region = xsltDefaultRegion(localeName);
213
    if (region == NULL) goto end;
214
215
    strcpy((char *) localeName + llen + 1, (char *) region);
216
    locale = xslt_locale_WINAPI(localeName);
217
end:
218
    return(locale);
219
}
220
#endif
221
222
#ifdef XSLT_LOCALE_NONE
223
    return(NULL);
224
#endif
225
0
}
226
227
static const xmlChar*
228
0
xsltDefaultRegion(const xmlChar *localeName) {
229
0
    xmlChar c;
230
    /* region should be xmlChar, but gcc warns on all string assignments */
231
0
    const char *region = NULL;
232
233
0
    c = localeName[1];
234
    /* This is based on the locales from glibc 2.3.3 */
235
236
0
    switch (localeName[0]) {
237
0
        case 'a':
238
0
            if (c == 'a' || c == 'm') region = "ET";
239
0
            else if (c == 'f') region = "ZA";
240
0
            else if (c == 'n') region = "ES";
241
0
            else if (c == 'r') region = "AE";
242
0
            else if (c == 'z') region = "AZ";
243
0
            break;
244
0
        case 'b':
245
0
            if (c == 'e') region = "BY";
246
0
            else if (c == 'g') region = "BG";
247
0
            else if (c == 'n') region = "BD";
248
0
            else if (c == 'r') region = "FR";
249
0
            else if (c == 's') region = "BA";
250
0
            break;
251
0
        case 'c':
252
0
            if (c == 'a') region = "ES";
253
0
            else if (c == 's') region = "CZ";
254
0
            else if (c == 'y') region = "GB";
255
0
            break;
256
0
        case 'd':
257
0
            if (c == 'a') region = "DK";
258
0
            else if (c == 'e') region = "DE";
259
0
            break;
260
0
        case 'e':
261
0
            if (c == 'l') region = "GR";
262
0
            else if (c == 'n' || c == 'o') region = "US";
263
0
            else if (c == 's' || c == 'u') region = "ES";
264
0
            else if (c == 't') region = "EE";
265
0
            break;
266
0
        case 'f':
267
0
            if (c == 'a') region = "IR";
268
0
            else if (c == 'i') region = "FI";
269
0
            else if (c == 'o') region = "FO";
270
0
            else if (c == 'r') region = "FR";
271
0
            break;
272
0
        case 'g':
273
0
            if (c == 'a') region = "IE";
274
0
            else if (c == 'l') region = "ES";
275
0
            else if (c == 'v') region = "GB";
276
0
            break;
277
0
        case 'h':
278
0
            if (c == 'e') region = "IL";
279
0
            else if (c == 'i') region = "IN";
280
0
            else if (c == 'r') region = "HT";
281
0
            else if (c == 'u') region = "HU";
282
0
            break;
283
0
        case 'i':
284
0
            if (c == 'd') region = "ID";
285
0
            else if (c == 's') region = "IS";
286
0
            else if (c == 't') region = "IT";
287
0
            else if (c == 'w') region = "IL";
288
0
            break;
289
0
        case 'j':
290
0
            if (c == 'a') region = "JP";
291
0
            break;
292
0
        case 'k':
293
0
            if (c == 'l') region = "GL";
294
0
            else if (c == 'o') region = "KR";
295
0
            else if (c == 'w') region = "GB";
296
0
            break;
297
0
        case 'l':
298
0
            if (c == 't') region = "LT";
299
0
            else if (c == 'v') region = "LV";
300
0
            break;
301
0
        case 'm':
302
0
            if (c == 'k') region = "MK";
303
0
            else if (c == 'l' || c == 'r') region = "IN";
304
0
            else if (c == 'n') region = "MN";
305
0
            else if (c == 's') region = "MY";
306
0
            else if (c == 't') region = "MT";
307
0
            break;
308
0
        case 'n':
309
0
            if (c == 'b' || c == 'n' || c == 'o') region = "NO";
310
0
            else if (c == 'e') region = "NP";
311
0
            else if (c == 'l') region = "NL";
312
0
            break;
313
0
        case 'o':
314
0
            if (c == 'm') region = "ET";
315
0
            break;
316
0
        case 'p':
317
0
            if (c == 'a') region = "IN";
318
0
            else if (c == 'l') region = "PL";
319
0
            else if (c == 't') region = "PT";
320
0
            break;
321
0
        case 'r':
322
0
            if (c == 'o') region = "RO";
323
0
            else if (c == 'u') region = "RU";
324
0
            break;
325
0
        case 's':
326
0
            switch (c) {
327
0
                case 'e': region = "NO"; break;
328
0
                case 'h': region = "YU"; break;
329
0
                case 'k': region = "SK"; break;
330
0
                case 'l': region = "SI"; break;
331
0
                case 'o': region = "ET"; break;
332
0
                case 'q': region = "AL"; break;
333
0
                case 't': region = "ZA"; break;
334
0
                case 'v': region = "SE"; break;
335
0
            }
336
0
            break;
337
0
        case 't':
338
0
            if (c == 'a' || c == 'e') region = "IN";
339
0
            else if (c == 'h') region = "TH";
340
0
            else if (c == 'i') region = "ER";
341
0
            else if (c == 'r') region = "TR";
342
0
            else if (c == 't') region = "RU";
343
0
            break;
344
0
        case 'u':
345
0
            if (c == 'k') region = "UA";
346
0
            else if (c == 'r') region = "PK";
347
0
            break;
348
0
        case 'v':
349
0
            if (c == 'i') region = "VN";
350
0
            break;
351
0
        case 'w':
352
0
            if (c == 'a') region = "BE";
353
0
            break;
354
0
        case 'x':
355
0
            if (c == 'h') region = "ZA";
356
0
            break;
357
0
        case 'z':
358
0
            if (c == 'h') region = "CN";
359
0
            else if (c == 'u') region = "ZA";
360
0
            break;
361
0
    }
362
0
    return((xmlChar *)region);
363
0
}
364
365
/**
 * xsltFreeLocale:
 * @locale: the locale to free, may be NULL
 *
 * Frees a locale created with xsltNewLocale. Only the POSIX build
 * releases anything; in the other builds the argument is ignored.
 */
void
xsltFreeLocale(void *locale) {
#ifdef XSLT_LOCALE_POSIX
    if (locale == NULL)
        return;
    freelocale(locale);
#else
    (void) locale;
#endif
}
380
381
/**
382
 * xsltStrxfrm:
383
 * @locale: locale created with xsltNewLocale
384
 * @string: UTF-8 string to transform
385
 *
386
 * Transforms a string according to locale. The transformed string must be
387
 * freed with xmlFree.
388
 *
389
 * Returns the transformed string or NULL on error
390
 */
391
xmlChar *
392
xsltStrxfrm(void *vlocale, const xmlChar *string)
393
0
{
394
#ifdef XSLT_LOCALE_NONE
395
    return(NULL);
396
#else
397
0
    xmlChar *xstr;
398
399
0
#ifdef XSLT_LOCALE_POSIX
400
0
    size_t xstrlen, r;
401
402
0
    xstrlen = strxfrm_l(NULL, (const char *)string, 0, vlocale) + 1;
403
0
    xstr = (xmlChar *) xmlMalloc(xstrlen);
404
0
    if (xstr == NULL) {
405
0
  xsltTransformError(NULL, NULL, NULL,
406
0
      "xsltStrxfrm : out of memory error\n");
407
0
  return(NULL);
408
0
    }
409
410
0
    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, vlocale);
411
412
0
    if (r >= xstrlen) {
413
0
  xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
414
0
        xmlFree(xstr);
415
0
        return(NULL);
416
0
    }
417
0
#endif
418
419
#ifdef XSLT_LOCALE_WINAPI
420
    int wstrlen, xstrlen, r;
421
    wchar_t *wstr;
422
    LCID *lcid = vlocale;
423
424
    wstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
425
    if (wstrlen == 0) {
426
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
427
        return(NULL);
428
    }
429
    wstr = (wchar_t *) xmlMalloc(wstrlen * sizeof(wchar_t));
430
    if (wstr == NULL) {
431
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
432
        return(NULL);
433
    }
434
    r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, wstr, wstrlen);
435
    if (r == 0) {
436
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
437
        xmlFree(wstr);
438
        return(NULL);
439
    }
440
    /* This returns the size in bytes. */
441
    xstrlen = LCMapStringW(*lcid, LCMAP_SORTKEY, wstr, wstrlen, NULL, 0);
442
    if (xstrlen == 0) {
443
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : LCMapStringW failed\n");
444
        xmlFree(wstr);
445
        return(NULL);
446
    }
447
    xstr = (xmlChar*) xmlMalloc(xstrlen);
448
    if (xstr == NULL) {
449
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
450
        xmlFree(wstr);
451
        return(NULL);
452
    }
453
    r = LCMapStringW(*lcid, LCMAP_SORTKEY, wstr, wstrlen, (wchar_t *) xstr,
454
                     xstrlen);
455
    xmlFree(wstr);
456
    if (r == 0) {
457
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : LCMapStringW failed\n");
458
        xmlFree(xstr);
459
        return(NULL);
460
    }
461
#endif /* XSLT_LOCALE_WINAPI */
462
463
0
    return(xstr);
464
0
#endif /* XSLT_LOCALE_NONE */
465
0
}
466
467
/**
468
 * xsltLocaleStrcmp:
469
 * @locale: unused
470
 * @str1: a string transformed with xsltStrxfrm
471
 * @str2: a string transformed with xsltStrxfrm
472
 *
473
 * DEPRECATED: Same as xmlStrcmp.
474
 *
475
 * Compares two strings transformed with xsltStrxfrm.
476
 *
477
 * Returns a value < 0 if str1 sorts before str2,
478
 *         a value > 0 if str1 sorts after str2,
479
 *         0 if str1 and str2 are equal wrt sorting
480
 */
481
int
482
0
xsltLocaleStrcmp(void *locale, const xmlChar *str1, const xmlChar *str2) {
483
0
    (void)locale;
484
0
    return(xmlStrcmp(str1, str2));
485
0
}
486
487
#ifdef XSLT_LOCALE_WINAPI
488
/**
489
 * xsltCountSupportedLocales:
490
 * @lcid: not used
491
 *
492
 * callback used to count locales
493
 *
494
 * Returns TRUE
495
 */
496
static BOOL CALLBACK
497
xsltCountSupportedLocales(LPSTR lcid) {
498
    (void) lcid;
499
    ++xsltLocaleListSize;
500
    return(TRUE);
501
}
502
503
/**
504
 * xsltIterateSupportedLocales:
505
 * @lcid: not used
506
 *
507
 * callback used to track locales
508
 *
509
 * Returns TRUE if not at the end of the array
510
 */
511
static BOOL CALLBACK
512
xsltIterateSupportedLocales(LPSTR lcid) {
513
    static int count = 0;
514
    xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
515
    xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
516
    int        k, l;
517
    xsltRFC1766Info *p = xsltLocaleList + count;
518
519
    k = sscanf(lcid, "%lx", (unsigned long*)&p->lcid);
520
    if (k < 1) goto end;
521
    /*don't count terminating null character*/
522
    k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
523
                       (char *) iso639lang, sizeof(iso639lang));
524
    if (--k < 1) goto end;
525
    l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
526
                       (char *) iso3136ctry, sizeof(iso3136ctry));
527
    if (--l < 1) goto end;
528
529
    {  /*fill results*/
530
  xmlChar    *q = p->tag;
531
  memcpy(q, iso639lang, k);
532
  q += k;
533
  *q++ = '-';
534
  memcpy(q, iso3136ctry, l);
535
  q += l;
536
  *q = '\0';
537
    }
538
    ++count;
539
end:
540
    return((count < xsltLocaleListSize) ? TRUE : FALSE);
541
}
542
543
544
static void
545
xsltEnumSupportedLocales(void) {
546
    xmlRMutexLock(xsltLocaleMutex);
547
    if (xsltLocaleListSize <= 0) {
548
  size_t len;
549
550
  EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
551
552
  len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
553
  xsltLocaleList = xmlMalloc(len);
554
  memset(xsltLocaleList, 0, len);
555
  EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
556
    }
557
    xmlRMutexUnlock(xsltLocaleMutex);
558
}
559
560
#endif /*def XSLT_LOCALE_WINAPI*/