Coverage Report

Created: 2024-08-17 06:44

/src/libxslt/libxslt/xsltlocale.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * xsltlocale.c: locale handling
3
 *
4
 * Reference:
5
 * RFC 3066: Tags for the Identification of Languages
6
 * http://www.ietf.org/rfc/rfc3066.txt
7
 * ISO 639-1, ISO 3166-1
8
 *
9
 * Author: Nick Wellnhofer
10
 * winapi port: Roumen Petrov
11
 */
12
13
#define IN_LIBXSLT
14
#include "libxslt.h"
15
16
#include <string.h>
17
#include <libxml/xmlmemory.h>
18
19
#include "xsltlocale.h"
20
#include "xsltutils.h"
21
22
#ifdef HAVE_STRXFRM_L
23
24
  #define XSLT_LOCALE_POSIX
25
26
  #ifdef HAVE_LOCALE_H
27
    #include <locale.h>
28
  #endif
29
  #ifdef HAVE_XLOCALE_H
30
    #include <xlocale.h>
31
  #endif
32
33
#elif defined(_WIN32)
34
35
  #define XSLT_LOCALE_WINAPI
36
37
  #include <windows.h>
38
  #include <winnls.h>
39
40
#else
41
42
  #define XSLT_LOCALE_NONE
43
44
#endif
45
46
0
/*
 * ASCII-only character helpers. TOUPPER/TOLOWER simply clear/set bit 0x20,
 * so the result is only meaningful when the argument is an ASCII letter.
 * The argument is parenthesized so that expressions containing operators
 * of lower precedence than '&' or '|' (e.g. a ternary) expand correctly.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/* Maximum lengths, without terminating null character */
#define XSLTMAX_ISO639LANGLEN   8
#define XSLTMAX_ISO3166CNTRYLEN 8
/* <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN      (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
55
56
static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
57
58
#ifdef XSLT_LOCALE_WINAPI
59
xmlRMutexPtr xsltLocaleMutex = NULL;
60
61
struct xsltRFC1766Info_s {
62
      /*note typedef unsigned char xmlChar !*/
63
    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
64
    LCID       lcid;
65
};
66
typedef struct xsltRFC1766Info_s xsltRFC1766Info;
67
68
static int xsltLocaleListSize = 0;
69
static xsltRFC1766Info *xsltLocaleList = NULL;
70
71
72
static void *
73
xslt_locale_WINAPI(const xmlChar *languageTag) {
74
    int k;
75
    xsltRFC1766Info *p = xsltLocaleList;
76
77
    for (k=0; k<xsltLocaleListSize; k++, p++)
78
  if (xmlStrcmp(p->tag, languageTag) == 0)
79
            return(&p->lcid);
80
    return(NULL);
81
}
82
83
static void xsltEnumSupportedLocales(void);
84
#endif
85
86
/**
 * xsltFreeLocales:
 *
 * Shutdown hook of the locale support. With the Windows API backend the
 * table of supported locales is released while xsltLocaleMutex is held;
 * with every other backend this function does nothing.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xmlRMutexLock(xsltLocaleMutex);

    xmlFree(xsltLocaleList);
    xsltLocaleList = NULL;

    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
100
101
/**
102
 * xsltNewLocale:
103
 * @languageTag: RFC 3066 language tag
104
 *
105
 * Creates a new locale of an opaque system dependent type based on the
106
 * language tag.
107
 *
108
 * Returns the locale or NULL on error or if no matching locale was found
109
 */
110
void *
111
0
xsltNewLocale(const xmlChar *languageTag, int lowerFirst ATTRIBUTE_UNUSED) {
112
0
#ifdef XSLT_LOCALE_POSIX
113
0
    locale_t locale;
114
0
    char localeName[XSLTMAX_LANGTAGLEN+7]; /* 7 chars for ".UTF-8\0" */
115
0
    const xmlChar *p = languageTag;
116
0
    const char *region = NULL;
117
0
    char *q = localeName;
118
0
    int i, llen;
119
120
    /* Convert something like "pt-br" to "pt_BR.UTF-8" */
121
122
0
    if (languageTag == NULL)
123
0
  return(NULL);
124
125
0
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
126
0
  *q++ = TOLOWER(*p++);
127
128
0
    if (i == 0)
129
0
  return(NULL);
130
131
0
    llen = i;
132
133
0
    if (*p) {
134
0
  if (*p++ != '-')
135
0
      return(NULL);
136
0
        *q++ = '_';
137
138
0
  for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
139
0
      *q++ = TOUPPER(*p++);
140
141
0
  if (i == 0 || *p)
142
0
      return(NULL);
143
144
0
        memcpy(q, ".UTF-8", 7);
145
0
        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
146
0
        if (locale != NULL)
147
0
            return(locale);
148
149
        /* Continue without using country code */
150
151
0
        q = localeName + llen;
152
0
    }
153
154
    /* Try locale without territory, e.g. for Esperanto (eo) */
155
156
0
    memcpy(q, ".UTF-8", 7);
157
0
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
158
0
    if (locale != NULL)
159
0
        return(locale);
160
161
    /* Try to find most common country for language */
162
163
0
    if (llen != 2)
164
0
        return(NULL);
165
166
0
    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
167
0
    if (region == NULL)
168
0
        return(NULL);
169
170
0
    q = localeName + llen;
171
0
    *q++ = '_';
172
0
    *q++ = region[0];
173
0
    *q++ = region[1];
174
0
    memcpy(q, ".UTF-8", 7);
175
0
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
176
177
0
    return(locale);
178
0
#endif
179
180
#ifdef XSLT_LOCALE_WINAPI
181
{
182
    void          *locale = NULL;
183
    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
184
    xmlChar       *q = localeName;
185
    const xmlChar *p = languageTag;
186
    int           i, llen;
187
    const xmlChar *region = NULL;
188
189
    if (languageTag == NULL) goto end;
190
191
    xsltEnumSupportedLocales();
192
193
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
194
  *q++ = TOLOWER(*p++);
195
    if (i == 0) goto end;
196
197
    llen = i;
198
    *q++ = '-';
199
    if (*p) { /*if country tag is given*/
200
  if (*p++ != '-') goto end;
201
202
  for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
203
      *q++ = TOUPPER(*p++);
204
  if (i == 0 || *p) goto end;
205
206
  *q = '\0';
207
  locale = xslt_locale_WINAPI(localeName);
208
  if (locale != (xsltLocale)0) goto end;
209
    }
210
    /* Try to find most common country for language */
211
    region = xsltDefaultRegion(localeName);
212
    if (region == NULL) goto end;
213
214
    strcpy((char *) localeName + llen + 1, (char *) region);
215
    locale = xslt_locale_WINAPI(localeName);
216
end:
217
    return(locale);
218
}
219
#endif
220
221
#ifdef XSLT_LOCALE_NONE
222
    return(NULL);
223
#endif
224
0
}
225
226
/*
 * xsltDefaultRegion:
 * @localeName: buffer whose first two bytes hold a lower-case language code
 *
 * Maps a two-letter language code to the two-letter code of its most
 * common country. Only localeName[0] and localeName[1] are inspected, so
 * the buffer doesn't have to be terminated after the language code.
 *
 * Returns a pointer to a static, null-terminated two-letter region code,
 * or NULL if the language is not in the table.
 */
static const xmlChar*
xsltDefaultRegion(const xmlChar *localeName) {
    /*
     * This is based on the locales from glibc 2.3.3.
     * The strings are plain char because gcc warns on assigning string
     * literals to xmlChar; the result is cast on return.
     */
    static const struct {
        char lang[3];
        char region[3];
    } defaults[] = {
        { "aa", "ET" }, { "am", "ET" }, { "af", "ZA" }, { "an", "ES" },
        { "ar", "AE" }, { "az", "AZ" },
        { "be", "BY" }, { "bg", "BG" }, { "bn", "BD" }, { "br", "FR" },
        { "bs", "BA" },
        { "ca", "ES" }, { "cs", "CZ" }, { "cy", "GB" },
        { "da", "DK" }, { "de", "DE" },
        { "el", "GR" }, { "en", "US" }, { "eo", "US" }, { "es", "ES" },
        { "eu", "ES" }, { "et", "EE" },
        { "fa", "IR" }, { "fi", "FI" }, { "fo", "FO" }, { "fr", "FR" },
        { "ga", "IE" }, { "gl", "ES" }, { "gv", "GB" },
        /* "hr" is Croatian: region HR (was mistakenly HT, i.e. Haiti) */
        { "he", "IL" }, { "hi", "IN" }, { "hr", "HR" }, { "hu", "HU" },
        { "id", "ID" }, { "is", "IS" }, { "it", "IT" }, { "iw", "IL" },
        { "ja", "JP" },
        { "kl", "GL" }, { "ko", "KR" }, { "kw", "GB" },
        { "lt", "LT" }, { "lv", "LV" },
        { "mk", "MK" }, { "ml", "IN" }, { "mr", "IN" }, { "mn", "MN" },
        { "ms", "MY" }, { "mt", "MT" },
        { "nb", "NO" }, { "nn", "NO" }, { "no", "NO" }, { "ne", "NP" },
        { "nl", "NL" },
        { "om", "ET" },
        { "pa", "IN" }, { "pl", "PL" }, { "pt", "PT" },
        { "ro", "RO" }, { "ru", "RU" },
        { "se", "NO" }, { "sh", "YU" }, { "sk", "SK" }, { "sl", "SI" },
        { "so", "ET" }, { "sq", "AL" }, { "st", "ZA" }, { "sv", "SE" },
        { "ta", "IN" }, { "te", "IN" }, { "th", "TH" }, { "ti", "ER" },
        { "tr", "TR" }, { "tt", "RU" },
        { "uk", "UA" }, { "ur", "PK" },
        { "vi", "VN" },
        { "wa", "BE" },
        { "xh", "ZA" },
        { "zh", "CN" }, { "zu", "ZA" }
    };
    unsigned int i;

    for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
        if ((localeName[0] == defaults[i].lang[0]) &&
            (localeName[1] == defaults[i].lang[1]))
            return((const xmlChar *) defaults[i].region);
    }
    return(NULL);
}
363
364
/**
 * xsltFreeLocale:
 * @locale: the locale to free, may be NULL
 *
 * Frees a locale created with xsltNewLocale. With the POSIX backend a
 * non-NULL locale is passed to freelocale; with the other backends there
 * is nothing to release and the argument is ignored.
 */
void
xsltFreeLocale(void *locale) {
#ifdef XSLT_LOCALE_POSIX
    if (locale == NULL)
        return;
    freelocale(locale);
#else
    (void) locale;
#endif
}
379
380
/**
381
 * xsltStrxfrm:
382
 * @locale: locale created with xsltNewLocale
383
 * @string: UTF-8 string to transform
384
 *
385
 * Transforms a string according to locale. The transformed string must be
386
 * freed with xmlFree.
387
 *
388
 * Returns the transformed string or NULL on error
389
 */
390
xmlChar *
391
xsltStrxfrm(void *vlocale, const xmlChar *string)
392
0
{
393
#ifdef XSLT_LOCALE_NONE
394
    return(NULL);
395
#else
396
0
    xmlChar *xstr;
397
398
0
#ifdef XSLT_LOCALE_POSIX
399
0
    size_t xstrlen, r;
400
401
0
    xstrlen = strxfrm_l(NULL, (const char *)string, 0, vlocale) + 1;
402
0
    xstr = (xmlChar *) xmlMalloc(xstrlen);
403
0
    if (xstr == NULL) {
404
0
  xsltTransformError(NULL, NULL, NULL,
405
0
      "xsltStrxfrm : out of memory error\n");
406
0
  return(NULL);
407
0
    }
408
409
0
    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, vlocale);
410
411
0
    if (r >= xstrlen) {
412
0
  xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
413
0
        xmlFree(xstr);
414
0
        return(NULL);
415
0
    }
416
0
#endif
417
418
#ifdef XSLT_LOCALE_WINAPI
419
    int wstrlen, xstrlen, r;
420
    wchar_t *wstr;
421
    LCID *lcid = vlocale;
422
423
    wstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
424
    if (wstrlen == 0) {
425
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
426
        return(NULL);
427
    }
428
    wstr = (wchar_t *) xmlMalloc(wstrlen * sizeof(wchar_t));
429
    if (wstr == NULL) {
430
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
431
        return(NULL);
432
    }
433
    r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, wstr, wstrlen);
434
    if (r == 0) {
435
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
436
        xmlFree(wstr);
437
        return(NULL);
438
    }
439
    /* This returns the size in bytes. */
440
    xstrlen = LCMapStringW(*lcid, LCMAP_SORTKEY, wstr, wstrlen, NULL, 0);
441
    if (xstrlen == 0) {
442
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : LCMapStringW failed\n");
443
        xmlFree(wstr);
444
        return(NULL);
445
    }
446
    xstr = (xmlChar*) xmlMalloc(xstrlen);
447
    if (xstr == NULL) {
448
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
449
        xmlFree(wstr);
450
        return(NULL);
451
    }
452
    r = LCMapStringW(*lcid, LCMAP_SORTKEY, wstr, wstrlen, (wchar_t *) xstr,
453
                     xstrlen);
454
    xmlFree(wstr);
455
    if (r == 0) {
456
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : LCMapStringW failed\n");
457
        xmlFree(xstr);
458
        return(NULL);
459
    }
460
#endif /* XSLT_LOCALE_WINAPI */
461
462
0
    return(xstr);
463
0
#endif /* XSLT_LOCALE_NONE */
464
0
}
465
466
/**
467
 * xsltLocaleStrcmp:
468
 * @locale: unused
469
 * @str1: a string transformed with xsltStrxfrm
470
 * @str2: a string transformed with xsltStrxfrm
471
 *
472
 * DEPRECATED: Same as xmlStrcmp.
473
 *
474
 * Compares two strings transformed with xsltStrxfrm.
475
 *
476
 * Returns a value < 0 if str1 sorts before str2,
477
 *         a value > 0 if str1 sorts after str2,
478
 *         0 if str1 and str2 are equal wrt sorting
479
 */
480
int
481
0
xsltLocaleStrcmp(void *locale, const xmlChar *str1, const xmlChar *str2) {
482
0
    (void)locale;
483
0
    return(xmlStrcmp(str1, str2));
484
0
}
485
486
#ifdef XSLT_LOCALE_WINAPI
487
/**
488
 * xsltCountSupportedLocales:
489
 * @lcid: not used
490
 *
491
 * callback used to count locales
492
 *
493
 * Returns TRUE
494
 */
495
static BOOL CALLBACK
496
xsltCountSupportedLocales(LPSTR lcid) {
497
    (void) lcid;
498
    ++xsltLocaleListSize;
499
    return(TRUE);
500
}
501
502
/**
503
 * xsltIterateSupportedLocales:
504
 * @lcid: not used
505
 *
506
 * callback used to track locales
507
 *
508
 * Returns TRUE if not at the end of the array
509
 */
510
static BOOL CALLBACK
511
xsltIterateSupportedLocales(LPSTR lcid) {
512
    static int count = 0;
513
    xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
514
    xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
515
    int        k, l;
516
    xsltRFC1766Info *p = xsltLocaleList + count;
517
518
    k = sscanf(lcid, "%lx", (unsigned long*)&p->lcid);
519
    if (k < 1) goto end;
520
    /*don't count terminating null character*/
521
    k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
522
                       (char *) iso639lang, sizeof(iso639lang));
523
    if (--k < 1) goto end;
524
    l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
525
                       (char *) iso3136ctry, sizeof(iso3136ctry));
526
    if (--l < 1) goto end;
527
528
    {  /*fill results*/
529
  xmlChar    *q = p->tag;
530
  memcpy(q, iso639lang, k);
531
  q += k;
532
  *q++ = '-';
533
  memcpy(q, iso3136ctry, l);
534
  q += l;
535
  *q = '\0';
536
    }
537
    ++count;
538
end:
539
    return((count < xsltLocaleListSize) ? TRUE : FALSE);
540
}
541
542
543
static void
544
xsltEnumSupportedLocales(void) {
545
    xmlRMutexLock(xsltLocaleMutex);
546
    if (xsltLocaleListSize <= 0) {
547
  size_t len;
548
549
  EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
550
551
  len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
552
  xsltLocaleList = xmlMalloc(len);
553
  memset(xsltLocaleList, 0, len);
554
  EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
555
    }
556
    xmlRMutexUnlock(xsltLocaleMutex);
557
}
558
559
#endif /*def XSLT_LOCALE_WINAPI*/