Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/loclikely.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1997-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  loclikely.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2010feb25
16
*   created by: Markus W. Scherer
17
*
18
*   Code for likely and minimized locale subtags, separated out from other .cpp files
19
*   that then do not depend on resource bundle code and likely-subtags data.
20
*/
21
22
#include "unicode/utypes.h"
23
#include "unicode/locid.h"
24
#include "unicode/putil.h"
25
#include "unicode/uchar.h"
26
#include "unicode/uloc.h"
27
#include "unicode/ures.h"
28
#include "unicode/uscript.h"
29
#include "cmemory.h"
30
#include "cstring.h"
31
#include "ulocimp.h"
32
#include "ustr_imp.h"
33
34
/**
35
 * This function looks for the localeID in the likelySubtags resource.
36
 *
37
 * @param localeID The tag to find.
38
 * @param buffer A buffer to hold the matching entry
39
 * @param bufferLength The length of the output buffer
40
 * @return A pointer to "buffer" if found, or a null pointer if not.
41
 */
42
static const char*  U_CALLCONV
43
findLikelySubtags(const char* localeID,
44
                  char* buffer,
45
                  int32_t bufferLength,
46
12
                  UErrorCode* err) {
47
12
    const char* result = NULL;
48
12
49
12
    if (!U_FAILURE(*err)) {
50
12
        int32_t resLen = 0;
51
12
        const UChar* s = NULL;
52
12
        UErrorCode tmpErr = U_ZERO_ERROR;
53
12
        UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
54
12
        if (U_SUCCESS(tmpErr)) {
55
12
            s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
56
12
57
12
            if (U_FAILURE(tmpErr)) {
58
6
                /*
59
6
                 * If a resource is missing, it's not really an error, it's
60
6
                 * just that we don't have any data for that particular locale ID.
61
6
                 */
62
6
                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
63
0
                    *err = tmpErr;
64
0
                }
65
6
            }
66
6
            else if (resLen >= bufferLength) {
67
0
                /* The buffer should never overflow. */
68
0
                *err = U_INTERNAL_PROGRAM_ERROR;
69
0
            }
70
6
            else {
71
6
                u_UCharsToChars(s, buffer, resLen + 1);
72
6
                result = buffer;
73
6
            }
74
12
75
12
            ures_close(subtags);
76
12
        } else {
77
0
            *err = tmpErr;
78
0
        }
79
12
    }
80
12
81
12
    return result;
82
12
}
83
84
/**
85
 * Append a tag to a buffer, adding the separator if necessary.  The buffer
86
 * must be large enough to contain the resulting tag plus any separator
87
 * necessary. The tag must not be a zero-length string.
88
 *
89
 * @param tag The tag to add.
90
 * @param tagLength The length of the tag.
91
 * @param buffer The output buffer.
92
 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
93
 **/
94
static void U_CALLCONV
95
appendTag(
96
    const char* tag,
97
    int32_t tagLength,
98
    char* buffer,
99
36
    int32_t* bufferLength) {
100
36
101
36
    if (*bufferLength > 0) {
102
18
        buffer[*bufferLength] = '_';
103
18
        ++(*bufferLength);
104
18
    }
105
36
106
36
    uprv_memmove(
107
36
        &buffer[*bufferLength],
108
36
        tag,
109
36
        tagLength);
110
36
111
36
    *bufferLength += tagLength;
112
36
}
113
114
/**
 * Canonical placeholder subtags used when a language, script or region is
 * unknown.
 **/
/* Placeholder for an unknown language. */
static const char* const unknownLanguage = "und";
/* Placeholder for an unknown script. */
static const char* const unknownScript = "Zzzz";
/* Placeholder for an unknown region. */
static const char* const unknownRegion = "ZZ";
120
121
/**
122
 * Create a tag string from the supplied parameters.  The lang, script and region
123
 * parameters may be NULL pointers. If they are, their corresponding length parameters
124
 * must be less than or equal to 0.
125
 *
126
 * If any of the language, script or region parameters are empty, and the alternateTags
127
 * parameter is not NULL, it will be parsed for potential language, script and region tags
128
 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
129
 * it contains no language tag, the default tag for the unknown language is used.
130
 *
131
 * If the length of the new string exceeds the capacity of the output buffer, 
132
 * the function copies as many bytes to the output buffer as it can, and returns
133
 * the error U_BUFFER_OVERFLOW_ERROR.
134
 *
135
 * If an illegal argument is provided, the function returns the error
136
 * U_ILLEGAL_ARGUMENT_ERROR.
137
 *
138
 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
139
 * the tag string fits in the output buffer, but the null terminator doesn't.
140
 *
141
 * @param lang The language tag to use.
142
 * @param langLength The length of the language tag.
143
 * @param script The script tag to use.
144
 * @param scriptLength The length of the script tag.
145
 * @param region The region tag to use.
146
 * @param regionLength The length of the region tag.
147
 * @param trailing Any trailing data to append to the new tag.
148
 * @param trailingLength The length of the trailing data.
149
 * @param alternateTags A string containing any alternate tags.
150
 * @param tag The output buffer.
151
 * @param tagCapacity The capacity of the output buffer.
152
 * @param err A pointer to a UErrorCode for error reporting.
153
 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
154
 **/
155
static int32_t U_CALLCONV
156
createTagStringWithAlternates(
157
    const char* lang,
158
    int32_t langLength,
159
    const char* script,
160
    int32_t scriptLength,
161
    const char* region,
162
    int32_t regionLength,
163
    const char* trailing,
164
    int32_t trailingLength,
165
    const char* alternateTags,
166
    char* tag,
167
    int32_t tagCapacity,
168
18
    UErrorCode* err) {
169
18
170
18
    if (U_FAILURE(*err)) {
171
0
        goto error;
172
0
    }
173
18
    else if (tag == NULL ||
174
18
             tagCapacity <= 0 ||
175
18
             langLength >= ULOC_LANG_CAPACITY ||
176
18
             scriptLength >= ULOC_SCRIPT_CAPACITY ||
177
18
             regionLength >= ULOC_COUNTRY_CAPACITY) {
178
0
        goto error;
179
0
    }
180
18
    else {
181
18
        /**
182
18
         * ULOC_FULLNAME_CAPACITY will provide enough capacity
183
18
         * that we can build a string that contains the language,
184
18
         * script and region code without worrying about overrunning
185
18
         * the user-supplied buffer.
186
18
         **/
187
18
        char tagBuffer[ULOC_FULLNAME_CAPACITY];
188
18
        int32_t tagLength = 0;
189
18
        int32_t capacityRemaining = tagCapacity;
190
18
        UBool regionAppended = FALSE;
191
18
192
18
        if (langLength > 0) {
193
12
            appendTag(
194
12
                lang,
195
12
                langLength,
196
12
                tagBuffer,
197
12
                &tagLength);
198
12
        }
199
6
        else if (alternateTags == NULL) {
200
0
            /*
201
0
             * Append the value for an unknown language, if
202
0
             * we found no language.
203
0
             */
204
0
            appendTag(
205
0
                unknownLanguage,
206
0
                (int32_t)uprv_strlen(unknownLanguage),
207
0
                tagBuffer,
208
0
                &tagLength);
209
0
        }
210
6
        else {
211
6
            /*
212
6
             * Parse the alternateTags string for the language.
213
6
             */
214
6
            char alternateLang[ULOC_LANG_CAPACITY];
215
6
            int32_t alternateLangLength = sizeof(alternateLang);
216
6
217
6
            alternateLangLength =
218
6
                uloc_getLanguage(
219
6
                    alternateTags,
220
6
                    alternateLang,
221
6
                    alternateLangLength,
222
6
                    err);
223
6
            if(U_FAILURE(*err) ||
224
6
                alternateLangLength >= ULOC_LANG_CAPACITY) {
225
0
                goto error;
226
0
            }
227
6
            else if (alternateLangLength == 0) {
228
0
                /*
229
0
                 * Append the value for an unknown language, if
230
0
                 * we found no language.
231
0
                 */
232
0
                appendTag(
233
0
                    unknownLanguage,
234
0
                    (int32_t)uprv_strlen(unknownLanguage),
235
0
                    tagBuffer,
236
0
                    &tagLength);
237
0
            }
238
6
            else {
239
6
                appendTag(
240
6
                    alternateLang,
241
6
                    alternateLangLength,
242
6
                    tagBuffer,
243
6
                    &tagLength);
244
6
            }
245
6
        }
246
18
247
18
        if (scriptLength > 0) {
248
6
            appendTag(
249
6
                script,
250
6
                scriptLength,
251
6
                tagBuffer,
252
6
                &tagLength);
253
6
        }
254
12
        else if (alternateTags != NULL) {
255
3
            /*
256
3
             * Parse the alternateTags string for the script.
257
3
             */
258
3
            char alternateScript[ULOC_SCRIPT_CAPACITY];
259
3
260
3
            const int32_t alternateScriptLength =
261
3
                uloc_getScript(
262
3
                    alternateTags,
263
3
                    alternateScript,
264
3
                    sizeof(alternateScript),
265
3
                    err);
266
3
267
3
            if (U_FAILURE(*err) ||
268
3
                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
269
0
                goto error;
270
0
            }
271
3
            else if (alternateScriptLength > 0) {
272
3
                appendTag(
273
3
                    alternateScript,
274
3
                    alternateScriptLength,
275
3
                    tagBuffer,
276
3
                    &tagLength);
277
3
            }
278
3
        }
279
18
280
18
        if (regionLength > 0) {
281
6
            appendTag(
282
6
                region,
283
6
                regionLength,
284
6
                tagBuffer,
285
6
                &tagLength);
286
6
287
6
            regionAppended = TRUE;
288
6
        }
289
12
        else if (alternateTags != NULL) {
290
3
            /*
291
3
             * Parse the alternateTags string for the region.
292
3
             */
293
3
            char alternateRegion[ULOC_COUNTRY_CAPACITY];
294
3
295
3
            const int32_t alternateRegionLength =
296
3
                uloc_getCountry(
297
3
                    alternateTags,
298
3
                    alternateRegion,
299
3
                    sizeof(alternateRegion),
300
3
                    err);
301
3
            if (U_FAILURE(*err) ||
302
3
                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
303
0
                goto error;
304
0
            }
305
3
            else if (alternateRegionLength > 0) {
306
3
                appendTag(
307
3
                    alternateRegion,
308
3
                    alternateRegionLength,
309
3
                    tagBuffer,
310
3
                    &tagLength);
311
3
312
3
                regionAppended = TRUE;
313
3
            }
314
3
        }
315
18
316
18
        {
317
18
            const int32_t toCopy =
318
18
                tagLength >= tagCapacity ? tagCapacity : tagLength;
319
18
320
18
            /**
321
18
             * Copy the partial tag from our internal buffer to the supplied
322
18
             * target.
323
18
             **/
324
18
            uprv_memcpy(
325
18
                tag,
326
18
                tagBuffer,
327
18
                toCopy);
328
18
329
18
            capacityRemaining -= toCopy;
330
18
        }
331
18
332
18
        if (trailingLength > 0) {
333
0
            if (*trailing != '@' && capacityRemaining > 0) {
334
0
                tag[tagLength++] = '_';
335
0
                --capacityRemaining;
336
0
                if (capacityRemaining > 0 && !regionAppended) {
337
0
                    /* extra separator is required */
338
0
                    tag[tagLength++] = '_';
339
0
                    --capacityRemaining;
340
0
                }
341
0
            }
342
0
343
0
            if (capacityRemaining > 0) {
344
0
                /*
345
0
                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
346
0
                 * don't know if the user-supplied buffers overlap.
347
0
                 */
348
0
                const int32_t toCopy =
349
0
                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
350
0
351
0
                uprv_memmove(
352
0
                    &tag[tagLength],
353
0
                    trailing,
354
0
                    toCopy);
355
0
            }
356
0
        }
357
18
358
18
        tagLength += trailingLength;
359
18
360
18
        return u_terminateChars(
361
18
                    tag,
362
18
                    tagCapacity,
363
18
                    tagLength,
364
18
                    err);
365
0
    }
366
0
367
0
error:
368
0
369
0
    /**
370
0
     * An overflow indicates the locale ID passed in
371
0
     * is ill-formed.  If we got here, and there was
372
0
     * no previous error, it's an implicit overflow.
373
0
     **/
374
0
    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
375
0
        U_SUCCESS(*err)) {
376
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
377
0
    }
378
0
379
0
    return -1;
380
18
}
381
382
/**
383
 * Create a tag string from the supplied parameters.  The lang, script and region
384
 * parameters may be NULL pointers. If they are, their corresponding length parameters
385
 * must be less than or equal to 0.  If the lang parameter is an empty string, the
386
 * default value for an unknown language is written to the output buffer.
387
 *
388
 * If the length of the new string exceeds the capacity of the output buffer, 
389
 * the function copies as many bytes to the output buffer as it can, and returns
390
 * the error U_BUFFER_OVERFLOW_ERROR.
391
 *
392
 * If an illegal argument is provided, the function returns the error
393
 * U_ILLEGAL_ARGUMENT_ERROR.
394
 *
395
 * @param lang The language tag to use.
396
 * @param langLength The length of the language tag.
397
 * @param script The script tag to use.
398
 * @param scriptLength The length of the script tag.
399
 * @param region The region tag to use.
400
 * @param regionLength The length of the region tag.
401
 * @param trailing Any trailing data to append to the new tag.
402
 * @param trailingLength The length of the trailing data.
403
 * @param tag The output buffer.
404
 * @param tagCapacity The capacity of the output buffer.
405
 * @param err A pointer to a UErrorCode for error reporting.
406
 * @return The length of the tag string, which may be greater than tagCapacity.
407
 **/
408
static int32_t U_CALLCONV
409
createTagString(
410
    const char* lang,
411
    int32_t langLength,
412
    const char* script,
413
    int32_t scriptLength,
414
    const char* region,
415
    int32_t regionLength,
416
    const char* trailing,
417
    int32_t trailingLength,
418
    char* tag,
419
    int32_t tagCapacity,
420
    UErrorCode* err)
421
12
{
422
12
    return createTagStringWithAlternates(
423
12
                lang,
424
12
                langLength,
425
12
                script,
426
12
                scriptLength,
427
12
                region,
428
12
                regionLength,
429
12
                trailing,
430
12
                trailingLength,
431
12
                NULL,
432
12
                tag,
433
12
                tagCapacity,
434
12
                err);
435
12
}
436
437
/**
438
 * Parse the language, script, and region subtags from a tag string, and copy the
439
 * results into the corresponding output parameters. The buffers are null-terminated,
440
 * unless overflow occurs.
441
 *
442
 * The langLength, scriptLength, and regionLength parameters are input/output
443
 * parameters, and must contain the capacity of their corresponding buffers on
444
 * input.  On output, they will contain the actual length of the buffers, not
445
 * including the null terminator.
446
 *
447
 * If the length of any of the output subtags exceeds the capacity of the corresponding
448
 * buffer, the function copies as many bytes to the output buffer as it can, and returns
449
 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
450
 * occurs.
451
 *
452
 * If an illegal argument is provided, the function returns the error
453
 * U_ILLEGAL_ARGUMENT_ERROR.
454
 *
455
 * @param localeID The locale ID to parse.
456
 * @param lang The language tag buffer.
457
 * @param langLength The length of the language tag.
458
 * @param script The script tag buffer.
459
 * @param scriptLength The length of the script tag.
460
 * @param region The region tag buffer.
461
 * @param regionLength The length of the region tag.
462
 * @param err A pointer to a UErrorCode for error reporting.
463
 * @return The number of chars of the localeID parameter consumed.
464
 **/
465
static int32_t U_CALLCONV
466
parseTagString(
467
    const char* localeID,
468
    char* lang,
469
    int32_t* langLength,
470
    char* script,
471
    int32_t* scriptLength,
472
    char* region,
473
    int32_t* regionLength,
474
    UErrorCode* err)
475
6
{
476
6
    const char* position = localeID;
477
6
    int32_t subtagLength = 0;
478
6
479
6
    if(U_FAILURE(*err) ||
480
6
       localeID == NULL ||
481
6
       lang == NULL ||
482
6
       langLength == NULL ||
483
6
       script == NULL ||
484
6
       scriptLength == NULL ||
485
6
       region == NULL ||
486
6
       regionLength == NULL) {
487
0
        goto error;
488
0
    }
489
6
490
6
    subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
491
6
    u_terminateChars(lang, *langLength, subtagLength, err);
492
6
493
6
    /*
494
6
     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
495
6
     * to be an error, because it indicates the user-supplied tag is
496
6
     * not well-formed.
497
6
     */
498
6
    if(U_FAILURE(*err)) {
499
0
        goto error;
500
0
    }
501
6
502
6
    *langLength = subtagLength;
503
6
504
6
    /*
505
6
     * If no language was present, use the value of unknownLanguage
506
6
     * instead.  Otherwise, move past any separator.
507
6
     */
508
6
    if (*langLength == 0) {
509
0
        uprv_strcpy(
510
0
            lang,
511
0
            unknownLanguage);
512
0
        *langLength = (int32_t)uprv_strlen(lang);
513
0
    }
514
6
    if (_isIDSeparator(*position)) {
515
6
        ++position;
516
6
    }
517
6
518
6
    subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
519
6
    u_terminateChars(script, *scriptLength, subtagLength, err);
520
6
521
6
    if(U_FAILURE(*err)) {
522
0
        goto error;
523
0
    }
524
6
525
6
    *scriptLength = subtagLength;
526
6
527
6
    if (*scriptLength > 0) {
528
3
        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
529
0
            /**
530
0
             * If the script part is the "unknown" script, then don't return it.
531
0
             **/
532
0
            *scriptLength = 0;
533
0
        }
534
3
535
3
        /*
536
3
         * Move past any separator.
537
3
         */
538
3
        if (_isIDSeparator(*position)) {
539
0
            ++position;
540
0
        }    
541
3
    }
542
6
543
6
    subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
544
6
    u_terminateChars(region, *regionLength, subtagLength, err);
545
6
546
6
    if(U_FAILURE(*err)) {
547
0
        goto error;
548
0
    }
549
6
550
6
    *regionLength = subtagLength;
551
6
552
6
    if (*regionLength > 0) {
553
3
        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
554
0
            /**
555
0
             * If the region part is the "unknown" region, then don't return it.
556
0
             **/
557
0
            *regionLength = 0;
558
0
        }
559
3
    } else if (*position != 0 && *position != '@') {
560
0
        /* back up over consumed trailing separator */
561
0
        --position;
562
0
    }
563
6
564
6
exit:
565
6
566
6
    return (int32_t)(position - localeID);
567
0
568
0
error:
569
0
570
0
    /**
571
0
     * If we get here, we have no explicit error, it's the result of an
572
0
     * illegal argument.
573
0
     **/
574
0
    if (!U_FAILURE(*err)) {
575
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
576
0
    }
577
0
578
0
    goto exit;
579
6
}
580
581
static int32_t U_CALLCONV
582
createLikelySubtagsString(
583
    const char* lang,
584
    int32_t langLength,
585
    const char* script,
586
    int32_t scriptLength,
587
    const char* region,
588
    int32_t regionLength,
589
    const char* variants,
590
    int32_t variantsLength,
591
    char* tag,
592
    int32_t tagCapacity,
593
    UErrorCode* err)
594
6
{
595
6
    /**
596
6
     * ULOC_FULLNAME_CAPACITY will provide enough capacity
597
6
     * that we can build a string that contains the language,
598
6
     * script and region code without worrying about overrunning
599
6
     * the user-supplied buffer.
600
6
     **/
601
6
    char tagBuffer[ULOC_FULLNAME_CAPACITY];
602
6
    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
603
6
604
6
    if(U_FAILURE(*err)) {
605
0
        goto error;
606
0
    }
607
6
608
6
    /**
609
6
     * Try the language with the script and region first.
610
6
     **/
611
6
    if (scriptLength > 0 && regionLength > 0) {
612
0
613
0
        const char* likelySubtags = NULL;
614
0
615
0
        createTagString(
616
0
            lang,
617
0
            langLength,
618
0
            script,
619
0
            scriptLength,
620
0
            region,
621
0
            regionLength,
622
0
            NULL,
623
0
            0,
624
0
            tagBuffer,
625
0
            sizeof(tagBuffer),
626
0
            err);
627
0
        if(U_FAILURE(*err)) {
628
0
            goto error;
629
0
        }
630
0
631
0
        likelySubtags =
632
0
            findLikelySubtags(
633
0
                tagBuffer,
634
0
                likelySubtagsBuffer,
635
0
                sizeof(likelySubtagsBuffer),
636
0
                err);
637
0
        if(U_FAILURE(*err)) {
638
0
            goto error;
639
0
        }
640
0
641
0
        if (likelySubtags != NULL) {
642
0
            /* Always use the language tag from the
643
0
               maximal string, since it may be more
644
0
               specific than the one provided. */
645
0
            return createTagStringWithAlternates(
646
0
                        NULL,
647
0
                        0,
648
0
                        NULL,
649
0
                        0,
650
0
                        NULL,
651
0
                        0,
652
0
                        variants,
653
0
                        variantsLength,
654
0
                        likelySubtags,
655
0
                        tag,
656
0
                        tagCapacity,
657
0
                        err);
658
0
        }
659
6
    }
660
6
661
6
    /**
662
6
     * Try the language with just the script.
663
6
     **/
664
6
    if (scriptLength > 0) {
665
3
666
3
        const char* likelySubtags = NULL;
667
3
668
3
        createTagString(
669
3
            lang,
670
3
            langLength,
671
3
            script,
672
3
            scriptLength,
673
3
            NULL,
674
3
            0,
675
3
            NULL,
676
3
            0,
677
3
            tagBuffer,
678
3
            sizeof(tagBuffer),
679
3
            err);
680
3
        if(U_FAILURE(*err)) {
681
0
            goto error;
682
0
        }
683
3
684
3
        likelySubtags =
685
3
            findLikelySubtags(
686
3
                tagBuffer,
687
3
                likelySubtagsBuffer,
688
3
                sizeof(likelySubtagsBuffer),
689
3
                err);
690
3
        if(U_FAILURE(*err)) {
691
0
            goto error;
692
0
        }
693
3
694
3
        if (likelySubtags != NULL) {
695
0
            /* Always use the language tag from the
696
0
               maximal string, since it may be more
697
0
               specific than the one provided. */
698
0
            return createTagStringWithAlternates(
699
0
                        NULL,
700
0
                        0,
701
0
                        NULL,
702
0
                        0,
703
0
                        region,
704
0
                        regionLength,
705
0
                        variants,
706
0
                        variantsLength,
707
0
                        likelySubtags,
708
0
                        tag,
709
0
                        tagCapacity,
710
0
                        err);
711
0
        }
712
6
    }
713
6
714
6
    /**
715
6
     * Try the language with just the region.
716
6
     **/
717
6
    if (regionLength > 0) {
718
3
719
3
        const char* likelySubtags = NULL;
720
3
721
3
        createTagString(
722
3
            lang,
723
3
            langLength,
724
3
            NULL,
725
3
            0,
726
3
            region,
727
3
            regionLength,
728
3
            NULL,
729
3
            0,
730
3
            tagBuffer,
731
3
            sizeof(tagBuffer),
732
3
            err);
733
3
        if(U_FAILURE(*err)) {
734
0
            goto error;
735
0
        }
736
3
737
3
        likelySubtags =
738
3
            findLikelySubtags(
739
3
                tagBuffer,
740
3
                likelySubtagsBuffer,
741
3
                sizeof(likelySubtagsBuffer),
742
3
                err);
743
3
        if(U_FAILURE(*err)) {
744
0
            goto error;
745
0
        }
746
3
747
3
        if (likelySubtags != NULL) {
748
0
            /* Always use the language tag from the
749
0
               maximal string, since it may be more
750
0
               specific than the one provided. */
751
0
            return createTagStringWithAlternates(
752
0
                        NULL,
753
0
                        0,
754
0
                        script,
755
0
                        scriptLength,
756
0
                        NULL,
757
0
                        0,
758
0
                        variants,
759
0
                        variantsLength,
760
0
                        likelySubtags,
761
0
                        tag,
762
0
                        tagCapacity,
763
0
                        err);
764
0
        }
765
6
    }
766
6
767
6
    /**
768
6
     * Finally, try just the language.
769
6
     **/
770
6
    {
771
6
        const char* likelySubtags = NULL;
772
6
773
6
        createTagString(
774
6
            lang,
775
6
            langLength,
776
6
            NULL,
777
6
            0,
778
6
            NULL,
779
6
            0,
780
6
            NULL,
781
6
            0,
782
6
            tagBuffer,
783
6
            sizeof(tagBuffer),
784
6
            err);
785
6
        if(U_FAILURE(*err)) {
786
0
            goto error;
787
0
        }
788
6
789
6
        likelySubtags =
790
6
            findLikelySubtags(
791
6
                tagBuffer,
792
6
                likelySubtagsBuffer,
793
6
                sizeof(likelySubtagsBuffer),
794
6
                err);
795
6
        if(U_FAILURE(*err)) {
796
0
            goto error;
797
0
        }
798
6
799
6
        if (likelySubtags != NULL) {
800
6
            /* Always use the language tag from the
801
6
               maximal string, since it may be more
802
6
               specific than the one provided. */
803
6
            return createTagStringWithAlternates(
804
6
                        NULL,
805
6
                        0,
806
6
                        script,
807
6
                        scriptLength,
808
6
                        region,
809
6
                        regionLength,
810
6
                        variants,
811
6
                        variantsLength,
812
6
                        likelySubtags,
813
6
                        tag,
814
6
                        tagCapacity,
815
6
                        err);
816
6
        }
817
0
    }
818
0
819
0
    return u_terminateChars(
820
0
                tag,
821
0
                tagCapacity,
822
0
                0,
823
0
                err);
824
0
825
0
error:
826
0
827
0
    if (!U_FAILURE(*err)) {
828
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
829
0
    }
830
0
831
0
    return -1;
832
0
}
833
834
/*
 * Scan the trailing (variant) part of a locale ID and jump to the enclosing
 * function's `error` label when a run of characters between separators
 * ('-' or '_') grows too long.  Scanning stops at '@', i.e. keywords are not
 * checked.
 *
 * The length counter is reset at every separator.  A character is rejected
 * when the counter is already greater than 8 at the time it is examined.
 * NOTE(review): this lets a 9-character run through and rejects the 10th
 * character; confirm whether a limit of 8 was intended.
 *
 * The expansion is a plain brace block (not do/while) so that existing call
 * sites keep compiling with or without a trailing semicolon.
 *
 * @param trailing       Pointer to the trailing data; evaluated several times.
 * @param trailingLength Number of chars to scan; evaluated several times.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    }
852
853
/**
 * Maximize a locale ID: split it into language, script, region and trailing
 * data, validate the trailing data with CHECK_TRAILING_VARIANT_SIZE, and let
 * createLikelySubtagsString() fill in the missing subtags.  When that yields
 * an empty result, the input locale ID is copied to the output unchanged.
 *
 * @param localeID The locale ID to maximize.
 * @param maximizedLocaleID The output buffer.
 * @param maximizedLocaleIDCapacity The capacity of the output buffer.
 * @param err A pointer to a UErrorCode for error reporting.
 * @return The length of the result, or -1 on error.  On the error path *err
 *         is set to U_ILLEGAL_ARGUMENT_ERROR unless it already holds a
 *         failure; a buffer overflow reported while parsing is also turned
 *         into U_ILLEGAL_ARGUMENT_ERROR.
 */
static int32_t
_uloc_addLikelySubtags(const char*    localeID,
         char* maximizedLocaleID,
         int32_t maximizedLocaleIDCapacity,
         UErrorCode* err)
{
    /* All locals are declared up front so that no `goto error` skips an initialization. */
    char lang[ULOC_LANG_CAPACITY];
    char script[ULOC_SCRIPT_CAPACITY];
    char region[ULOC_COUNTRY_CAPACITY];
    int32_t langLength = sizeof(lang);
    int32_t scriptLength = sizeof(script);
    int32_t regionLength = sizeof(region);
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
    int32_t resultLength = 0;

    if (U_FAILURE(*err) ||
        localeID == NULL ||
        maximizedLocaleID == NULL ||
        maximizedLocaleIDCapacity <= 0) {
        goto error;
    }

    trailingIndex = parseTagString(
        localeID,
        lang, &langLength,
        script, &scriptLength,
        region, &regionLength,
        err);
    if (U_FAILURE(*err)) {
        /* Overflow indicates an illegal argument error */
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
        }
        goto error;
    }

    /* Skip any separators; what follows is the trailing portion. */
    for (; _isIDSeparator(localeID[trailingIndex]); ++trailingIndex) {
    }
    trailing = localeID + trailingIndex;
    trailingLength = (int32_t)uprv_strlen(trailing);

    /* Jumps to `error` when the trailing data is rejected. */
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    resultLength =
        createLikelySubtagsString(
            lang, langLength,
            script, scriptLength,
            region, regionLength,
            trailing, trailingLength,
            maximizedLocaleID, maximizedLocaleIDCapacity,
            err);

    if (resultLength != 0) {
        return resultLength;
    }

    /*
     * Empty result: return localeID itself, truncated to the capacity if
     * necessary; u_terminateChars() is given the untruncated length.
     */
    {
        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
        const int32_t bytesToCopy =
            localIDLength <= maximizedLocaleIDCapacity ?
                localIDLength : maximizedLocaleIDCapacity;

        uprv_memcpy(maximizedLocaleID, localeID, bytesToCopy);

        return u_terminateChars(
                    maximizedLocaleID,
                    maximizedLocaleIDCapacity,
                    localIDLength,
                    err);
    }

error:

    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }

    return -1;
}
950
951
/**
 * Worker for uloc_minimizeSubtags(): removes the subtags of localeID that
 * likely-subtags maximization would add back.
 *
 * The language/script/region part of localeID is maximized with
 * uloc_addLikelySubtags(). Then three candidates (language only,
 * language + region, language + script) are expanded with
 * createLikelySubtagsString(). The first candidate whose expansion matches
 * the maximized tag is written out, followed by the trailing
 * variants/keywords of localeID. If no candidate matches, localeID itself
 * is copied to the output.
 *
 * @param localeID The locale ID to minimize; must not be NULL.
 * @param minimizedLocaleID The output buffer; must not be NULL.
 * @param minimizedLocaleIDCapacity The capacity of the output buffer;
 *                                  must be greater than 0.
 * @param err Error code. Set to U_ILLEGAL_ARGUMENT_ERROR for invalid
 *            arguments, and when parsing the tag reports a buffer overflow.
 * @return The length reported by createTagString() or u_terminateChars()
 *         for the minimized ID, or -1 on error.
 */
static int32_t
_uloc_minimizeSubtags(const char*    localeID,
         char* minimizedLocaleID,
         int32_t minimizedLocaleIDCapacity,
         UErrorCode* err)
{
    /**
     * ULOC_FULLNAME_CAPACITY will provide enough capacity
     * that we can build a string that contains the language,
     * script and region code without worrying about overrunning
     * the user-supplied buffer.
     **/
    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);

    char lang[ULOC_LANG_CAPACITY];
    int32_t langLength = sizeof(lang);
    char script[ULOC_SCRIPT_CAPACITY];
    int32_t scriptLength = sizeof(script);
    char region[ULOC_COUNTRY_CAPACITY];
    int32_t regionLength = sizeof(region);
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;

    if(U_FAILURE(*err)) {
        goto error;
    }
    else if (localeID == NULL ||
             minimizedLocaleID == NULL ||
             minimizedLocaleIDCapacity <= 0) {
        goto error;
    }

    trailingIndex =
        parseTagString(
            localeID,
            lang,
            &langLength,
            script,
            &scriptLength,
            region,
            &regionLength,
            err);
    if(U_FAILURE(*err)) {

        /* Overflow indicates an illegal argument error */
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
        }

        goto error;
    }

    /* Find the spot where the variants or the keywords begin, if any. */
    while (_isIDSeparator(localeID[trailingIndex])) {
        trailingIndex++;
    }
    trailing = &localeID[trailingIndex];
    trailingLength = (int32_t)uprv_strlen(trailing);

    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    /* Rebuild language_script_region without the trailing part. */
    createTagString(
        lang,
        langLength,
        script,
        scriptLength,
        region,
        regionLength,
        NULL,
        0,
        maximizedTagBuffer,
        maximizedTagBufferLength,
        err);
    if(U_FAILURE(*err)) {
        goto error;
    }

    /**
     * First, we need to get the maximization
     * from AddLikelySubtags.
     **/
    maximizedTagBufferLength =
        uloc_addLikelySubtags(
            maximizedTagBuffer,
            maximizedTagBuffer,
            maximizedTagBufferLength,
            err);

    if(U_FAILURE(*err)) {
        goto error;
    }

    /**
     * Start first with just the language.
     **/
    {
        char tagBuffer[ULOC_FULLNAME_CAPACITY];

        const int32_t tagBufferLength =
            createLikelySubtagsString(
                lang,
                langLength,
                NULL,
                0,
                NULL,
                0,
                NULL,
                0,
                tagBuffer,
                sizeof(tagBuffer),
                err);

        if(U_FAILURE(*err)) {
            goto error;
        }
        /*
         * A zero length means there is no expansion to compare against.
         * Comparing zero characters cannot tell the tags apart, so it
         * must not be treated as a match.
         */
        else if (tagBufferLength > 0 &&
                 uprv_strnicmp(
                    maximizedTagBuffer,
                    tagBuffer,
                    tagBufferLength) == 0) {

            return createTagString(
                        lang,
                        langLength,
                        NULL,
                        0,
                        NULL,
                        0,
                        trailing,
                        trailingLength,
                        minimizedLocaleID,
                        minimizedLocaleIDCapacity,
                        err);
        }
    }

    /**
     * Next, try the language and region.
     **/
    if (regionLength > 0) {

        char tagBuffer[ULOC_FULLNAME_CAPACITY];

        const int32_t tagBufferLength =
            createLikelySubtagsString(
                lang,
                langLength,
                NULL,
                0,
                region,
                regionLength,
                NULL,
                0,
                tagBuffer,
                sizeof(tagBuffer),
                err);

        if(U_FAILURE(*err)) {
            goto error;
        }
        /* As above: an empty expansion is not a match. */
        else if (tagBufferLength > 0 &&
                 uprv_strnicmp(
                    maximizedTagBuffer,
                    tagBuffer,
                    tagBufferLength) == 0) {

            return createTagString(
                        lang,
                        langLength,
                        NULL,
                        0,
                        region,
                        regionLength,
                        trailing,
                        trailingLength,
                        minimizedLocaleID,
                        minimizedLocaleIDCapacity,
                        err);
        }
    }

    /**
     * Finally, try the language and script.  This is our last chance,
     * since trying with all three subtags would only yield the
     * maximal version that we already have.
     **/
    if (scriptLength > 0 && regionLength > 0) {
        char tagBuffer[ULOC_FULLNAME_CAPACITY];

        const int32_t tagBufferLength =
            createLikelySubtagsString(
                lang,
                langLength,
                script,
                scriptLength,
                NULL,
                0,
                NULL,
                0,
                tagBuffer,
                sizeof(tagBuffer),
                err);

        if(U_FAILURE(*err)) {
            goto error;
        }
        /* As above: an empty expansion is not a match. */
        else if (tagBufferLength > 0 &&
                 uprv_strnicmp(
                    maximizedTagBuffer,
                    tagBuffer,
                    tagBufferLength) == 0) {

            return createTagString(
                        lang,
                        langLength,
                        script,
                        scriptLength,
                        NULL,
                        0,
                        trailing,
                        trailingLength,
                        minimizedLocaleID,
                        minimizedLocaleIDCapacity,
                        err);
        }
    }

    {
        /**
         * If we got here, return the locale ID parameter.
         * The copy is clamped to the output capacity; u_terminateChars()
         * is given the full length.
         **/
        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);

        uprv_memcpy(
            minimizedLocaleID,
            localeID,
            localeIDLength <= minimizedLocaleIDCapacity ?
                localeIDLength : minimizedLocaleIDCapacity);

        return u_terminateChars(
                    minimizedLocaleID,
                    minimizedLocaleIDCapacity,
                    localeIDLength,
                    err);
    }

error:

    /* Make sure a failure is always reported when we bail out. */
    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }

    return -1;
}
1206
1207
/**
 * Canonicalizes localeID into buffer via uloc_canonicalize().
 *
 * A result that does not fit (U_BUFFER_OVERFLOW_ERROR) or that fits without
 * room for the terminator (U_STRING_NOT_TERMINATED_WARNING) is reported as
 * U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param localeID The locale ID to canonicalize.
 * @param buffer The output buffer.
 * @param bufferCapacity The capacity of the output buffer.
 * @param err Error code, updated as described above.
 * @return TRUE if *err does not indicate a failure afterwards, FALSE otherwise.
 */
static UBool
do_canonicalize(const char*    localeID,
         char* buffer,
         int32_t bufferCapacity,
         UErrorCode* err)
{
    uloc_canonicalize(localeID, buffer, bufferCapacity, err);

    const UErrorCode canonStatus = *err;
    if (canonStatus == U_BUFFER_OVERFLOW_ERROR ||
        canonStatus == U_STRING_NOT_TERMINATED_WARNING) {
        /* The canonical form must fit, terminator included. */
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    return U_FAILURE(canonStatus) ? FALSE : TRUE;
}
1233
1234
/**
 * Canonicalizes localeID into a local buffer and then hands the canonical
 * form to _uloc_addLikelySubtags().
 *
 * @param localeID The locale ID to maximize.
 * @param maximizedLocaleID The output buffer.
 * @param maximizedLocaleIDCapacity The capacity of the output buffer.
 * @param err Error code.
 * @return -1 if canonicalization fails, otherwise the value returned by
 *         _uloc_addLikelySubtags().
 */
U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char*    localeID,
         char* maximizedLocaleID,
         int32_t maximizedLocaleIDCapacity,
         UErrorCode* err)
{
    char canonicalID[ULOC_FULLNAME_CAPACITY];

    const UBool canonicalized =
        do_canonicalize(localeID, canonicalID, sizeof(canonicalID), err);
    if (!canonicalized) {
        return -1;
    }

    return _uloc_addLikelySubtags(
                canonicalID,
                maximizedLocaleID,
                maximizedLocaleIDCapacity,
                err);
}
1257
1258
/**
 * Canonicalizes localeID into a local buffer and then hands the canonical
 * form to _uloc_minimizeSubtags().
 *
 * @param localeID The locale ID to minimize.
 * @param minimizedLocaleID The output buffer.
 * @param minimizedLocaleIDCapacity The capacity of the output buffer.
 * @param err Error code.
 * @return -1 if canonicalization fails, otherwise the value returned by
 *         _uloc_minimizeSubtags().
 */
U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char*    localeID,
         char* minimizedLocaleID,
         int32_t minimizedLocaleIDCapacity,
         UErrorCode* err)
{
    char canonicalID[ULOC_FULLNAME_CAPACITY];

    const UBool canonicalized =
        do_canonicalize(localeID, canonicalID, sizeof(canonicalID), err);
    if (!canonicalized) {
        return -1;
    }

    return _uloc_minimizeSubtags(
                canonicalID,
                minimizedLocaleID,
                minimizedLocaleIDCapacity,
                err);
}
1281
1282
// Lookup string of (language subtag, direction marker) pairs, used to find out
1283
// fast whether a common language is LTR (marker '-') or RTL (marker '+').
// The marker is the character immediately following the language subtag.
1284
static const char LANG_DIR_STRING[] =
1285
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1286
1287
// Implemented here because this calls uloc_addLikelySubtags().
1288
U_CAPI UBool U_EXPORT2
1289
0
uloc_isRightToLeft(const char *locale) {
1290
0
    UErrorCode errorCode = U_ZERO_ERROR;
1291
0
    char script[8];
1292
0
    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1293
0
    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294
0
            scriptLength == 0) {
1295
0
        // Fastpath: We know the likely scripts and their writing direction
1296
0
        // for some common languages.
1297
0
        errorCode = U_ZERO_ERROR;
1298
0
        char lang[8];
1299
0
        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1300
0
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1301
0
                langLength == 0) {
1302
0
            return FALSE;
1303
0
        }
1304
0
        const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1305
0
        if (langPtr != NULL) {
1306
0
            switch (langPtr[langLength]) {
1307
0
            case '-': return FALSE;
1308
0
            case '+': return TRUE;
1309
0
            default: break;  // partial match of a longer code
1310
0
            }
1311
0
        }
1312
0
        // Otherwise, find the likely script.
1313
0
        errorCode = U_ZERO_ERROR;
1314
0
        char likely[ULOC_FULLNAME_CAPACITY];
1315
0
        (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1316
0
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1317
0
            return FALSE;
1318
0
        }
1319
0
        scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1320
0
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1321
0
                scriptLength == 0) {
1322
0
            return FALSE;
1323
0
        }
1324
0
    }
1325
0
    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1326
0
    return uscript_isRightToLeft(scriptCode);
1327
0
}
1328
1329
U_NAMESPACE_BEGIN
1330
1331
// Forwards to uloc_isRightToLeft() using this locale's base name.
UBool
Locale::isRightToLeft() const {
    const char *baseName = getBaseName();
    return uloc_isRightToLeft(baseName);
}
1335
1336
U_NAMESPACE_END
1337
1338
// Capacity of the buffer used for an "rg" keyword value or a region code.
// It must at least allow for the rg key value (6) plus terminator (1).
1339
0
#define ULOC_RG_BUFLEN 8
1340
1341
/**
 * Determines the region to use for localeID and copies it to region.
 *
 * Sources, in order: a 6-character "rg" keyword value that ends in "ZZZZ"
 * (its first two characters, uppercased, are used); the locale's own
 * country code; and, if inferRegion is TRUE, the country code of the
 * locale after uloc_addLikelySubtags().
 *
 * @param localeID The locale ID to examine.
 * @param inferRegion Whether to fall back to likely subtags when the
 *                    locale has no country code.
 * @param region The output buffer.
 * @param regionCapacity The capacity of the output buffer.
 * @param status Error code; nothing is done if it already indicates failure.
 * @return 0 if *status indicates failure on entry, otherwise the value
 *         returned by u_terminateChars() for the region length.
 */
U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
                                     char *region, int32_t regionCapacity, UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }
    char rgBuf[ULOC_RG_BUFLEN];
    UErrorCode rgStatus = U_ZERO_ERROR;

    // First check for rg keyword value
    int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
    if (U_SUCCESS(rgStatus) && rgLen == 6) {
        // rgBuf guaranteed to be zero terminated here, with text len 6
        for (int32_t i = 0; rgBuf[i] != 0; ++i) {
            rgBuf[i] = uprv_toupper(rgBuf[i]);
        }
        // Only values of the form <region>ZZZZ are accepted.
        if (uprv_strcmp(rgBuf+2, "ZZZZ") == 0) {
            rgLen = 2;
        } else {
            rgLen = 0;
        }
    } else {
        rgLen = 0;
    }

    if (rgLen == 0) {
        // No valid rg keyword value, try for unicode_region_subtag
        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
        if (U_FAILURE(*status)) {
            rgLen = 0;
        } else if (rgLen == 0 && inferRegion) {
            // no unicode_region_subtag but inferRegion TRUE, try likely subtags
            char locBuf[ULOC_FULLNAME_CAPACITY];
            rgStatus = U_ZERO_ERROR;
            (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
            if (U_SUCCESS(rgStatus)) {
                rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
                if (U_FAILURE(*status)) {
                    rgLen = 0;
                }
            }
        }
    }

    // Cut rgBuf at the region length before copying it out.
    rgBuf[rgLen] = 0;
    uprv_strncpy(region, rgBuf, regionCapacity);
    return u_terminateChars(region, regionCapacity, rgLen, status);
}
1386