Coverage Report

Created: 2022-11-20 06:20

/src/icu/icu4c/source/common/loclikely.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1997-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  loclikely.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2010feb25
16
*   created by: Markus W. Scherer
17
*
18
*   Code for likely and minimized locale subtags, separated out from other .cpp files
19
*   that then do not depend on resource bundle code and likely-subtags data.
20
*/
21
22
#include "unicode/bytestream.h"
23
#include "unicode/utypes.h"
24
#include "unicode/locid.h"
25
#include "unicode/putil.h"
26
#include "unicode/uchar.h"
27
#include "unicode/uloc.h"
28
#include "unicode/ures.h"
29
#include "unicode/uscript.h"
30
#include "bytesinkutil.h"
31
#include "charstr.h"
32
#include "cmemory.h"
33
#include "cstring.h"
34
#include "ulocimp.h"
35
#include "ustr_imp.h"
36
37
/**
 * Canonical subtag spellings that stand for an unknown language, an unknown
 * script and an unknown region, respectively.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
43
44
/**
45
 * This function looks for the localeID in the likelySubtags resource.
46
 *
47
 * @param localeID The tag to find.
48
 * @param buffer A buffer to hold the matching entry
49
 * @param bufferLength The length of the output buffer
50
 * @return A pointer to "buffer" if found, or a null pointer if not.
51
 */
52
static const char*  U_CALLCONV
53
findLikelySubtags(const char* localeID,
54
                  char* buffer,
55
                  int32_t bufferLength,
56
3.01k
                  UErrorCode* err) {
57
3.01k
    const char* result = NULL;
58
59
3.01k
    if (!U_FAILURE(*err)) {
60
3.01k
        int32_t resLen = 0;
61
3.01k
        const UChar* s = NULL;
62
3.01k
        UErrorCode tmpErr = U_ZERO_ERROR;
63
3.01k
        icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64
3.01k
        if (U_SUCCESS(tmpErr)) {
65
3.01k
            icu::CharString und;
66
3.01k
            if (localeID != NULL) {
67
3.01k
                if (*localeID == '\0') {
68
803
                    localeID = unknownLanguage;
69
2.20k
                } else if (*localeID == '_') {
70
95
                    und.append(unknownLanguage, *err);
71
95
                    und.append(localeID, *err);
72
95
                    if (U_FAILURE(*err)) {
73
0
                        return NULL;
74
0
                    }
75
95
                    localeID = und.data();
76
95
                }
77
3.01k
            }
78
3.01k
            s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80
3.01k
            if (U_FAILURE(tmpErr)) {
81
                /*
82
                 * If a resource is missing, it's not really an error, it's
83
                 * just that we don't have any data for that particular locale ID.
84
                 */
85
1.41k
                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86
0
                    *err = tmpErr;
87
0
                }
88
1.41k
            }
89
1.59k
            else if (resLen >= bufferLength) {
90
                /* The buffer should never overflow. */
91
0
                *err = U_INTERNAL_PROGRAM_ERROR;
92
0
            }
93
1.59k
            else {
94
1.59k
                u_UCharsToChars(s, buffer, resLen + 1);
95
1.59k
                if (resLen >= 3 &&
96
1.59k
                    uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
97
1.59k
                    (resLen == 3 || buffer[3] == '_')) {
98
4
                    uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
99
4
                }
100
1.59k
                result = buffer;
101
1.59k
            }
102
3.01k
        } else {
103
0
            *err = tmpErr;
104
0
        }
105
3.01k
    }
106
107
3.01k
    return result;
108
3.01k
}
109
110
/**
111
 * Append a tag to a buffer, adding the separator if necessary.  The buffer
112
 * must be large enough to contain the resulting tag plus any separator
113
 * necessary. The tag must not be a zero-length string.
114
 *
115
 * @param tag The tag to add.
116
 * @param tagLength The length of the tag.
117
 * @param buffer The output buffer.
118
 * @param bufferLength The length of the output buffer.  This is an input/output parameter.
119
 **/
120
static void U_CALLCONV
121
appendTag(
122
    const char* tag,
123
    int32_t tagLength,
124
    char* buffer,
125
    int32_t* bufferLength,
126
7.11k
    UBool withSeparator) {
127
128
7.11k
    if (withSeparator) {
129
3.40k
        buffer[*bufferLength] = '_';
130
3.40k
        ++(*bufferLength);
131
3.40k
    }
132
133
7.11k
    uprv_memmove(
134
7.11k
        &buffer[*bufferLength],
135
7.11k
        tag,
136
7.11k
        tagLength);
137
138
7.11k
    *bufferLength += tagLength;
139
7.11k
}
140
141
/**
142
 * Create a tag string from the supplied parameters.  The lang, script and region
143
 * parameters may be NULL pointers. If they are, their corresponding length parameters
144
 * must be less than or equal to 0.
145
 *
146
 * If any of the language, script or region parameters are empty, and the alternateTags
147
 * parameter is not NULL, it will be parsed for potential language, script and region tags
148
 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
149
 * it contains no language tag, the default tag for the unknown language is used.
150
 *
151
 * If the length of the new string exceeds the capacity of the output buffer, 
152
 * the function copies as many bytes to the output buffer as it can, and returns
153
 * the error U_BUFFER_OVERFLOW_ERROR.
154
 *
155
 * If an illegal argument is provided, the function returns the error
156
 * U_ILLEGAL_ARGUMENT_ERROR.
157
 *
158
 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159
 * the tag string fits in the output buffer, but the null terminator doesn't.
160
 *
161
 * @param lang The language tag to use.
162
 * @param langLength The length of the language tag.
163
 * @param script The script tag to use.
164
 * @param scriptLength The length of the script tag.
165
 * @param region The region tag to use.
166
 * @param regionLength The length of the region tag.
167
 * @param trailing Any trailing data to append to the new tag.
168
 * @param trailingLength The length of the trailing data.
169
 * @param alternateTags A string containing any alternate tags.
170
 * @param sink The output sink receiving the tag string.
171
 * @param err A pointer to a UErrorCode for error reporting.
172
 **/
173
static void U_CALLCONV
174
createTagStringWithAlternates(
175
    const char* lang,
176
    int32_t langLength,
177
    const char* script,
178
    int32_t scriptLength,
179
    const char* region,
180
    int32_t regionLength,
181
    const char* trailing,
182
    int32_t trailingLength,
183
    const char* alternateTags,
184
    icu::ByteSink& sink,
185
4.60k
    UErrorCode* err) {
186
187
4.60k
    if (U_FAILURE(*err)) {
188
0
        goto error;
189
0
    }
190
4.60k
    else if (langLength >= ULOC_LANG_CAPACITY ||
191
4.60k
             scriptLength >= ULOC_SCRIPT_CAPACITY ||
192
4.60k
             regionLength >= ULOC_COUNTRY_CAPACITY) {
193
0
        goto error;
194
0
    }
195
4.60k
    else {
196
        /**
197
         * ULOC_FULLNAME_CAPACITY will provide enough capacity
198
         * that we can build a string that contains the language,
199
         * script and region code without worrying about overrunning
200
         * the user-supplied buffer.
201
         **/
202
4.60k
        char tagBuffer[ULOC_FULLNAME_CAPACITY];
203
4.60k
        int32_t tagLength = 0;
204
4.60k
        UBool regionAppended = false;
205
206
4.60k
        if (langLength > 0) {
207
2.11k
            appendTag(
208
2.11k
                lang,
209
2.11k
                langLength,
210
2.11k
                tagBuffer,
211
2.11k
                &tagLength,
212
2.11k
                /*withSeparator=*/false);
213
2.11k
        }
214
2.49k
        else if (alternateTags == NULL) {
215
            /*
216
             * Use the empty string for an unknown language, if
217
             * we found no language.
218
             */
219
898
        }
220
1.59k
        else {
221
            /*
222
             * Parse the alternateTags string for the language.
223
             */
224
1.59k
            char alternateLang[ULOC_LANG_CAPACITY];
225
1.59k
            int32_t alternateLangLength = sizeof(alternateLang);
226
227
1.59k
            alternateLangLength =
228
1.59k
                uloc_getLanguage(
229
1.59k
                    alternateTags,
230
1.59k
                    alternateLang,
231
1.59k
                    alternateLangLength,
232
1.59k
                    err);
233
1.59k
            if(U_FAILURE(*err) ||
234
1.59k
                alternateLangLength >= ULOC_LANG_CAPACITY) {
235
0
                goto error;
236
0
            }
237
1.59k
            else if (alternateLangLength == 0) {
238
                /*
239
                 * Use the empty string for an unknown language, if
240
                 * we found no language.
241
                 */
242
4
            }
243
1.59k
            else {
244
1.59k
                appendTag(
245
1.59k
                    alternateLang,
246
1.59k
                    alternateLangLength,
247
1.59k
                    tagBuffer,
248
1.59k
                    &tagLength,
249
1.59k
                    /*withSeparator=*/false);
250
1.59k
            }
251
1.59k
        }
252
253
4.60k
        if (scriptLength > 0) {
254
86
            appendTag(
255
86
                script,
256
86
                scriptLength,
257
86
                tagBuffer,
258
86
                &tagLength,
259
86
                /*withSeparator=*/true);
260
86
        }
261
4.52k
        else if (alternateTags != NULL) {
262
            /*
263
             * Parse the alternateTags string for the script.
264
             */
265
1.58k
            char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267
1.58k
            const int32_t alternateScriptLength =
268
1.58k
                uloc_getScript(
269
1.58k
                    alternateTags,
270
1.58k
                    alternateScript,
271
1.58k
                    sizeof(alternateScript),
272
1.58k
                    err);
273
274
1.58k
            if (U_FAILURE(*err) ||
275
1.58k
                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276
0
                goto error;
277
0
            }
278
1.58k
            else if (alternateScriptLength > 0) {
279
1.58k
                appendTag(
280
1.58k
                    alternateScript,
281
1.58k
                    alternateScriptLength,
282
1.58k
                    tagBuffer,
283
1.58k
                    &tagLength,
284
1.58k
                    /*withSeparator=*/true);
285
1.58k
            }
286
1.58k
        }
287
288
4.60k
        if (regionLength > 0) {
289
200
            appendTag(
290
200
                region,
291
200
                regionLength,
292
200
                tagBuffer,
293
200
                &tagLength,
294
200
                /*withSeparator=*/true);
295
296
200
            regionAppended = true;
297
200
        }
298
4.40k
        else if (alternateTags != NULL) {
299
            /*
300
             * Parse the alternateTags string for the region.
301
             */
302
1.53k
            char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304
1.53k
            const int32_t alternateRegionLength =
305
1.53k
                uloc_getCountry(
306
1.53k
                    alternateTags,
307
1.53k
                    alternateRegion,
308
1.53k
                    sizeof(alternateRegion),
309
1.53k
                    err);
310
1.53k
            if (U_FAILURE(*err) ||
311
1.53k
                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312
0
                goto error;
313
0
            }
314
1.53k
            else if (alternateRegionLength > 0) {
315
1.53k
                appendTag(
316
1.53k
                    alternateRegion,
317
1.53k
                    alternateRegionLength,
318
1.53k
                    tagBuffer,
319
1.53k
                    &tagLength,
320
1.53k
                    /*withSeparator=*/true);
321
322
1.53k
                regionAppended = true;
323
1.53k
            }
324
1.53k
        }
325
326
        /**
327
         * Copy the partial tag from our internal buffer to the supplied
328
         * target.
329
         **/
330
4.60k
        sink.Append(tagBuffer, tagLength);
331
332
4.60k
        if (trailingLength > 0) {
333
1.20k
            if (*trailing != '@') {
334
318
                sink.Append("_", 1);
335
318
                if (!regionAppended) {
336
                    /* extra separator is required */
337
0
                    sink.Append("_", 1);
338
0
                }
339
318
            }
340
341
            /*
342
             * Copy the trailing data into the supplied buffer.
343
             */
344
1.20k
            sink.Append(trailing, trailingLength);
345
1.20k
        }
346
347
4.60k
        return;
348
4.60k
    }
349
350
0
error:
351
352
    /**
353
     * An overflow indicates the locale ID passed in
354
     * is ill-formed.  If we got here, and there was
355
     * no previous error, it's an implicit overflow.
356
     **/
357
0
    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
358
0
        U_SUCCESS(*err)) {
359
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
360
0
    }
361
0
}
362
363
/**
364
 * Create a tag string from the supplied parameters.  The lang, script and region
365
 * parameters may be NULL pointers. If they are, their corresponding length parameters
366
 * must be less than or equal to 0.  If the lang parameter is an empty string, the
367
 * default value for an unknown language is written to the output buffer.
368
 *
369
 * If the length of the new string exceeds the capacity of the output buffer, 
370
 * the function copies as many bytes to the output buffer as it can, and returns
371
 * the error U_BUFFER_OVERFLOW_ERROR.
372
 *
373
 * If an illegal argument is provided, the function returns the error
374
 * U_ILLEGAL_ARGUMENT_ERROR.
375
 *
376
 * @param lang The language tag to use.
377
 * @param langLength The length of the language tag.
378
 * @param script The script tag to use.
379
 * @param scriptLength The length of the script tag.
380
 * @param region The region tag to use.
381
 * @param regionLength The length of the region tag.
382
 * @param trailing Any trailing data to append to the new tag.
383
 * @param trailingLength The length of the trailing data.
384
 * @param sink The output sink receiving the tag string.
385
 * @param err A pointer to a UErrorCode for error reporting.
386
 **/
387
static void U_CALLCONV
388
createTagString(
389
    const char* lang,
390
    int32_t langLength,
391
    const char* script,
392
    int32_t scriptLength,
393
    const char* region,
394
    int32_t regionLength,
395
    const char* trailing,
396
    int32_t trailingLength,
397
    icu::ByteSink& sink,
398
    UErrorCode* err)
399
3.01k
{
400
3.01k
    createTagStringWithAlternates(
401
3.01k
                lang,
402
3.01k
                langLength,
403
3.01k
                script,
404
3.01k
                scriptLength,
405
3.01k
                region,
406
3.01k
                regionLength,
407
3.01k
                trailing,
408
3.01k
                trailingLength,
409
3.01k
                NULL,
410
3.01k
                sink,
411
3.01k
                err);
412
3.01k
}
413
414
/**
415
 * Parse the language, script, and region subtags from a tag string, and copy the
416
 * results into the corresponding output parameters. The buffers are null-terminated,
417
 * unless overflow occurs.
418
 *
419
 * The langLength, scriptLength, and regionLength parameters are input/output
420
 * parameters, and must contain the capacity of their corresponding buffers on
421
 * input.  On output, they will contain the actual length of the buffers, not
422
 * including the null terminator.
423
 *
424
 * If the length of any of the output subtags exceeds the capacity of the corresponding
425
 * buffer, the function copies as many bytes to the output buffer as it can, and returns
426
 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
427
 * occurs.
428
 *
429
 * If an illegal argument is provided, the function returns the error
430
 * U_ILLEGAL_ARGUMENT_ERROR.
431
 *
432
 * @param localeID The locale ID to parse.
433
 * @param lang The language tag buffer.
434
 * @param langLength The length of the language tag.
435
 * @param script The script tag buffer.
436
 * @param scriptLength The length of the script tag.
437
 * @param region The region tag buffer.
438
 * @param regionLength The length of the region tag.
439
 * @param err A pointer to a UErrorCode for error reporting.
440
 * @return The number of chars of the localeID parameter consumed.
441
 **/
442
static int32_t U_CALLCONV
443
parseTagString(
444
    const char* localeID,
445
    char* lang,
446
    int32_t* langLength,
447
    char* script,
448
    int32_t* scriptLength,
449
    char* region,
450
    int32_t* regionLength,
451
    UErrorCode* err)
452
3.24k
{
453
3.24k
    const char* position = localeID;
454
3.24k
    int32_t subtagLength = 0;
455
456
3.24k
    if(U_FAILURE(*err) ||
457
3.24k
       localeID == NULL ||
458
3.24k
       lang == NULL ||
459
3.24k
       langLength == NULL ||
460
3.24k
       script == NULL ||
461
3.24k
       scriptLength == NULL ||
462
3.24k
       region == NULL ||
463
3.24k
       regionLength == NULL) {
464
0
        goto error;
465
0
    }
466
467
3.24k
    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
468
469
    /*
470
     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
471
     * to be an error, because it indicates the user-supplied tag is
472
     * not well-formed.
473
     */
474
3.24k
    if(U_FAILURE(*err)) {
475
0
        goto error;
476
0
    }
477
478
3.24k
    *langLength = subtagLength;
479
480
    /*
481
     * If no language was present, use the empty string instead.
482
     * Otherwise, move past any separator.
483
     */
484
3.24k
    if (_isIDSeparator(*position)) {
485
1.21k
        ++position;
486
1.21k
    }
487
488
3.24k
    subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
489
490
3.24k
    if(U_FAILURE(*err)) {
491
0
        goto error;
492
0
    }
493
494
3.24k
    *scriptLength = subtagLength;
495
496
3.24k
    if (*scriptLength > 0) {
497
56
        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
498
            /**
499
             * If the script part is the "unknown" script, then don't return it.
500
             **/
501
2
            *scriptLength = 0;
502
2
        }
503
504
        /*
505
         * Move past any separator.
506
         */
507
56
        if (_isIDSeparator(*position)) {
508
18
            ++position;
509
18
        }    
510
56
    }
511
512
3.24k
    subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
513
514
3.24k
    if(U_FAILURE(*err)) {
515
0
        goto error;
516
0
    }
517
518
3.24k
    *regionLength = subtagLength;
519
520
3.24k
    if (*regionLength > 0) {
521
137
        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
522
            /**
523
             * If the region part is the "unknown" region, then don't return it.
524
             **/
525
3
            *regionLength = 0;
526
3
        }
527
3.10k
    } else if (*position != 0 && *position != '@') {
528
        /* back up over consumed trailing separator */
529
1.05k
        --position;
530
1.05k
    }
531
532
3.24k
exit:
533
534
3.24k
    return (int32_t)(position - localeID);
535
536
0
error:
537
538
    /**
539
     * If we get here, we have no explicit error, it's the result of an
540
     * illegal argument.
541
     **/
542
0
    if (!U_FAILURE(*err)) {
543
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
544
0
    }
545
546
0
    goto exit;
547
3.24k
}
548
549
static UBool U_CALLCONV
550
createLikelySubtagsString(
551
    const char* lang,
552
    int32_t langLength,
553
    const char* script,
554
    int32_t scriptLength,
555
    const char* region,
556
    int32_t regionLength,
557
    const char* variants,
558
    int32_t variantsLength,
559
    icu::ByteSink& sink,
560
2.85k
    UErrorCode* err) {
561
    /**
562
     * ULOC_FULLNAME_CAPACITY will provide enough capacity
563
     * that we can build a string that contains the language,
564
     * script and region code without worrying about overrunning
565
     * the user-supplied buffer.
566
     **/
567
2.85k
    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
568
569
2.85k
    if(U_FAILURE(*err)) {
570
0
        goto error;
571
0
    }
572
573
    /**
574
     * Try the language with the script and region first.
575
     **/
576
2.85k
    if (scriptLength > 0 && regionLength > 0) {
577
578
19
        const char* likelySubtags = NULL;
579
580
19
        icu::CharString tagBuffer;
581
19
        {
582
19
            icu::CharStringByteSink sink(&tagBuffer);
583
19
            createTagString(
584
19
                lang,
585
19
                langLength,
586
19
                script,
587
19
                scriptLength,
588
19
                region,
589
19
                regionLength,
590
19
                NULL,
591
19
                0,
592
19
                sink,
593
19
                err);
594
19
        }
595
19
        if(U_FAILURE(*err)) {
596
0
            goto error;
597
0
        }
598
599
19
        likelySubtags =
600
19
            findLikelySubtags(
601
19
                tagBuffer.data(),
602
19
                likelySubtagsBuffer,
603
19
                sizeof(likelySubtagsBuffer),
604
19
                err);
605
19
        if(U_FAILURE(*err)) {
606
0
            goto error;
607
0
        }
608
609
19
        if (likelySubtags != NULL) {
610
            /* Always use the language tag from the
611
               maximal string, since it may be more
612
               specific than the one provided. */
613
2
            createTagStringWithAlternates(
614
2
                        NULL,
615
2
                        0,
616
2
                        NULL,
617
2
                        0,
618
2
                        NULL,
619
2
                        0,
620
2
                        variants,
621
2
                        variantsLength,
622
2
                        likelySubtags,
623
2
                        sink,
624
2
                        err);
625
2
            return true;
626
2
        }
627
19
    }
628
629
    /**
630
     * Try the language with just the script.
631
     **/
632
2.85k
    if (scriptLength > 0) {
633
634
52
        const char* likelySubtags = NULL;
635
636
52
        icu::CharString tagBuffer;
637
52
        {
638
52
            icu::CharStringByteSink sink(&tagBuffer);
639
52
            createTagString(
640
52
                lang,
641
52
                langLength,
642
52
                script,
643
52
                scriptLength,
644
52
                NULL,
645
52
                0,
646
52
                NULL,
647
52
                0,
648
52
                sink,
649
52
                err);
650
52
        }
651
52
        if(U_FAILURE(*err)) {
652
0
            goto error;
653
0
        }
654
655
52
        likelySubtags =
656
52
            findLikelySubtags(
657
52
                tagBuffer.data(),
658
52
                likelySubtagsBuffer,
659
52
                sizeof(likelySubtagsBuffer),
660
52
                err);
661
52
        if(U_FAILURE(*err)) {
662
0
            goto error;
663
0
        }
664
665
52
        if (likelySubtags != NULL) {
666
            /* Always use the language tag from the
667
               maximal string, since it may be more
668
               specific than the one provided. */
669
8
            createTagStringWithAlternates(
670
8
                        NULL,
671
8
                        0,
672
8
                        NULL,
673
8
                        0,
674
8
                        region,
675
8
                        regionLength,
676
8
                        variants,
677
8
                        variantsLength,
678
8
                        likelySubtags,
679
8
                        sink,
680
8
                        err);
681
8
            return true;
682
8
        }
683
52
    }
684
685
    /**
686
     * Try the language with just the region.
687
     **/
688
2.84k
    if (regionLength > 0) {
689
690
122
        const char* likelySubtags = NULL;
691
692
122
        icu::CharString tagBuffer;
693
122
        {
694
122
            icu::CharStringByteSink sink(&tagBuffer);
695
122
            createTagString(
696
122
                lang,
697
122
                langLength,
698
122
                NULL,
699
122
                0,
700
122
                region,
701
122
                regionLength,
702
122
                NULL,
703
122
                0,
704
122
                sink,
705
122
                err);
706
122
        }
707
122
        if(U_FAILURE(*err)) {
708
0
            goto error;
709
0
        }
710
711
122
        likelySubtags =
712
122
            findLikelySubtags(
713
122
                tagBuffer.data(),
714
122
                likelySubtagsBuffer,
715
122
                sizeof(likelySubtagsBuffer),
716
122
                err);
717
122
        if(U_FAILURE(*err)) {
718
0
            goto error;
719
0
        }
720
721
122
        if (likelySubtags != NULL) {
722
            /* Always use the language tag from the
723
               maximal string, since it may be more
724
               specific than the one provided. */
725
26
            createTagStringWithAlternates(
726
26
                        NULL,
727
26
                        0,
728
26
                        script,
729
26
                        scriptLength,
730
26
                        NULL,
731
26
                        0,
732
26
                        variants,
733
26
                        variantsLength,
734
26
                        likelySubtags,
735
26
                        sink,
736
26
                        err);
737
26
            return true;
738
26
        }
739
122
    }
740
741
    /**
742
     * Finally, try just the language.
743
     **/
744
2.81k
    {
745
2.81k
        const char* likelySubtags = NULL;
746
747
2.81k
        icu::CharString tagBuffer;
748
2.81k
        {
749
2.81k
            icu::CharStringByteSink sink(&tagBuffer);
750
2.81k
            createTagString(
751
2.81k
                lang,
752
2.81k
                langLength,
753
2.81k
                NULL,
754
2.81k
                0,
755
2.81k
                NULL,
756
2.81k
                0,
757
2.81k
                NULL,
758
2.81k
                0,
759
2.81k
                sink,
760
2.81k
                err);
761
2.81k
        }
762
2.81k
        if(U_FAILURE(*err)) {
763
0
            goto error;
764
0
        }
765
766
2.81k
        likelySubtags =
767
2.81k
            findLikelySubtags(
768
2.81k
                tagBuffer.data(),
769
2.81k
                likelySubtagsBuffer,
770
2.81k
                sizeof(likelySubtagsBuffer),
771
2.81k
                err);
772
2.81k
        if(U_FAILURE(*err)) {
773
0
            goto error;
774
0
        }
775
776
2.81k
        if (likelySubtags != NULL) {
777
            /* Always use the language tag from the
778
               maximal string, since it may be more
779
               specific than the one provided. */
780
1.56k
            createTagStringWithAlternates(
781
1.56k
                        NULL,
782
1.56k
                        0,
783
1.56k
                        script,
784
1.56k
                        scriptLength,
785
1.56k
                        region,
786
1.56k
                        regionLength,
787
1.56k
                        variants,
788
1.56k
                        variantsLength,
789
1.56k
                        likelySubtags,
790
1.56k
                        sink,
791
1.56k
                        err);
792
1.56k
            return true;
793
1.56k
        }
794
2.81k
    }
795
796
1.25k
    return false;
797
798
0
error:
799
800
0
    if (!U_FAILURE(*err)) {
801
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
802
0
    }
803
804
0
    return false;
805
2.81k
}
806
807
3.24k
/**
 * Scans the first trailingLength characters of "trailing" and jumps to the
 * enclosing function's "error" label if any run of characters between
 * separators ('-' or '_') is too long. Scanning stops at the first '@'.
 *
 * The length counter is tested before it is incremented, so a run of up to
 * nine characters is accepted and the tenth character triggers the jump.
 * NOTE(review): nine rather than eight is what the code has always done here;
 * confirm against the intended variant-subtag limit before changing it.
 *
 * The enclosing function must define an "error" label, and no initialized
 * local may be declared between the macro use and that label.
 *
 * @param trailing Pointer to the characters to scan.
 * @param trailingLength Number of characters available at "trailing".
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
825
826
/**
 * Adds likely subtags to a locale ID and writes the result to a sink.
 *
 * The locale ID is split into language, script, region and a trailing part
 * (everything after the separators that follow the parsed subtags). The
 * trailing part is validated with CHECK_TRAILING_VARIANT_SIZE, and the
 * subtags are then looked up with createLikelySubtagsString().
 *
 * @param localeID The locale ID to maximize; NULL is an illegal argument.
 * @param sink Receives the maximized tag. When createLikelySubtagsString()
 *             returns false, localeID itself is written instead.
 * @param err A pointer to a UErrorCode for error reporting. A
 *            U_BUFFER_OVERFLOW_ERROR from parsing is reported as
 *            U_ILLEGAL_ARGUMENT_ERROR.
 * @return true if the sink holds the result of a successful lookup; false if
 *         the input was copied through unchanged or an error was detected
 *         before the lookup (in which case nothing is written).
 */
static UBool
_uloc_addLikelySubtags(const char* localeID,
                       icu::ByteSink& sink,
                       UErrorCode* err) {
    /*
     * Every local is declared before the first "goto error" so that no jump
     * crosses an initialization.
     */
    char lang[ULOC_LANG_CAPACITY];
    char script[ULOC_SCRIPT_CAPACITY];
    char region[ULOC_COUNTRY_CAPACITY];
    int32_t langLength = sizeof(lang);
    int32_t scriptLength = sizeof(script);
    int32_t regionLength = sizeof(region);
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
    UBool success = false;

    if (U_FAILURE(*err) || localeID == NULL) {
        goto error;
    }

    trailingIndex = parseTagString(
        localeID,
        lang, &langLength,
        script, &scriptLength,
        region, &regionLength,
        err);
    if (U_FAILURE(*err)) {
        /* Overflow indicates an illegal argument error */
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
        }

        goto error;
    }

    /* Skip the separators in front of the trailing portion, then measure it. */
    for (; _isIDSeparator(localeID[trailingIndex]); ++trailingIndex) {
    }
    trailing = localeID + trailingIndex;
    trailingLength = (int32_t)uprv_strlen(trailing);

    /* Jumps to "error" on an over-long trailing subtag. */
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    success =
        createLikelySubtagsString(
            lang, langLength,
            script, scriptLength,
            region, regionLength,
            trailing, trailingLength,
            sink,
            err);

    if (!success) {
        /* No result from the lookup: hand the input back as the output. */
        sink.Append(localeID, (int32_t)uprv_strlen(localeID));
    }

    return success;

error:

    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return false;
}
906
907
// Adds likely subtags for the given locale ID and writes the result to the sink.
// Returns true if the value in the sink was produced by a match during the lookup.
// Returns false if the value in the sink is the same as the input because the
// lookup found no match.
911
static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
912
913
static void
914
_uloc_minimizeSubtags(const char* localeID,
915
                      icu::ByteSink& sink,
916
0
                      UErrorCode* err) {
917
0
    icu::CharString maximizedTagBuffer;
918
919
0
    char lang[ULOC_LANG_CAPACITY];
920
0
    int32_t langLength = sizeof(lang);
921
0
    char script[ULOC_SCRIPT_CAPACITY];
922
0
    int32_t scriptLength = sizeof(script);
923
0
    char region[ULOC_COUNTRY_CAPACITY];
924
0
    int32_t regionLength = sizeof(region);
925
0
    const char* trailing = "";
926
0
    int32_t trailingLength = 0;
927
0
    int32_t trailingIndex = 0;
928
0
    UBool successGetMax = false;
929
930
0
    if(U_FAILURE(*err)) {
931
0
        goto error;
932
0
    }
933
0
    else if (localeID == NULL) {
934
0
        goto error;
935
0
    }
936
937
0
    trailingIndex =
938
0
        parseTagString(
939
0
            localeID,
940
0
            lang,
941
0
            &langLength,
942
0
            script,
943
0
            &scriptLength,
944
0
            region,
945
0
            &regionLength,
946
0
            err);
947
0
    if(U_FAILURE(*err)) {
948
949
        /* Overflow indicates an illegal argument error */
950
0
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
951
0
            *err = U_ILLEGAL_ARGUMENT_ERROR;
952
0
        }
953
954
0
        goto error;
955
0
    }
956
957
    /* Find the spot where the variants or the keywords begin, if any. */
958
0
    while (_isIDSeparator(localeID[trailingIndex])) {
959
0
        trailingIndex++;
960
0
    }
961
0
    trailing = &localeID[trailingIndex];
962
0
    trailingLength = (int32_t)uprv_strlen(trailing);
963
964
0
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
965
966
0
    {
967
0
        icu::CharString base;
968
0
        {
969
0
            icu::CharStringByteSink baseSink(&base);
970
0
            createTagString(
971
0
                lang,
972
0
                langLength,
973
0
                script,
974
0
                scriptLength,
975
0
                region,
976
0
                regionLength,
977
0
                NULL,
978
0
                0,
979
0
                baseSink,
980
0
                err);
981
0
        }
982
983
        /**
984
         * First, we need to first get the maximization
985
         * from AddLikelySubtags.
986
         **/
987
0
        {
988
0
            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
989
0
            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
990
0
        }
991
0
    }
992
993
0
    if(U_FAILURE(*err)) {
994
0
        goto error;
995
0
    }
996
997
0
    if (!successGetMax) {
998
        /**
999
         * If we got here, return the locale ID parameter unchanged.
1000
         **/
1001
0
        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1002
0
        sink.Append(localeID, localeIDLength);
1003
0
        return;
1004
0
    }
1005
1006
    // In the following, the lang, script, region are referring to those in
1007
    // the maximizedTagBuffer, not the one in the localeID.
1008
0
    langLength = sizeof(lang);
1009
0
    scriptLength = sizeof(script);
1010
0
    regionLength = sizeof(region);
1011
0
    parseTagString(
1012
0
        maximizedTagBuffer.data(),
1013
0
        lang,
1014
0
        &langLength,
1015
0
        script,
1016
0
        &scriptLength,
1017
0
        region,
1018
0
        &regionLength,
1019
0
        err);
1020
0
    if(U_FAILURE(*err)) {
1021
0
        goto error;
1022
0
    }
1023
1024
    /**
1025
     * Start first with just the language.
1026
     **/
1027
0
    {
1028
0
        icu::CharString tagBuffer;
1029
0
        {
1030
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1031
0
            createLikelySubtagsString(
1032
0
                lang,
1033
0
                langLength,
1034
0
                NULL,
1035
0
                0,
1036
0
                NULL,
1037
0
                0,
1038
0
                NULL,
1039
0
                0,
1040
0
                tagSink,
1041
0
                err);
1042
0
        }
1043
1044
0
        if(U_FAILURE(*err)) {
1045
0
            goto error;
1046
0
        }
1047
0
        else if (!tagBuffer.isEmpty() &&
1048
0
                 uprv_strnicmp(
1049
0
                    maximizedTagBuffer.data(),
1050
0
                    tagBuffer.data(),
1051
0
                    tagBuffer.length()) == 0) {
1052
1053
0
            createTagString(
1054
0
                        lang,
1055
0
                        langLength,
1056
0
                        NULL,
1057
0
                        0,
1058
0
                        NULL,
1059
0
                        0,
1060
0
                        trailing,
1061
0
                        trailingLength,
1062
0
                        sink,
1063
0
                        err);
1064
0
            return;
1065
0
        }
1066
0
    }
1067
1068
    /**
1069
     * Next, try the language and region.
1070
     **/
1071
0
    if (regionLength > 0) {
1072
1073
0
        icu::CharString tagBuffer;
1074
0
        {
1075
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1076
0
            createLikelySubtagsString(
1077
0
                lang,
1078
0
                langLength,
1079
0
                NULL,
1080
0
                0,
1081
0
                region,
1082
0
                regionLength,
1083
0
                NULL,
1084
0
                0,
1085
0
                tagSink,
1086
0
                err);
1087
0
        }
1088
1089
0
        if(U_FAILURE(*err)) {
1090
0
            goto error;
1091
0
        }
1092
0
        else if (!tagBuffer.isEmpty() &&
1093
0
                 uprv_strnicmp(
1094
0
                    maximizedTagBuffer.data(),
1095
0
                    tagBuffer.data(),
1096
0
                    tagBuffer.length()) == 0) {
1097
1098
0
            createTagString(
1099
0
                        lang,
1100
0
                        langLength,
1101
0
                        NULL,
1102
0
                        0,
1103
0
                        region,
1104
0
                        regionLength,
1105
0
                        trailing,
1106
0
                        trailingLength,
1107
0
                        sink,
1108
0
                        err);
1109
0
            return;
1110
0
        }
1111
0
    }
1112
1113
    /**
1114
     * Finally, try the language and script.  This is our last chance,
1115
     * since trying with all three subtags would only yield the
1116
     * maximal version that we already have.
1117
     **/
1118
0
    if (scriptLength > 0) {
1119
0
        icu::CharString tagBuffer;
1120
0
        {
1121
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1122
0
            createLikelySubtagsString(
1123
0
                lang,
1124
0
                langLength,
1125
0
                script,
1126
0
                scriptLength,
1127
0
                NULL,
1128
0
                0,
1129
0
                NULL,
1130
0
                0,
1131
0
                tagSink,
1132
0
                err);
1133
0
        }
1134
1135
0
        if(U_FAILURE(*err)) {
1136
0
            goto error;
1137
0
        }
1138
0
        else if (!tagBuffer.isEmpty() &&
1139
0
                 uprv_strnicmp(
1140
0
                    maximizedTagBuffer.data(),
1141
0
                    tagBuffer.data(),
1142
0
                    tagBuffer.length()) == 0) {
1143
1144
0
            createTagString(
1145
0
                        lang,
1146
0
                        langLength,
1147
0
                        script,
1148
0
                        scriptLength,
1149
0
                        NULL,
1150
0
                        0,
1151
0
                        trailing,
1152
0
                        trailingLength,
1153
0
                        sink,
1154
0
                        err);
1155
0
            return;
1156
0
        }
1157
0
    }
1158
1159
0
    {
1160
        /**
1161
         * If we got here, return the max + trail.
1162
         **/
1163
0
        createTagString(
1164
0
                    lang,
1165
0
                    langLength,
1166
0
                    script,
1167
0
                    scriptLength,
1168
0
                    region,
1169
0
                    regionLength,
1170
0
                    trailing,
1171
0
                    trailingLength,
1172
0
                    sink,
1173
0
                    err);
1174
0
        return;
1175
0
    }
1176
1177
0
error:
1178
1179
0
    if (!U_FAILURE(*err)) {
1180
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
1181
0
    }
1182
0
}
1183
1184
/**
 * Canonicalizes localeID into "buffer" by calling uloc_canonicalize().
 *
 * @param localeID The locale ID to canonicalize.
 * @param buffer The output buffer.
 * @param bufferCapacity The capacity of the output buffer.
 * @param err In/out error code, passed through to uloc_canonicalize().
 * @return The length reported by uloc_canonicalize(), including when *err is
 *         U_STRING_NOT_TERMINATED_WARNING or U_BUFFER_OVERFLOW_ERROR;
 *         -1 for any other failure.
 */
static int32_t
do_canonicalize(const char*    localeID,
         char* buffer,
         int32_t bufferCapacity,
         UErrorCode* err)
{
    const int32_t length =
        uloc_canonicalize(localeID, buffer, bufferCapacity, err);

    /*
     * For the two "result did not fit" codes the reported length is still
     * returned to the caller; every other failure yields -1.
     */
    if (U_FAILURE(*err) &&
            *err != U_STRING_NOT_TERMINATED_WARNING &&
            *err != U_BUFFER_OVERFLOW_ERROR) {
        return -1;
    }
    return length;
}
1208
1209
/**
 * Buffer-based front end for ulocimp_addLikelySubtags().
 *
 * @param localeID The locale ID to maximize.
 * @param maximizedLocaleID The output buffer.
 * @param maximizedLocaleIDCapacity The capacity of the output buffer.
 * @param status In/out error code.  Set to U_BUFFER_OVERFLOW_ERROR when the
 *               operation itself succeeded but the result did not fit.
 * @return 0 if *status indicated a failure on entry; otherwise the number of
 *         bytes appended to the sink, except that a failed operation whose
 *         sink did not overflow returns -1.
 */
U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
                      char* maximizedLocaleID,
                      int32_t maximizedLocaleIDCapacity,
                      UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            maximizedLocaleID, maximizedLocaleIDCapacity);
    ulocimp_addLikelySubtags(localeID, sink, status);

    const int32_t appended = sink.NumberOfBytesAppended();
    const UBool overflowed = sink.Overflowed();

    if (U_FAILURE(*status)) {
        return overflowed ? appended : -1;
    }

    if (overflowed) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return appended;
    }

    u_terminateChars(
            maximizedLocaleID, maximizedLocaleIDCapacity, appended, status);
    return appended;
}
1237
1238
/**
 * Canonicalizes localeID and passes the canonical form to
 * _uloc_addLikelySubtags().
 *
 * @param localeID The locale ID to maximize.
 * @param sink Receives the output of _uloc_addLikelySubtags().
 * @param status In/out error code.
 * @return The result of _uloc_addLikelySubtags(), or false if *status does
 *         not indicate success once canonicalization has finished.
 */
static UBool
_ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode* status) {
    PreflightingLocaleIDBuffer canonical;

    // Repeat the canonicalization for as long as the buffer asks for a retry.
    do {
        canonical.requestedCapacity = do_canonicalize(
            localeID, canonical.getBuffer(), canonical.getCapacity(), status);
    } while (canonical.needToTryAgain(status));

    if (!U_SUCCESS(*status)) {
        return false;
    }
    return _uloc_addLikelySubtags(canonical.getBuffer(), sink, status);
}
1254
1255
/**
 * Exported entry point that forwards to _ulocimp_addLikelySubtags() and
 * discards its boolean result; the outcome is reported through "status" and
 * the sink only.
 *
 * @param localeID The locale ID to maximize.
 * @param sink Receives the result.
 * @param status In/out error code.
 */
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
    (void)_ulocimp_addLikelySubtags(localeID, sink, status);
}
1261
1262
/**
 * Buffer-based front end for ulocimp_minimizeSubtags().
 *
 * @param localeID The locale ID to minimize.
 * @param minimizedLocaleID The output buffer.
 * @param minimizedLocaleIDCapacity The capacity of the output buffer.
 * @param status In/out error code.  Set to U_BUFFER_OVERFLOW_ERROR when the
 *               operation itself succeeded but the result did not fit.
 * @return 0 if *status indicated a failure on entry; otherwise the number of
 *         bytes appended to the sink, except that a failed operation whose
 *         sink did not overflow returns -1.
 */
U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char* localeID,
                     char* minimizedLocaleID,
                     int32_t minimizedLocaleIDCapacity,
                     UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            minimizedLocaleID, minimizedLocaleIDCapacity);
    ulocimp_minimizeSubtags(localeID, sink, status);

    const int32_t appended = sink.NumberOfBytesAppended();
    const UBool overflowed = sink.Overflowed();

    if (U_FAILURE(*status)) {
        return overflowed ? appended : -1;
    }

    if (overflowed) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return appended;
    }

    u_terminateChars(
            minimizedLocaleID, minimizedLocaleIDCapacity, appended, status);
    return appended;
}
1290
1291
/**
 * Canonicalizes localeID and passes the canonical form to
 * _uloc_minimizeSubtags().
 *
 * _uloc_minimizeSubtags() is called even when canonicalization failed; it
 * checks *status itself before doing any work.
 *
 * @param localeID The locale ID to minimize.
 * @param sink Receives the result.
 * @param status In/out error code.
 */
U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
                        UErrorCode* status) {
    PreflightingLocaleIDBuffer canonical;

    // Repeat the canonicalization for as long as the buffer asks for a retry.
    do {
        canonical.requestedCapacity = do_canonicalize(
            localeID, canonical.getBuffer(), canonical.getCapacity(), status);
    } while (canonical.needToTryAgain(status));

    _uloc_minimizeSubtags(canonical.getBuffer(), sink, status);
}
1303
1304
// Pairs of (language subtag, + or -) for finding out fast if common languages
1305
// are LTR (minus) or RTL (plus).
1306
static const char LANG_DIR_STRING[] =
1307
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1308
1309
// Implemented here because this calls ulocimp_addLikelySubtags().
1310
U_CAPI UBool U_EXPORT2
1311
3.55k
uloc_isRightToLeft(const char *locale) {
1312
3.55k
    UErrorCode errorCode = U_ZERO_ERROR;
1313
3.55k
    char script[8];
1314
3.55k
    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1315
3.55k
    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1316
3.55k
            scriptLength == 0) {
1317
        // Fastpath: We know the likely scripts and their writing direction
1318
        // for some common languages.
1319
3.50k
        errorCode = U_ZERO_ERROR;
1320
3.50k
        char lang[8];
1321
3.50k
        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1322
3.50k
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1323
76
            return false;
1324
76
        }
1325
3.42k
        if (langLength > 0) {
1326
2.57k
            const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1327
2.57k
            if (langPtr != NULL) {
1328
121
                switch (langPtr[langLength]) {
1329
22
                case '-': return false;
1330
1
                case '+': return true;
1331
98
                default: break;  // partial match of a longer code
1332
121
                }
1333
121
            }
1334
2.57k
        }
1335
        // Otherwise, find the likely script.
1336
3.40k
        errorCode = U_ZERO_ERROR;
1337
3.40k
        icu::CharString likely;
1338
3.40k
        {
1339
3.40k
            icu::CharStringByteSink sink(&likely);
1340
3.40k
            ulocimp_addLikelySubtags(locale, sink, &errorCode);
1341
3.40k
        }
1342
3.40k
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1343
552
            return false;
1344
552
        }
1345
2.85k
        scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1346
2.85k
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1347
2.85k
                scriptLength == 0) {
1348
1.22k
            return false;
1349
1.22k
        }
1350
2.85k
    }
1351
1.67k
    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1352
1.67k
    return uscript_isRightToLeft(scriptCode);
1353
3.55k
}
1354
1355
U_NAMESPACE_BEGIN
1356
1357
// Reports the result of uloc_isRightToLeft() for this locale's base name.
UBool
Locale::isRightToLeft() const {
    const char *baseName = getBaseName();
    return uloc_isRightToLeft(baseName);
}
1361
1362
U_NAMESPACE_END
1363
1364
// The following must at least allow for rg key value (6) plus terminator (1).
1365
14.4k
#define ULOC_RG_BUFLEN 8
1366
1367
U_CAPI int32_t U_EXPORT2
1368
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1369
7.23k
                                     char *region, int32_t regionCapacity, UErrorCode* status) {
1370
7.23k
    if (U_FAILURE(*status)) {
1371
0
        return 0;
1372
0
    }
1373
7.23k
    char rgBuf[ULOC_RG_BUFLEN];
1374
7.23k
    UErrorCode rgStatus = U_ZERO_ERROR;
1375
1376
    // First check for rg keyword value
1377
7.23k
    int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1378
7.23k
    if (U_FAILURE(rgStatus) || rgLen != 6) {
1379
7.23k
        rgLen = 0;
1380
7.23k
    } else {
1381
        // rgBuf guaranteed to be zero terminated here, with text len 6
1382
0
        char *rgPtr = rgBuf;
1383
0
        for (; *rgPtr!= 0; rgPtr++) {
1384
0
            *rgPtr = uprv_toupper(*rgPtr);
1385
0
        }
1386
0
        rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1387
0
    }
1388
1389
7.23k
    if (rgLen == 0) {
1390
        // No valid rg keyword value, try for unicode_region_subtag
1391
7.23k
        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1392
7.23k
        if (U_FAILURE(*status)) {
1393
0
            rgLen = 0;
1394
7.23k
        } else if (rgLen == 0 && inferRegion) {
1395
            // no unicode_region_subtag but inferRegion true, try likely subtags
1396
0
            rgStatus = U_ZERO_ERROR;
1397
0
            icu::CharString locBuf;
1398
0
            {
1399
0
                icu::CharStringByteSink sink(&locBuf);
1400
0
                ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1401
0
            }
1402
0
            if (U_SUCCESS(rgStatus)) {
1403
0
                rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1404
0
                if (U_FAILURE(*status)) {
1405
0
                    rgLen = 0;
1406
0
                }
1407
0
            }
1408
0
        }
1409
7.23k
    }
1410
1411
7.23k
    rgBuf[rgLen] = 0;
1412
7.23k
    uprv_strncpy(region, rgBuf, regionCapacity);
1413
7.23k
    return u_terminateChars(region, regionCapacity, rgLen, status);
1414
7.23k
}
1415