Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/loclikely.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 1997-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  loclikely.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2010feb25
16
*   created by: Markus W. Scherer
17
*
18
*   Code for likely and minimized locale subtags, separated out from other .cpp files
19
*   that then do not depend on resource bundle code and likely-subtags data.
20
*/
21
22
#include "unicode/bytestream.h"
23
#include "unicode/utypes.h"
24
#include "unicode/locid.h"
25
#include "unicode/putil.h"
26
#include "unicode/uchar.h"
27
#include "unicode/uloc.h"
28
#include "unicode/ures.h"
29
#include "unicode/uscript.h"
30
#include "bytesinkutil.h"
31
#include "charstr.h"
32
#include "cmemory.h"
33
#include "cstring.h"
34
#include "ulocimp.h"
35
#include "ustr_imp.h"
36
37
/**
 * Canonical placeholder subtags used when the language, script or region
 * of a locale is unknown.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
43
44
/**
45
 * This function looks for the localeID in the likelySubtags resource.
46
 *
47
 * @param localeID The tag to find.
48
 * @param buffer A buffer to hold the matching entry
49
 * @param bufferLength The length of the output buffer
50
 * @return A pointer to "buffer" if found, or a null pointer if not.
51
 */
52
static const char*  U_CALLCONV
53
findLikelySubtags(const char* localeID,
54
                  char* buffer,
55
                  int32_t bufferLength,
56
0
                  UErrorCode* err) {
57
0
    const char* result = NULL;
58
59
0
    if (!U_FAILURE(*err)) {
60
0
        int32_t resLen = 0;
61
0
        const UChar* s = NULL;
62
0
        UErrorCode tmpErr = U_ZERO_ERROR;
63
0
        icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64
0
        if (U_SUCCESS(tmpErr)) {
65
0
            icu::CharString und;
66
0
            if (localeID != NULL) {
67
0
                if (*localeID == '\0') {
68
0
                    localeID = unknownLanguage;
69
0
                } else if (*localeID == '_') {
70
0
                    und.append(unknownLanguage, *err);
71
0
                    und.append(localeID, *err);
72
0
                    if (U_FAILURE(*err)) {
73
0
                        return NULL;
74
0
                    }
75
0
                    localeID = und.data();
76
0
                }
77
0
            }
78
0
            s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80
0
            if (U_FAILURE(tmpErr)) {
81
                /*
82
                 * If a resource is missing, it's not really an error, it's
83
                 * just that we don't have any data for that particular locale ID.
84
                 */
85
0
                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86
0
                    *err = tmpErr;
87
0
                }
88
0
            }
89
0
            else if (resLen >= bufferLength) {
90
                /* The buffer should never overflow. */
91
0
                *err = U_INTERNAL_PROGRAM_ERROR;
92
0
            }
93
0
            else {
94
0
                u_UCharsToChars(s, buffer, resLen + 1);
95
0
                if (resLen >= 3 &&
96
0
                    uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
97
0
                    (resLen == 3 || buffer[3] == '_')) {
98
0
                    uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
99
0
                }
100
0
                result = buffer;
101
0
            }
102
0
        } else {
103
0
            *err = tmpErr;
104
0
        }
105
0
    }
106
107
0
    return result;
108
0
}
109
110
/**
111
 * Append a tag to a buffer, adding the separator if necessary.  The buffer
112
 * must be large enough to contain the resulting tag plus any separator
113
 * necessary. The tag must not be a zero-length string.
114
 *
115
 * @param tag The tag to add.
116
 * @param tagLength The length of the tag.
117
 * @param buffer The output buffer.
118
 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
119
 **/
120
static void U_CALLCONV
121
appendTag(
122
    const char* tag,
123
    int32_t tagLength,
124
    char* buffer,
125
    int32_t* bufferLength,
126
0
    UBool withSeparator) {
127
128
0
    if (withSeparator) {
129
0
        buffer[*bufferLength] = '_';
130
0
        ++(*bufferLength);
131
0
    }
132
133
0
    uprv_memmove(
134
0
        &buffer[*bufferLength],
135
0
        tag,
136
0
        tagLength);
137
138
0
    *bufferLength += tagLength;
139
0
}
140
141
/**
142
 * Create a tag string from the supplied parameters.  The lang, script and region
143
 * parameters may be NULL pointers. If they are, their corresponding length parameters
144
 * must be less than or equal to 0.
145
 *
146
 * If any of the language, script or region parameters are empty, and the alternateTags
147
 * parameter is not NULL, it will be parsed for potential language, script and region tags
148
 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
149
 * it contains no language tag, the default tag for the unknown language is used.
150
 *
151
 * If the length of the new string exceeds the capacity of the output buffer, 
152
 * the function copies as many bytes to the output buffer as it can, and returns
153
 * the error U_BUFFER_OVERFLOW_ERROR.
154
 *
155
 * If an illegal argument is provided, the function returns the error
156
 * U_ILLEGAL_ARGUMENT_ERROR.
157
 *
158
 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159
 * the tag string fits in the output buffer, but the null terminator doesn't.
160
 *
161
 * @param lang The language tag to use.
162
 * @param langLength The length of the language tag.
163
 * @param script The script tag to use.
164
 * @param scriptLength The length of the script tag.
165
 * @param region The region tag to use.
166
 * @param regionLength The length of the region tag.
167
 * @param trailing Any trailing data to append to the new tag.
168
 * @param trailingLength The length of the trailing data.
169
 * @param alternateTags A string containing any alternate tags.
170
 * @param sink The output sink receiving the tag string.
171
 * @param err A pointer to a UErrorCode for error reporting.
172
 **/
173
static void U_CALLCONV
174
createTagStringWithAlternates(
175
    const char* lang,
176
    int32_t langLength,
177
    const char* script,
178
    int32_t scriptLength,
179
    const char* region,
180
    int32_t regionLength,
181
    const char* trailing,
182
    int32_t trailingLength,
183
    const char* alternateTags,
184
    icu::ByteSink& sink,
185
0
    UErrorCode* err) {
186
187
0
    if (U_FAILURE(*err)) {
188
0
        goto error;
189
0
    }
190
0
    else if (langLength >= ULOC_LANG_CAPACITY ||
191
0
             scriptLength >= ULOC_SCRIPT_CAPACITY ||
192
0
             regionLength >= ULOC_COUNTRY_CAPACITY) {
193
0
        goto error;
194
0
    }
195
0
    else {
196
        /**
197
         * ULOC_FULLNAME_CAPACITY will provide enough capacity
198
         * that we can build a string that contains the language,
199
         * script and region code without worrying about overrunning
200
         * the user-supplied buffer.
201
         **/
202
0
        char tagBuffer[ULOC_FULLNAME_CAPACITY];
203
0
        int32_t tagLength = 0;
204
0
        UBool regionAppended = FALSE;
205
206
0
        if (langLength > 0) {
207
0
            appendTag(
208
0
                lang,
209
0
                langLength,
210
0
                tagBuffer,
211
0
                &tagLength,
212
                /*withSeparator=*/FALSE);
213
0
        }
214
0
        else if (alternateTags == NULL) {
215
            /*
216
             * Use the empty string for an unknown language, if
217
             * we found no language.
218
             */
219
0
        }
220
0
        else {
221
            /*
222
             * Parse the alternateTags string for the language.
223
             */
224
0
            char alternateLang[ULOC_LANG_CAPACITY];
225
0
            int32_t alternateLangLength = sizeof(alternateLang);
226
227
0
            alternateLangLength =
228
0
                uloc_getLanguage(
229
0
                    alternateTags,
230
0
                    alternateLang,
231
0
                    alternateLangLength,
232
0
                    err);
233
0
            if(U_FAILURE(*err) ||
234
0
                alternateLangLength >= ULOC_LANG_CAPACITY) {
235
0
                goto error;
236
0
            }
237
0
            else if (alternateLangLength == 0) {
238
                /*
239
                 * Use the empty string for an unknown language, if
240
                 * we found no language.
241
                 */
242
0
            }
243
0
            else {
244
0
                appendTag(
245
0
                    alternateLang,
246
0
                    alternateLangLength,
247
0
                    tagBuffer,
248
0
                    &tagLength,
249
                    /*withSeparator=*/FALSE);
250
0
            }
251
0
        }
252
253
0
        if (scriptLength > 0) {
254
0
            appendTag(
255
0
                script,
256
0
                scriptLength,
257
0
                tagBuffer,
258
0
                &tagLength,
259
                /*withSeparator=*/TRUE);
260
0
        }
261
0
        else if (alternateTags != NULL) {
262
            /*
263
             * Parse the alternateTags string for the script.
264
             */
265
0
            char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267
0
            const int32_t alternateScriptLength =
268
0
                uloc_getScript(
269
0
                    alternateTags,
270
0
                    alternateScript,
271
0
                    sizeof(alternateScript),
272
0
                    err);
273
274
0
            if (U_FAILURE(*err) ||
275
0
                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276
0
                goto error;
277
0
            }
278
0
            else if (alternateScriptLength > 0) {
279
0
                appendTag(
280
0
                    alternateScript,
281
0
                    alternateScriptLength,
282
0
                    tagBuffer,
283
0
                    &tagLength,
284
                    /*withSeparator=*/TRUE);
285
0
            }
286
0
        }
287
288
0
        if (regionLength > 0) {
289
0
            appendTag(
290
0
                region,
291
0
                regionLength,
292
0
                tagBuffer,
293
0
                &tagLength,
294
                /*withSeparator=*/TRUE);
295
296
0
            regionAppended = TRUE;
297
0
        }
298
0
        else if (alternateTags != NULL) {
299
            /*
300
             * Parse the alternateTags string for the region.
301
             */
302
0
            char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304
0
            const int32_t alternateRegionLength =
305
0
                uloc_getCountry(
306
0
                    alternateTags,
307
0
                    alternateRegion,
308
0
                    sizeof(alternateRegion),
309
0
                    err);
310
0
            if (U_FAILURE(*err) ||
311
0
                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312
0
                goto error;
313
0
            }
314
0
            else if (alternateRegionLength > 0) {
315
0
                appendTag(
316
0
                    alternateRegion,
317
0
                    alternateRegionLength,
318
0
                    tagBuffer,
319
0
                    &tagLength,
320
                    /*withSeparator=*/TRUE);
321
322
0
                regionAppended = TRUE;
323
0
            }
324
0
        }
325
326
        /**
327
         * Copy the partial tag from our internal buffer to the supplied
328
         * target.
329
         **/
330
0
        sink.Append(tagBuffer, tagLength);
331
332
0
        if (trailingLength > 0) {
333
0
            if (*trailing != '@') {
334
0
                sink.Append("_", 1);
335
0
                if (!regionAppended) {
336
                    /* extra separator is required */
337
0
                    sink.Append("_", 1);
338
0
                }
339
0
            }
340
341
            /*
342
             * Copy the trailing data into the supplied buffer.
343
             */
344
0
            sink.Append(trailing, trailingLength);
345
0
        }
346
347
0
        return;
348
0
    }
349
350
0
error:
351
352
    /**
353
     * An overflow indicates the locale ID passed in
354
     * is ill-formed.  If we got here, and there was
355
     * no previous error, it's an implicit overflow.
356
     **/
357
0
    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
358
0
        U_SUCCESS(*err)) {
359
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
360
0
    }
361
0
}
362
363
/**
364
 * Create a tag string from the supplied parameters.  The lang, script and region
365
 * parameters may be NULL pointers. If they are, their corresponding length parameters
366
 * must be less than or equal to 0.  If the lang parameter is an empty string, the
367
 * default value for an unknown language is written to the output buffer.
368
 *
369
 * If the length of the new string exceeds the capacity of the output buffer, 
370
 * the function copies as many bytes to the output buffer as it can, and returns
371
 * the error U_BUFFER_OVERFLOW_ERROR.
372
 *
373
 * If an illegal argument is provided, the function returns the error
374
 * U_ILLEGAL_ARGUMENT_ERROR.
375
 *
376
 * @param lang The language tag to use.
377
 * @param langLength The length of the language tag.
378
 * @param script The script tag to use.
379
 * @param scriptLength The length of the script tag.
380
 * @param region The region tag to use.
381
 * @param regionLength The length of the region tag.
382
 * @param trailing Any trailing data to append to the new tag.
383
 * @param trailingLength The length of the trailing data.
384
 * @param sink The output sink receiving the tag string.
385
 * @param err A pointer to a UErrorCode for error reporting.
386
 **/
387
static void U_CALLCONV
388
createTagString(
389
    const char* lang,
390
    int32_t langLength,
391
    const char* script,
392
    int32_t scriptLength,
393
    const char* region,
394
    int32_t regionLength,
395
    const char* trailing,
396
    int32_t trailingLength,
397
    icu::ByteSink& sink,
398
    UErrorCode* err)
399
0
{
400
0
    createTagStringWithAlternates(
401
0
                lang,
402
0
                langLength,
403
0
                script,
404
0
                scriptLength,
405
0
                region,
406
0
                regionLength,
407
0
                trailing,
408
0
                trailingLength,
409
0
                NULL,
410
0
                sink,
411
0
                err);
412
0
}
413
414
/**
415
 * Parse the language, script, and region subtags from a tag string, and copy the
416
 * results into the corresponding output parameters. The buffers are null-terminated,
417
 * unless overflow occurs.
418
 *
419
 * The langLength, scriptLength, and regionLength parameters are input/output
420
 * parameters, and must contain the capacity of their corresponding buffers on
421
 * input.  On output, they will contain the actual length of the buffers, not
422
 * including the null terminator.
423
 *
424
 * If the length of any of the output subtags exceeds the capacity of the corresponding
425
 * buffer, the function copies as many bytes to the output buffer as it can, and returns
426
 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
427
 * occurs.
428
 *
429
 * If an illegal argument is provided, the function returns the error
430
 * U_ILLEGAL_ARGUMENT_ERROR.
431
 *
432
 * @param localeID The locale ID to parse.
433
 * @param lang The language tag buffer.
434
 * @param langLength The length of the language tag.
435
 * @param script The script tag buffer.
436
 * @param scriptLength The length of the script tag.
437
 * @param region The region tag buffer.
438
 * @param regionLength The length of the region tag.
439
 * @param err A pointer to a UErrorCode for error reporting.
440
 * @return The number of chars of the localeID parameter consumed.
441
 **/
442
static int32_t U_CALLCONV
443
parseTagString(
444
    const char* localeID,
445
    char* lang,
446
    int32_t* langLength,
447
    char* script,
448
    int32_t* scriptLength,
449
    char* region,
450
    int32_t* regionLength,
451
    UErrorCode* err)
452
0
{
453
0
    const char* position = localeID;
454
0
    int32_t subtagLength = 0;
455
456
0
    if(U_FAILURE(*err) ||
457
0
       localeID == NULL ||
458
0
       lang == NULL ||
459
0
       langLength == NULL ||
460
0
       script == NULL ||
461
0
       scriptLength == NULL ||
462
0
       region == NULL ||
463
0
       regionLength == NULL) {
464
0
        goto error;
465
0
    }
466
467
0
    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
468
469
    /*
470
     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
471
     * to be an error, because it indicates the user-supplied tag is
472
     * not well-formed.
473
     */
474
0
    if(U_FAILURE(*err)) {
475
0
        goto error;
476
0
    }
477
478
0
    *langLength = subtagLength;
479
480
    /*
481
     * If no language was present, use the empty string instead.
482
     * Otherwise, move past any separator.
483
     */
484
0
    if (_isIDSeparator(*position)) {
485
0
        ++position;
486
0
    }
487
488
0
    subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
489
490
0
    if(U_FAILURE(*err)) {
491
0
        goto error;
492
0
    }
493
494
0
    *scriptLength = subtagLength;
495
496
0
    if (*scriptLength > 0) {
497
0
        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
498
            /**
499
             * If the script part is the "unknown" script, then don't return it.
500
             **/
501
0
            *scriptLength = 0;
502
0
        }
503
504
        /*
505
         * Move past any separator.
506
         */
507
0
        if (_isIDSeparator(*position)) {
508
0
            ++position;
509
0
        }    
510
0
    }
511
512
0
    subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
513
514
0
    if(U_FAILURE(*err)) {
515
0
        goto error;
516
0
    }
517
518
0
    *regionLength = subtagLength;
519
520
0
    if (*regionLength > 0) {
521
0
        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
522
            /**
523
             * If the region part is the "unknown" region, then don't return it.
524
             **/
525
0
            *regionLength = 0;
526
0
        }
527
0
    } else if (*position != 0 && *position != '@') {
528
        /* back up over consumed trailing separator */
529
0
        --position;
530
0
    }
531
532
0
exit:
533
534
0
    return (int32_t)(position - localeID);
535
536
0
error:
537
538
    /**
539
     * If we get here, we have no explicit error, it's the result of an
540
     * illegal argument.
541
     **/
542
0
    if (!U_FAILURE(*err)) {
543
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
544
0
    }
545
546
0
    goto exit;
547
0
}
548
549
static UBool U_CALLCONV
550
createLikelySubtagsString(
551
    const char* lang,
552
    int32_t langLength,
553
    const char* script,
554
    int32_t scriptLength,
555
    const char* region,
556
    int32_t regionLength,
557
    const char* variants,
558
    int32_t variantsLength,
559
    icu::ByteSink& sink,
560
0
    UErrorCode* err) {
561
    /**
562
     * ULOC_FULLNAME_CAPACITY will provide enough capacity
563
     * that we can build a string that contains the language,
564
     * script and region code without worrying about overrunning
565
     * the user-supplied buffer.
566
     **/
567
0
    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
568
569
0
    if(U_FAILURE(*err)) {
570
0
        goto error;
571
0
    }
572
573
    /**
574
     * Try the language with the script and region first.
575
     **/
576
0
    if (scriptLength > 0 && regionLength > 0) {
577
578
0
        const char* likelySubtags = NULL;
579
580
0
        icu::CharString tagBuffer;
581
0
        {
582
0
            icu::CharStringByteSink sink(&tagBuffer);
583
0
            createTagString(
584
0
                lang,
585
0
                langLength,
586
0
                script,
587
0
                scriptLength,
588
0
                region,
589
0
                regionLength,
590
0
                NULL,
591
0
                0,
592
0
                sink,
593
0
                err);
594
0
        }
595
0
        if(U_FAILURE(*err)) {
596
0
            goto error;
597
0
        }
598
599
0
        likelySubtags =
600
0
            findLikelySubtags(
601
0
                tagBuffer.data(),
602
0
                likelySubtagsBuffer,
603
0
                sizeof(likelySubtagsBuffer),
604
0
                err);
605
0
        if(U_FAILURE(*err)) {
606
0
            goto error;
607
0
        }
608
609
0
        if (likelySubtags != NULL) {
610
            /* Always use the language tag from the
611
               maximal string, since it may be more
612
               specific than the one provided. */
613
0
            createTagStringWithAlternates(
614
0
                        NULL,
615
0
                        0,
616
0
                        NULL,
617
0
                        0,
618
0
                        NULL,
619
0
                        0,
620
0
                        variants,
621
0
                        variantsLength,
622
0
                        likelySubtags,
623
0
                        sink,
624
0
                        err);
625
0
            return TRUE;
626
0
        }
627
0
    }
628
629
    /**
630
     * Try the language with just the script.
631
     **/
632
0
    if (scriptLength > 0) {
633
634
0
        const char* likelySubtags = NULL;
635
636
0
        icu::CharString tagBuffer;
637
0
        {
638
0
            icu::CharStringByteSink sink(&tagBuffer);
639
0
            createTagString(
640
0
                lang,
641
0
                langLength,
642
0
                script,
643
0
                scriptLength,
644
0
                NULL,
645
0
                0,
646
0
                NULL,
647
0
                0,
648
0
                sink,
649
0
                err);
650
0
        }
651
0
        if(U_FAILURE(*err)) {
652
0
            goto error;
653
0
        }
654
655
0
        likelySubtags =
656
0
            findLikelySubtags(
657
0
                tagBuffer.data(),
658
0
                likelySubtagsBuffer,
659
0
                sizeof(likelySubtagsBuffer),
660
0
                err);
661
0
        if(U_FAILURE(*err)) {
662
0
            goto error;
663
0
        }
664
665
0
        if (likelySubtags != NULL) {
666
            /* Always use the language tag from the
667
               maximal string, since it may be more
668
               specific than the one provided. */
669
0
            createTagStringWithAlternates(
670
0
                        NULL,
671
0
                        0,
672
0
                        NULL,
673
0
                        0,
674
0
                        region,
675
0
                        regionLength,
676
0
                        variants,
677
0
                        variantsLength,
678
0
                        likelySubtags,
679
0
                        sink,
680
0
                        err);
681
0
            return TRUE;
682
0
        }
683
0
    }
684
685
    /**
686
     * Try the language with just the region.
687
     **/
688
0
    if (regionLength > 0) {
689
690
0
        const char* likelySubtags = NULL;
691
692
0
        icu::CharString tagBuffer;
693
0
        {
694
0
            icu::CharStringByteSink sink(&tagBuffer);
695
0
            createTagString(
696
0
                lang,
697
0
                langLength,
698
0
                NULL,
699
0
                0,
700
0
                region,
701
0
                regionLength,
702
0
                NULL,
703
0
                0,
704
0
                sink,
705
0
                err);
706
0
        }
707
0
        if(U_FAILURE(*err)) {
708
0
            goto error;
709
0
        }
710
711
0
        likelySubtags =
712
0
            findLikelySubtags(
713
0
                tagBuffer.data(),
714
0
                likelySubtagsBuffer,
715
0
                sizeof(likelySubtagsBuffer),
716
0
                err);
717
0
        if(U_FAILURE(*err)) {
718
0
            goto error;
719
0
        }
720
721
0
        if (likelySubtags != NULL) {
722
            /* Always use the language tag from the
723
               maximal string, since it may be more
724
               specific than the one provided. */
725
0
            createTagStringWithAlternates(
726
0
                        NULL,
727
0
                        0,
728
0
                        script,
729
0
                        scriptLength,
730
0
                        NULL,
731
0
                        0,
732
0
                        variants,
733
0
                        variantsLength,
734
0
                        likelySubtags,
735
0
                        sink,
736
0
                        err);
737
0
            return TRUE;
738
0
        }
739
0
    }
740
741
    /**
742
     * Finally, try just the language.
743
     **/
744
0
    {
745
0
        const char* likelySubtags = NULL;
746
747
0
        icu::CharString tagBuffer;
748
0
        {
749
0
            icu::CharStringByteSink sink(&tagBuffer);
750
0
            createTagString(
751
0
                lang,
752
0
                langLength,
753
0
                NULL,
754
0
                0,
755
0
                NULL,
756
0
                0,
757
0
                NULL,
758
0
                0,
759
0
                sink,
760
0
                err);
761
0
        }
762
0
        if(U_FAILURE(*err)) {
763
0
            goto error;
764
0
        }
765
766
0
        likelySubtags =
767
0
            findLikelySubtags(
768
0
                tagBuffer.data(),
769
0
                likelySubtagsBuffer,
770
0
                sizeof(likelySubtagsBuffer),
771
0
                err);
772
0
        if(U_FAILURE(*err)) {
773
0
            goto error;
774
0
        }
775
776
0
        if (likelySubtags != NULL) {
777
            /* Always use the language tag from the
778
               maximal string, since it may be more
779
               specific than the one provided. */
780
0
            createTagStringWithAlternates(
781
0
                        NULL,
782
0
                        0,
783
0
                        script,
784
0
                        scriptLength,
785
0
                        region,
786
0
                        regionLength,
787
0
                        variants,
788
0
                        variantsLength,
789
0
                        likelySubtags,
790
0
                        sink,
791
0
                        err);
792
0
            return TRUE;
793
0
        }
794
0
    }
795
796
0
    return FALSE;
797
798
0
error:
799
800
0
    if (!U_FAILURE(*err)) {
801
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
802
0
    }
803
804
0
    return FALSE;
805
0
}
806
807
0
/**
 * Scan the trailing part of a locale ID and jump to the enclosing
 * function's `error` label when a subtag in it is too long.
 *
 * Characters are counted per subtag; '-' and '_' start a new subtag and
 * scanning stops at the first '@'.  The length check happens before a
 * character is counted, so a subtag of up to nine characters is accepted
 * and the tenth character triggers `goto error`.
 *
 * The using function must define an `error` label.
 *
 * @param trailing The trailing data to scan (indexable as chars).
 * @param trailingLength The number of chars in trailing.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
825
826
/**
 * Add likely subtags to a locale ID and write the result to a sink.
 *
 * The locale ID is split into language, script, region and a trailing
 * part; the trailing part is checked with CHECK_TRAILING_VARIANT_SIZE and
 * then createLikelySubtagsString() produces the output.  When that call
 * returns FALSE, the original localeID is written to the sink unchanged.
 *
 * @param localeID The locale ID to maximize; NULL is an illegal argument.
 * @param sink The output sink receiving the result.
 * @param err A pointer to a UErrorCode for error reporting.  A buffer
 *            overflow while parsing is reported as
 *            U_ILLEGAL_ARGUMENT_ERROR.
 * @return The value returned by createLikelySubtagsString(), or FALSE if
 *         the input was rejected before that call.
 */
static UBool
_uloc_addLikelySubtags(const char* localeID,
                       icu::ByteSink& sink,
                       UErrorCode* err) {
    char lang[ULOC_LANG_CAPACITY];
    char script[ULOC_SCRIPT_CAPACITY];
    char region[ULOC_COUNTRY_CAPACITY];
    int32_t langLength = sizeof(lang);
    int32_t scriptLength = sizeof(script);
    int32_t regionLength = sizeof(region);
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
    UBool success = FALSE;

    if (U_FAILURE(*err) || localeID == NULL) {
        goto error;
    }

    trailingIndex = parseTagString(
        localeID,
        lang, &langLength,
        script, &scriptLength,
        region, &regionLength,
        err);
    if (U_FAILURE(*err)) {
        /* Overflow indicates an illegal argument error */
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
        }
        goto error;
    }

    /* Skip separators to find where the trailing portion starts. */
    for (; _isIDSeparator(localeID[trailingIndex]); trailingIndex++) {
    }
    trailing = localeID + trailingIndex;
    trailingLength = (int32_t)uprv_strlen(trailing);

    /* Jumps to the error label below when a trailing subtag is too long. */
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    success =
        createLikelySubtagsString(
            lang, langLength,
            script, scriptLength,
            region, regionLength,
            trailing, trailingLength,
            sink,
            err);

    if (!success) {
        /* Nothing was produced, so return localeID itself. */
        sink.Append(localeID, (int32_t)uprv_strlen(localeID));
    }

    return success;

error:

    /* Reached without an explicit error: the argument itself was bad. */
    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return FALSE;
}
906
907
// Add likely subtags to the sink
908
// return true if the value in the sink is produced by a match during the lookup
909
// return false if the value in the sink is the same as input because there is
910
// no match after the lookup.
911
static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
912
913
static void
914
_uloc_minimizeSubtags(const char* localeID,
915
                      icu::ByteSink& sink,
916
0
                      UErrorCode* err) {
917
0
    icu::CharString maximizedTagBuffer;
918
919
0
    char lang[ULOC_LANG_CAPACITY];
920
0
    int32_t langLength = sizeof(lang);
921
0
    char script[ULOC_SCRIPT_CAPACITY];
922
0
    int32_t scriptLength = sizeof(script);
923
0
    char region[ULOC_COUNTRY_CAPACITY];
924
0
    int32_t regionLength = sizeof(region);
925
0
    const char* trailing = "";
926
0
    int32_t trailingLength = 0;
927
0
    int32_t trailingIndex = 0;
928
0
    UBool successGetMax = FALSE;
929
930
0
    if(U_FAILURE(*err)) {
931
0
        goto error;
932
0
    }
933
0
    else if (localeID == NULL) {
934
0
        goto error;
935
0
    }
936
937
0
    trailingIndex =
938
0
        parseTagString(
939
0
            localeID,
940
0
            lang,
941
0
            &langLength,
942
0
            script,
943
0
            &scriptLength,
944
0
            region,
945
0
            &regionLength,
946
0
            err);
947
0
    if(U_FAILURE(*err)) {
948
949
        /* Overflow indicates an illegal argument error */
950
0
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
951
0
            *err = U_ILLEGAL_ARGUMENT_ERROR;
952
0
        }
953
954
0
        goto error;
955
0
    }
956
957
    /* Find the spot where the variants or the keywords begin, if any. */
958
0
    while (_isIDSeparator(localeID[trailingIndex])) {
959
0
        trailingIndex++;
960
0
    }
961
0
    trailing = &localeID[trailingIndex];
962
0
    trailingLength = (int32_t)uprv_strlen(trailing);
963
964
0
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
965
966
0
    {
967
0
        icu::CharString base;
968
0
        {
969
0
            icu::CharStringByteSink baseSink(&base);
970
0
            createTagString(
971
0
                lang,
972
0
                langLength,
973
0
                script,
974
0
                scriptLength,
975
0
                region,
976
0
                regionLength,
977
0
                NULL,
978
0
                0,
979
0
                baseSink,
980
0
                err);
981
0
        }
982
983
        /**
984
         * First, we need to first get the maximization
985
         * from AddLikelySubtags.
986
         **/
987
0
        {
988
0
            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
989
0
            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
990
0
        }
991
0
    }
992
993
0
    if(U_FAILURE(*err)) {
994
0
        goto error;
995
0
    }
996
997
0
    if (!successGetMax) {
998
        /**
999
         * If we got here, return the locale ID parameter unchanged.
1000
         **/
1001
0
        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1002
0
        sink.Append(localeID, localeIDLength);
1003
0
        return;
1004
0
    }
1005
1006
    // In the following, the lang, script, region are referring to those in
1007
    // the maximizedTagBuffer, not the one in the localeID.
1008
0
    langLength = sizeof(lang);
1009
0
    scriptLength = sizeof(script);
1010
0
    regionLength = sizeof(region);
1011
0
    parseTagString(
1012
0
        maximizedTagBuffer.data(),
1013
0
        lang,
1014
0
        &langLength,
1015
0
        script,
1016
0
        &scriptLength,
1017
0
        region,
1018
0
        &regionLength,
1019
0
        err);
1020
0
    if(U_FAILURE(*err)) {
1021
0
        goto error;
1022
0
    }
1023
1024
    /**
1025
     * Start first with just the language.
1026
     **/
1027
0
    {
1028
0
        icu::CharString tagBuffer;
1029
0
        {
1030
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1031
0
            createLikelySubtagsString(
1032
0
                lang,
1033
0
                langLength,
1034
0
                NULL,
1035
0
                0,
1036
0
                NULL,
1037
0
                0,
1038
0
                NULL,
1039
0
                0,
1040
0
                tagSink,
1041
0
                err);
1042
0
        }
1043
1044
0
        if(U_FAILURE(*err)) {
1045
0
            goto error;
1046
0
        }
1047
0
        else if (!tagBuffer.isEmpty() &&
1048
0
                 uprv_strnicmp(
1049
0
                    maximizedTagBuffer.data(),
1050
0
                    tagBuffer.data(),
1051
0
                    tagBuffer.length()) == 0) {
1052
1053
0
            createTagString(
1054
0
                        lang,
1055
0
                        langLength,
1056
0
                        NULL,
1057
0
                        0,
1058
0
                        NULL,
1059
0
                        0,
1060
0
                        trailing,
1061
0
                        trailingLength,
1062
0
                        sink,
1063
0
                        err);
1064
0
            return;
1065
0
        }
1066
0
    }
1067
1068
    /**
1069
     * Next, try the language and region.
1070
     **/
1071
0
    if (regionLength > 0) {
1072
1073
0
        icu::CharString tagBuffer;
1074
0
        {
1075
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1076
0
            createLikelySubtagsString(
1077
0
                lang,
1078
0
                langLength,
1079
0
                NULL,
1080
0
                0,
1081
0
                region,
1082
0
                regionLength,
1083
0
                NULL,
1084
0
                0,
1085
0
                tagSink,
1086
0
                err);
1087
0
        }
1088
1089
0
        if(U_FAILURE(*err)) {
1090
0
            goto error;
1091
0
        }
1092
0
        else if (!tagBuffer.isEmpty() &&
1093
0
                 uprv_strnicmp(
1094
0
                    maximizedTagBuffer.data(),
1095
0
                    tagBuffer.data(),
1096
0
                    tagBuffer.length()) == 0) {
1097
1098
0
            createTagString(
1099
0
                        lang,
1100
0
                        langLength,
1101
0
                        NULL,
1102
0
                        0,
1103
0
                        region,
1104
0
                        regionLength,
1105
0
                        trailing,
1106
0
                        trailingLength,
1107
0
                        sink,
1108
0
                        err);
1109
0
            return;
1110
0
        }
1111
0
    }
1112
1113
    /**
1114
     * Finally, try the language and script.  This is our last chance,
1115
     * since trying with all three subtags would only yield the
1116
     * maximal version that we already have.
1117
     **/
1118
0
    if (scriptLength > 0) {
1119
0
        icu::CharString tagBuffer;
1120
0
        {
1121
0
            icu::CharStringByteSink tagSink(&tagBuffer);
1122
0
            createLikelySubtagsString(
1123
0
                lang,
1124
0
                langLength,
1125
0
                script,
1126
0
                scriptLength,
1127
0
                NULL,
1128
0
                0,
1129
0
                NULL,
1130
0
                0,
1131
0
                tagSink,
1132
0
                err);
1133
0
        }
1134
1135
0
        if(U_FAILURE(*err)) {
1136
0
            goto error;
1137
0
        }
1138
0
        else if (!tagBuffer.isEmpty() &&
1139
0
                 uprv_strnicmp(
1140
0
                    maximizedTagBuffer.data(),
1141
0
                    tagBuffer.data(),
1142
0
                    tagBuffer.length()) == 0) {
1143
1144
0
            createTagString(
1145
0
                        lang,
1146
0
                        langLength,
1147
0
                        script,
1148
0
                        scriptLength,
1149
0
                        NULL,
1150
0
                        0,
1151
0
                        trailing,
1152
0
                        trailingLength,
1153
0
                        sink,
1154
0
                        err);
1155
0
            return;
1156
0
        }
1157
0
    }
1158
1159
0
    {
1160
        /**
1161
         * If we got here, return the max + trail.
1162
         **/
1163
0
        createTagString(
1164
0
                    lang,
1165
0
                    langLength,
1166
0
                    script,
1167
0
                    scriptLength,
1168
0
                    region,
1169
0
                    regionLength,
1170
0
                    trailing,
1171
0
                    trailingLength,
1172
0
                    sink,
1173
0
                    err);
1174
0
        return;
1175
0
    }
1176
1177
0
error:
1178
1179
0
    if (!U_FAILURE(*err)) {
1180
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
1181
0
    }
1182
0
}
1183
1184
/**
 * Canonicalizes localeID into the caller's buffer via uloc_canonicalize().
 *
 * @param localeID       The locale ID to canonicalize.
 * @param buffer         Receives the canonical form.
 * @param bufferCapacity Capacity of "buffer".
 * @param err            In/out error code. U_STRING_NOT_TERMINATED_WARNING and
 *                       U_BUFFER_OVERFLOW_ERROR are both replaced by
 *                       U_ILLEGAL_ARGUMENT_ERROR.
 * @return TRUE if *err indicates success afterwards, FALSE otherwise.
 */
static UBool
do_canonicalize(const char*    localeID,
         char* buffer,
         int32_t bufferCapacity,
         UErrorCode* err)
{
    uloc_canonicalize(localeID, buffer, bufferCapacity, err);

    // A result that does not fit into the buffer together with its
    // terminator is reported as an illegal argument.
    if (*err == U_BUFFER_OVERFLOW_ERROR ||
        *err == U_STRING_NOT_TERMINATED_WARNING) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    return U_SUCCESS(*err) ? TRUE : FALSE;
}
1210
1211
/**
 * Buffer-based wrapper around ulocimp_addLikelySubtags().
 *
 * @param localeID                  The locale ID to maximize.
 * @param maximizedLocaleID         Output buffer.
 * @param maximizedLocaleIDCapacity Capacity of the output buffer.
 * @param status                    In/out error code.
 * @return 0 if *status was already a failure on entry. If the operation
 *         fails: the number of bytes appended when the sink overflowed,
 *         otherwise -1. On success: the number of bytes appended, with
 *         *status set to U_BUFFER_OVERFLOW_ERROR if the sink overflowed.
 */
U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
                      char* maximizedLocaleID,
                      int32_t maximizedLocaleIDCapacity,
                      UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            maximizedLocaleID, maximizedLocaleIDCapacity);
    ulocimp_addLikelySubtags(localeID, sink, status);

    const int32_t written = sink.NumberOfBytesAppended();
    const UBool overflowed = sink.Overflowed();

    if (U_FAILURE(*status)) {
        return overflowed ? written : -1;
    }

    if (overflowed) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return written;
    }

    // The output fit: let u_terminateChars() handle termination/status.
    u_terminateChars(
            maximizedLocaleID, maximizedLocaleIDCapacity, written, status);
    return written;
}
1239
1240
/**
 * Canonicalizes localeID and then adds likely subtags to the sink.
 *
 * @param localeID The locale ID to maximize.
 * @param sink     Receives the result.
 * @param status   In/out error code.
 * @return FALSE if canonicalization fails; otherwise the result of
 *         _uloc_addLikelySubtags() on the canonical form.
 */
static UBool
_ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode* status) {
    char canonical[ULOC_FULLNAME_CAPACITY];

    if (!do_canonicalize(localeID, canonical, sizeof canonical, status)) {
        return FALSE;
    }
    return _uloc_addLikelySubtags(canonical, sink, status);
}
1251
1252
/**
 * Exported ByteSink variant of adding likely subtags: forwards to
 * _ulocimp_addLikelySubtags() and discards its "match found" result.
 *
 * @param localeID The locale ID to maximize.
 * @param sink     Receives the result.
 * @param status   In/out error code.
 */
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
    (void)_ulocimp_addLikelySubtags(localeID, sink, status);
}
1258
1259
/**
 * Buffer-based wrapper around ulocimp_minimizeSubtags().
 *
 * @param localeID                  The locale ID to minimize.
 * @param minimizedLocaleID         Output buffer.
 * @param minimizedLocaleIDCapacity Capacity of the output buffer.
 * @param status                    In/out error code.
 * @return 0 if *status was already a failure on entry. If the operation
 *         fails: the number of bytes appended when the sink overflowed,
 *         otherwise -1. On success: the number of bytes appended, with
 *         *status set to U_BUFFER_OVERFLOW_ERROR if the sink overflowed.
 */
U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char* localeID,
                     char* minimizedLocaleID,
                     int32_t minimizedLocaleIDCapacity,
                     UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            minimizedLocaleID, minimizedLocaleIDCapacity);
    ulocimp_minimizeSubtags(localeID, sink, status);

    const int32_t written = sink.NumberOfBytesAppended();
    const UBool overflowed = sink.Overflowed();

    if (U_FAILURE(*status)) {
        return overflowed ? written : -1;
    }

    if (overflowed) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return written;
    }

    // The output fit: let u_terminateChars() handle termination/status.
    u_terminateChars(
            minimizedLocaleID, minimizedLocaleIDCapacity, written, status);
    return written;
}
1287
1288
/**
 * Exported ByteSink variant of minimizing subtags: canonicalizes localeID
 * and, if that succeeds, writes the minimized form to the sink.
 *
 * @param localeID The locale ID to minimize.
 * @param sink     Receives the result; untouched if canonicalization fails.
 * @param status   In/out error code.
 */
U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
                        UErrorCode* status) {
    char canonical[ULOC_FULLNAME_CAPACITY];

    if (!do_canonicalize(localeID, canonical, sizeof canonical, status)) {
        return;
    }
    _uloc_minimizeSubtags(canonical, sink, status);
}
1298
1299
// Fast lookup table for the writing direction of some common languages.
// Each entry is a language subtag immediately followed by one marker
// character: '-' means left-to-right, '+' means right-to-left.
static const char LANG_DIR_STRING[] = "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1303
1304
// Implemented here because this calls ulocimp_addLikelySubtags().
1305
/**
 * Reports whether the locale's script is written right-to-left.
 *
 * If the locale carries an explicit script, that script decides. Otherwise
 * the language is first looked up in LANG_DIR_STRING; if that gives no
 * answer, the script is taken from the likely-subtags expansion of the
 * locale.
 *
 * @param locale The locale ID to examine.
 * @return The result of uscript_isRightToLeft() for the determined script,
 *         or the direction recorded in LANG_DIR_STRING; FALSE whenever the
 *         language or script cannot be determined.
 */
U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char *locale) {
    char script[8];
    UErrorCode errorCode = U_ZERO_ERROR;
    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);

    const UBool scriptMissing =
            U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
            scriptLength == 0;
    if (scriptMissing) {
        // Fastpath: the likely script and its direction are known for
        // some common languages.
        char lang[8];
        errorCode = U_ZERO_ERROR;
        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return FALSE;
        }
        if (langLength > 0) {
            const char* hit = uprv_strstr(LANG_DIR_STRING, lang);
            if (hit != NULL) {
                // The character right after the matched text is the
                // direction marker, unless only part of a longer code matched.
                const char marker = hit[langLength];
                if (marker == '-') {
                    return FALSE;
                }
                if (marker == '+') {
                    return TRUE;
                }
            }
        }

        // Slow path: derive the script from the likely subtags.
        icu::CharString likely;
        errorCode = U_ZERO_ERROR;
        {
            icu::CharStringByteSink sink(&likely);
            ulocimp_addLikelySubtags(locale, sink, &errorCode);
        }
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return FALSE;
        }

        scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
                scriptLength == 0) {
            return FALSE;
        }
    }

    const UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
    return uscript_isRightToLeft(scriptCode);
}
1349
1350
U_NAMESPACE_BEGIN
1351
1352
/**
 * Reports whether this locale is written right-to-left by passing its base
 * name to uloc_isRightToLeft().
 */
UBool
Locale::isRightToLeft() const {
    const char* baseName = getBaseName();
    return uloc_isRightToLeft(baseName);
}
1356
1357
U_NAMESPACE_END
1358
1359
// Size of the scratch buffer used for region lookups. It has to hold at
// least an "rg" keyword value (6 characters) plus its terminator (1).
#define ULOC_RG_BUFLEN 8
1361
1362
/**
 * Determines the region of a locale, in this order of preference:
 *   1. an "rg" keyword value of length 6 whose last four characters are
 *      "ZZZZ" (case-insensitive) - its first two characters, uppercased;
 *   2. the locale's own region as returned by uloc_getCountry();
 *   3. if inferRegion is set and no region was found, the region of the
 *      locale after adding likely subtags.
 *
 * @param localeID       The locale ID to examine.
 * @param inferRegion    Whether step 3 may be used.
 * @param region         Output buffer for the region.
 * @param regionCapacity Capacity of "region".
 * @param status         In/out error code.
 * @return 0 if *status was already a failure on entry; otherwise the value
 *         returned by u_terminateChars() for the region that was found.
 */
U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
                                     char *region, int32_t regionCapacity, UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    char rgBuf[ULOC_RG_BUFLEN];
    int32_t rgLen = 0;
    UErrorCode rgStatus = U_ZERO_ERROR;

    // Step 1: look at the rg keyword value.
    const int32_t rgValueLength =
            uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
    if (U_SUCCESS(rgStatus) && rgValueLength == 6) {
        // A 6-character value leaves room for the terminator in rgBuf, so
        // the buffer is expected to be zero-terminated at this point.
        for (int32_t i = 0; rgBuf[i] != 0; ++i) {
            rgBuf[i] = uprv_toupper(rgBuf[i]);
        }
        if (uprv_strcmp(rgBuf + 2, "ZZZZ") == 0) {
            rgLen = 2;
        }
    }

    if (rgLen == 0) {
        // Step 2: no usable rg value, fall back to unicode_region_subtag.
        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
        if (U_FAILURE(*status)) {
            rgLen = 0;
        } else if (rgLen == 0 && inferRegion) {
            // Step 3: no region in the locale itself; try likely subtags.
            icu::CharString locBuf;
            rgStatus = U_ZERO_ERROR;
            {
                icu::CharStringByteSink sink(&locBuf);
                ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
            }
            if (U_SUCCESS(rgStatus)) {
                rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
                if (U_FAILURE(*status)) {
                    rgLen = 0;
                }
            }
        }
    }

    // Cut the scratch buffer at the region length before copying it out.
    rgBuf[rgLen] = 0;
    uprv_strncpy(region, rgBuf, regionCapacity);
    return u_terminateChars(region, regionCapacity, rgLen, status);
}
1410