/src/icu/source/common/locutil.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  |  *******************************************************************************  | 
5  |  |  * Copyright (C) 2002-2014, International Business Machines Corporation and  | 
6  |  |  * others. All Rights Reserved.  | 
7  |  |  *******************************************************************************  | 
8  |  |  */  | 
9  |  | #include "unicode/utypes.h"  | 
10  |  |  | 
11  |  | #if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION  | 
12  |  |  | 
13  |  | #include "unicode/resbund.h"  | 
14  |  | #include "unicode/uenum.h"  | 
15  |  | #include "cmemory.h"  | 
16  |  | #include "ustrfmt.h"  | 
17  |  | #include "locutil.h"  | 
18  |  | #include "charstr.h"  | 
19  |  | #include "ucln_cmn.h"  | 
20  |  | #include "uassert.h"  | 
21  |  | #include "umutex.h"  | 
22  |  |  | 
23  |  | // see LocaleUtility::getAvailableLocaleNames  | 
24  |  | static icu::UInitOnce   LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;  | 
25  |  | static icu::Hashtable * LocaleUtility_cache = NULL;  | 
26  |  |  | 
27  | 0  | #define UNDERSCORE_CHAR ((UChar)0x005f)  | 
28  | 0  | #define AT_SIGN_CHAR    ((UChar)64)  | 
29  | 0  | #define PERIOD_CHAR     ((UChar)46)  | 
30  |  |  | 
31  |  | /*  | 
32  |  |  ******************************************************************  | 
33  |  |  */  | 
34  |  |  | 
35  |  | /**  | 
36  |  |  * Release all static memory held by Locale Utility.    | 
37  |  |  */  | 
38  |  | U_CDECL_BEGIN  | 
39  | 0  | static UBool U_CALLCONV service_cleanup(void) { | 
40  | 0  |     if (LocaleUtility_cache) { | 
41  | 0  |         delete LocaleUtility_cache;  | 
42  | 0  |         LocaleUtility_cache = NULL;  | 
43  | 0  |     }  | 
44  | 0  |     return TRUE;  | 
45  | 0  | }  | 
46  |  |  | 
47  |  |  | 
48  | 0  | static void U_CALLCONV locale_utility_init(UErrorCode &status) { | 
49  | 0  |     using namespace icu;  | 
50  | 0  |     U_ASSERT(LocaleUtility_cache == NULL);  | 
51  | 0  |     ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);  | 
52  | 0  |     LocaleUtility_cache = new Hashtable(status);  | 
53  | 0  |     if (U_FAILURE(status)) { | 
54  | 0  |         delete LocaleUtility_cache;  | 
55  | 0  |         LocaleUtility_cache = NULL;  | 
56  | 0  |         return;  | 
57  | 0  |     }  | 
58  | 0  |     if (LocaleUtility_cache == NULL) { | 
59  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
60  | 0  |         return;  | 
61  | 0  |     }  | 
62  | 0  |     LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);  | 
63  | 0  | }  | 
64  |  |  | 
65  |  | U_CDECL_END  | 
66  |  |  | 
67  |  | U_NAMESPACE_BEGIN  | 
68  |  |  | 
69  |  | UnicodeString&  | 
70  |  | LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)  | 
71  | 0  | { | 
72  | 0  |   if (id == NULL) { | 
73  | 0  |     result.setToBogus();  | 
74  | 0  |   } else { | 
75  |  |     // Fix case only (no other changes) up to the first '@' or '.' or  | 
76  |  |     // end of string, whichever comes first.  In 3.0 I changed this to  | 
77  |  |     // stop at first '@' or '.'.  It used to run out to the end of  | 
78  |  |     // string.  My fix makes the tests pass but is probably  | 
79  |  |     // structurally incorrect.  See below.  [alan 3.0]  | 
80  |  |  | 
81  |  |     // TODO: Doug, you might want to revise this...  | 
82  | 0  |     result = *id;  | 
83  | 0  |     int32_t i = 0;  | 
84  | 0  |     int32_t end = result.indexOf(AT_SIGN_CHAR);  | 
85  | 0  |     int32_t n = result.indexOf(PERIOD_CHAR);  | 
86  | 0  |     if (n >= 0 && n < end) { | 
87  | 0  |         end = n;  | 
88  | 0  |     }  | 
89  | 0  |     if (end < 0) { | 
90  | 0  |         end = result.length();  | 
91  | 0  |     }  | 
92  | 0  |     n = result.indexOf(UNDERSCORE_CHAR);  | 
93  | 0  |     if (n < 0) { | 
94  | 0  |       n = end;  | 
95  | 0  |     }  | 
96  | 0  |     for (; i < n; ++i) { | 
97  | 0  |       UChar c = result.charAt(i);  | 
98  | 0  |       if (c >= 0x0041 && c <= 0x005a) { | 
99  | 0  |         c += 0x20;  | 
100  | 0  |         result.setCharAt(i, c);  | 
101  | 0  |       }  | 
102  | 0  |     }  | 
103  | 0  |     for (n = end; i < n; ++i) { | 
104  | 0  |       UChar c = result.charAt(i);  | 
105  | 0  |       if (c >= 0x0061 && c <= 0x007a) { | 
106  | 0  |         c -= 0x20;  | 
107  | 0  |         result.setCharAt(i, c);  | 
108  | 0  |       }  | 
109  | 0  |     }  | 
110  | 0  |   }  | 
111  | 0  |   return result;  | 
112  |  | 
  | 
113  |  | #if 0  | 
114  |  |     // This code does a proper full level 2 canonicalization of id.  | 
115  |  |     // It's nasty to go from UChar to char to char to UChar -- but  | 
116  |  |     // that's what you have to do to use the uloc_canonicalize  | 
117  |  |     // function on UnicodeStrings.  | 
118  |  |  | 
119  |  |     // I ended up doing the alternate fix (see above) not for  | 
120  |  |     // performance reasons, although performance will certainly be  | 
121  |  |     // better, but because doing a full level 2 canonicalization  | 
122  |  |     // causes some tests to fail.  [alan 3.0]  | 
123  |  |  | 
124  |  |     // TODO: Doug, you might want to revisit this...  | 
125  |  |     result.setToBogus();  | 
126  |  |     if (id != 0) { | 
127  |  |         int32_t buflen = id->length() + 8; // space for NUL  | 
128  |  |         char* buf = (char*) uprv_malloc(buflen);  | 
129  |  |         char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);  | 
130  |  |         if (buf != 0 && canon != 0) { | 
131  |  |             U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);  | 
132  |  |             UErrorCode ec = U_ZERO_ERROR;  | 
133  |  |             uloc_canonicalize(buf, canon, buflen, &ec);  | 
134  |  |             if (U_SUCCESS(ec)) { | 
135  |  |                 result = UnicodeString(canon);  | 
136  |  |             }  | 
137  |  |         }  | 
138  |  |         uprv_free(buf);  | 
139  |  |         uprv_free(canon);  | 
140  |  |     }  | 
141  |  |     return result;  | 
142  |  | #endif  | 
143  | 0  | }  | 
144  |  |  | 
145  |  | Locale&  | 
146  |  | LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)  | 
147  | 0  | { | 
148  | 0  |     enum { BUFLEN = 128 }; // larger than ever needed | 
149  |  | 
  | 
150  | 0  |     if (id.isBogus() || id.length() >= BUFLEN) { | 
151  | 0  |         result.setToBogus();  | 
152  | 0  |     } else { | 
153  |  |         /*  | 
154  |  |          * We need to convert from a UnicodeString to char * in order to  | 
155  |  |          * create a Locale.  | 
156  |  |          *  | 
157  |  |          * Problem: Locale ID strings may contain '@' which is a variant  | 
158  |  |          * character and cannot be handled by invariant-character conversion.  | 
159  |  |          *  | 
160  |  |          * Hack: Since ICU code can handle locale IDs with multiple encodings  | 
161  |  |          * of '@' (at least for EBCDIC; it's not known to be a problem for  | 
162  |  |          * ASCII-based systems),  | 
163  |  |          * we use regular invariant-character conversion for everything else  | 
164  |  |          * and manually convert U+0040 into a compiler-char-constant '@'.  | 
165  |  |          * While this compilation-time constant may not match the runtime  | 
166  |  |          * encoding of '@', it should be one of the encodings which ICU  | 
167  |  |          * recognizes.  | 
168  |  |          *  | 
169  |  |          * There should be only at most one '@' in a locale ID.  | 
170  |  |          */  | 
171  | 0  |         char buffer[BUFLEN];  | 
172  | 0  |         int32_t prev, i;  | 
173  | 0  |         prev = 0;  | 
174  | 0  |         for(;;) { | 
175  | 0  |             i = id.indexOf((UChar)0x40, prev);  | 
176  | 0  |             if(i < 0) { | 
177  |  |                 // no @ between prev and the rest of the string  | 
178  | 0  |                 id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);  | 
179  | 0  |                 break; // done  | 
180  | 0  |             } else { | 
181  |  |                 // normal invariant-character conversion for text between @s  | 
182  | 0  |                 id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);  | 
183  |  |                 // manually "convert" U+0040 at id[i] into '@' at buffer[i]  | 
184  | 0  |                 buffer[i] = '@';  | 
185  | 0  |                 prev = i + 1;  | 
186  | 0  |             }  | 
187  | 0  |         }  | 
188  | 0  |         result = Locale::createFromName(buffer);  | 
189  | 0  |     }  | 
190  | 0  |     return result;  | 
191  | 0  | }  | 
192  |  |  | 
193  |  | UnicodeString&  | 
194  |  | LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)  | 
195  | 0  | { | 
196  | 0  |     if (locale.isBogus()) { | 
197  | 0  |         result.setToBogus();  | 
198  | 0  |     } else { | 
199  | 0  |         result.append(UnicodeString(locale.getName(), -1, US_INV));  | 
200  | 0  |     }  | 
201  | 0  |     return result;  | 
202  | 0  | }  | 
203  |  |  | 
204  |  | const Hashtable*  | 
205  |  | LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)  | 
206  | 0  | { | 
207  |  |     // LocaleUtility_cache is a hash-of-hashes.  The top-level keys  | 
208  |  |     // are path strings ('bundleID') passed to | 
209  |  |     // ures_openAvailableLocales.  The top-level values are  | 
210  |  |     // second-level hashes.  The second-level keys are result strings  | 
211  |  |     // from ures_openAvailableLocales.  The second-level values are  | 
212  |  |     // garbage ((void*)1 or other random pointer).  | 
213  |  | 
  | 
214  | 0  |     UErrorCode status = U_ZERO_ERROR;  | 
215  | 0  |     umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);  | 
216  | 0  |     Hashtable *cache = LocaleUtility_cache;  | 
217  | 0  |     if (cache == NULL) { | 
218  |  |         // Catastrophic failure.  | 
219  | 0  |         return NULL;  | 
220  | 0  |     }  | 
221  |  |  | 
222  | 0  |     Hashtable* htp;  | 
223  | 0  |     umtx_lock(NULL);  | 
224  | 0  |     htp = (Hashtable*) cache->get(bundleID);  | 
225  | 0  |     umtx_unlock(NULL);  | 
226  |  | 
  | 
227  | 0  |     if (htp == NULL) { | 
228  | 0  |         htp = new Hashtable(status);  | 
229  | 0  |         if (htp && U_SUCCESS(status)) { | 
230  | 0  |             CharString cbundleID;  | 
231  | 0  |             cbundleID.appendInvariantChars(bundleID, status);  | 
232  | 0  |             const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();  | 
233  | 0  |             icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));  | 
234  | 0  |             for (;;) { | 
235  | 0  |                 const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status);  | 
236  | 0  |                 if (id == NULL) { | 
237  | 0  |                     break;  | 
238  | 0  |                 }  | 
239  | 0  |                 htp->put(UnicodeString(id), (void*)htp, status);  | 
240  | 0  |             }  | 
241  | 0  |             if (U_FAILURE(status)) { | 
242  | 0  |                 delete htp;  | 
243  | 0  |                 return NULL;  | 
244  | 0  |             }  | 
245  | 0  |             umtx_lock(NULL);  | 
246  | 0  |             Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));  | 
247  | 0  |             if (t != NULL) { | 
248  |  |                 // Another thread raced through this code, creating the cache entry first.  | 
249  |  |                 // Discard ours and return theirs.  | 
250  | 0  |                 umtx_unlock(NULL);  | 
251  | 0  |                 delete htp;  | 
252  | 0  |                 htp = t;  | 
253  | 0  |             } else { | 
254  | 0  |                 cache->put(bundleID, (void*)htp, status);  | 
255  | 0  |                 umtx_unlock(NULL);  | 
256  | 0  |             }  | 
257  | 0  |         }  | 
258  | 0  |     }  | 
259  | 0  |     return htp;  | 
260  | 0  | }  | 
261  |  |  | 
262  |  | UBool  | 
263  |  | LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)  | 
264  | 0  | { | 
265  | 0  |     return child.indexOf(root) == 0 &&  | 
266  | 0  |       (child.length() == root.length() ||  | 
267  | 0  |        child.charAt(root.length()) == UNDERSCORE_CHAR);  | 
268  | 0  | }  | 
269  |  |  | 
270  |  | U_NAMESPACE_END  | 
271  |  |  | 
272  |  | /* !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION */  | 
273  |  | #endif  | 
274  |  |  | 
275  |  |  |