1"""
2 babel.localedata
3 ~~~~~~~~~~~~~~~~
4
5 Low-level locale data access.
6
7 :note: The `Locale` class, which uses this module under the hood, provides a
8 more convenient interface for accessing the locale data.
9
10 :copyright: (c) 2013-2025 by the Babel Team.
11 :license: BSD, see LICENSE for more details.
12"""
13
14from __future__ import annotations
15
16import os
17import pickle
18import re
19import sys
20import threading
21from collections import abc
22from collections.abc import Iterator, Mapping, MutableMapping
23from functools import lru_cache
24from itertools import chain
25from typing import Any
26
# Locale data that has already been loaded, keyed by locale name.
# `load()` reads from and writes to this mapping.
_cache: dict[str, Any] = {}
# Held by `load()` for the whole lookup/load. It is re-entrant because
# `load()` calls itself for the parent locale while still holding the lock.
_cache_lock = threading.RLock()
# The ``locale-data`` directory that sits next to this module file.
_dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
# Case-insensitive match for the names rejected on Windows:
# CON, PRN, AUX, NUL, COM0-COM9 and LPT0-LPT9.
_windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
31
32
def normalize_locale(name: str) -> str | None:
    """Return the known spelling of a locale ID, ignoring case and padding.

    Surrounding whitespace is stripped from `name` and the result is compared
    case-insensitively, first against the names already present in the cache
    and then against the identifiers from `locale_identifiers()`.

    :param name: the locale identifier string to normalize
    :return: the first matching identifier, or `None` if `name` is empty,
             is not a string, or matches nothing
    """
    if not (name and isinstance(name, str)):
        return None
    wanted = name.strip().lower()
    candidates = chain(_cache, locale_identifiers())
    return next(
        (candidate for candidate in candidates if wanted == candidate.lower()),
        None,
    )
45
46
def resolve_locale_filename(name: os.PathLike[str] | str) -> str:
    """Return the path of the ``.dat`` file for a locale identifier.

    Only the final path component of `name` is used; the file itself is not
    checked for existence.

    :param name: the locale identifier (any directory part is discarded)
    :return: the path ``<locale-data dir>/<name>.dat``
    :raise ValueError: on Windows, if the name (without extension) is one of
                       the reserved device names
    """
    # Drop any directory components so only the bare name is used below.
    name = os.path.basename(name)

    # The reserved-name check only applies when running on Windows.
    if sys.platform == "win32":
        stem, _extension = os.path.splitext(name)
        if _windows_reserved_name_re.match(stem):
            raise ValueError(f"Name {name} is invalid on Windows")

    return os.path.join(_dirname, f"{name}.dat")
61
62
def exists(name: str) -> bool:
    """Tell whether locale data can be found for the given locale.

    The checks are tried in order: the in-memory cache, a ``.dat`` file with
    exactly this name, and finally a case-insensitive match through
    `normalize_locale()`.

    :param name: the locale identifier string
    :return: `True` if any of the checks succeeds, `False` otherwise
             (including for empty or non-string input)
    """
    if not (name and isinstance(name, str)):
        return False
    if name in _cache:
        return True
    if os.path.exists(resolve_locale_filename(name)):
        return True
    # Fall back to a case-insensitive lookup of the identifier.
    return bool(normalize_locale(name))
76
77
@lru_cache(maxsize=None)
def locale_identifiers() -> list[str]:
    """List every locale identifier that has a data file.

    The identifiers are the names of the ``.dat`` files in the locale data
    directory, without the extension; ``root`` is left out.

    The result is cached after the first call; use
    `locale_identifiers.cache_clear()` to discard it.

    .. versionadded:: 0.8.1

    :return: a list of locale identifiers (strings)
    """
    identifiers = []
    for filename in os.listdir(_dirname):
        stem, extension = os.path.splitext(filename)
        if extension != '.dat' or stem == 'root':
            continue
        identifiers.append(stem)
    return identifiers
96
97
def _is_non_likely_script(name: str) -> bool:
    """Tell whether `name` is ``lang_Script`` with a script that is not the
    likely one for that language.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier string
    :return: `True` only for an identifier made of just a language and a
             script where the script differs from the one in the language's
             likely subtag; `False` otherwise, including when `name` cannot
             be parsed
    """
    from babel.core import get_global, parse_locale

    try:
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        return False

    # Only plain ``lang_Script`` identifiers are of interest here.
    is_lang_script_only = lang and script and not (territory or variant or rest)
    if not is_lang_script_only:
        return False

    # NOTE(review): if the language has no 'likely_subtags' entry, `None` is
    # handed to `parse_locale` -- confirm how that case is meant to behave.
    likely_subtag = get_global('likely_subtags').get(lang)
    _lang, _territory, likely_script, *_ignored = parse_locale(likely_subtag)
    return script != likely_script
117
118
def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
    """Load the locale data for the given locale.

    The locale data is a dictionary that contains much of the data defined by
    the Common Locale Data Repository (CLDR). This data is stored as a
    collection of pickle files inside the ``babel`` package.

    >>> d = load('en_US')
    >>> d['languages']['sv']
    'Swedish'

    Note that the results are cached, and subsequent requests for the same
    locale return the same dictionary:

    >>> d1 = load('en_US')
    >>> d2 = load('en_US')
    >>> d1 is d2
    True

    The cache is keyed by the locale name only, so whatever was stored by the
    first successful load of a name is what later calls get back, regardless
    of the `merge_inherited` value they pass.

    :param name: the locale identifier string (or "root"); only the final
                 path component is used
    :param merge_inherited: whether the inherited data should be merged into
                            the data of the requested locale
    :return: the locale data dictionary
    :raise `IOError`: if no locale data file is found for the given locale
                      identifier, or one of the locales it inherits from
    """
    name = os.path.basename(name)
    # The lock is re-entrant: the recursive `load(parent)` below runs while
    # it is still held. The context manager releases it on every exit path.
    with _cache_lock:
        data = _cache.get(name)
        if not data:
            # Load inherited data
            if name == 'root' or not merge_inherited:
                data = {}
            else:
                from babel.core import get_global
                parent = get_global('parent_exceptions').get(name)
                if not parent:
                    if _is_non_likely_script(name):
                        parent = 'root'
                    else:
                        # Default parent: drop the last ``_``-separated
                        # part, or fall back to "root" for a bare language.
                        parts = name.split('_')
                        parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
                # Shallow copy so the parent's cached dict is not modified.
                data = load(parent).copy()
            filename = resolve_locale_filename(name)
            # NOTE: unpickling runs code embedded in the file; this is only
            # appropriate for trusted data files in the locale data directory.
            with open(filename, 'rb') as fileobj:
                if name != 'root' and merge_inherited:
                    merge(data, pickle.load(fileobj))
                else:
                    data = pickle.load(fileobj)
            _cache[name] = data
        return data
172
173
def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None:
    """Fold the entries of `dict2` into `dict1` in place, copying nested
    dictionaries instead of sharing them.

    >>> d = {1: 'foo', 3: 'baz'}
    >>> merge(d, {1: 'Foo', 2: 'Bar'})
    >>> sorted(d.items())
    [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]

    Entries of `dict2` whose value is `None` are skipped. A dictionary value
    merged onto an `Alias` is stored as an ``(alias, dict)`` pair; merged
    onto such a pair, it is merged into a copy of the pair's dictionary.

    :param dict1: the dictionary to merge into
    :param dict2: the dictionary containing the data that should be merged
    """
    for key, incoming in dict2.items():
        if incoming is None:
            continue

        current = dict1.get(key)
        if not isinstance(incoming, dict):
            # Plain values simply replace whatever was there.
            dict1[key] = incoming
            continue

        if current is None:
            current = {}

        if isinstance(current, Alias):
            # Keep the alias and remember the overrides next to it.
            combined = (current, incoming)
        elif isinstance(current, tuple):
            alias, overrides = current
            overrides = overrides.copy()
            merge(overrides, incoming)
            combined = (alias, overrides)
        else:
            combined = current.copy()
            merge(combined, incoming)
        dict1[key] = combined
205
206
class Alias:
    """A reference from one place in the locale data to another.

    The target is named by `keys`: the sequence of keys to follow, starting
    from the top of the locale data.
    """

    def __init__(self, keys: tuple[str, ...]) -> None:
        # Always stored as a tuple, whatever iterable was passed in.
        self.keys = tuple(keys)

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.keys!r}>"

    def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]:
        """Follow this alias through `data` and return what it points at.

        If the target is itself an alias, or an ``(alias, others)`` pair,
        that alias is resolved in turn against the same top-level data.

        :param data: the locale data
        :type data: `dict`
        :return: the value found at the end of the alias chain
        """
        root = data
        target = data
        for step in self.keys:
            target = target[step]

        if isinstance(target, Alias):
            return target.resolve(root)
        if isinstance(target, tuple):
            # Only the alias half of the pair is followed here.
            alias, _others = target
            return alias.resolve(root)
        return target
238
239
class LocaleDataDict(abc.MutableMapping):
    """Mapping wrapper around locale data that resolves aliases on access.

    `base` is the data that aliases are resolved against; it defaults to the
    wrapped data itself.
    """

    def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None):
        self._data = data
        self.base = data if base is None else base

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self) -> Iterator[str | int | None]:
        return iter(self._data)

    def __getitem__(self, key: str | int | None) -> Any:
        stored = self._data[key]
        value = stored
        if isinstance(value, Alias):
            # A bare alias: look up what it points at.
            value = value.resolve(self.base)
        if isinstance(value, tuple):
            # An (alias, overrides) pair: copy the alias target and apply
            # the overrides on top of the copy.
            alias, overrides = value
            value = alias.resolve(self.base).copy()
            merge(value, overrides)
        if isinstance(value, dict):
            # Nested dictionaries get wrapped so they resolve aliases too.
            value = LocaleDataDict(value, base=self.base)
        if value is not stored:
            # Store the resolved value so later lookups reuse it.
            self._data[key] = value
        return value

    def __setitem__(self, key: str | int | None, value: Any) -> None:
        self._data[key] = value

    def __delitem__(self, key: str | int | None) -> None:
        del self._data[key]

    def copy(self) -> LocaleDataDict:
        """Return a new wrapper around a copy of the wrapped data, sharing
        the same `base`."""
        duplicate = self._data.copy()
        return LocaleDataDict(duplicate, base=self.base)