1"""
2Helpers for configuring locale settings.
3
4Name `localization` is chosen to avoid overlap with builtin `locale` module.
5"""
6from __future__ import annotations
7
8from contextlib import contextmanager
9import locale
10import platform
11import re
12import subprocess
13from typing import Generator
14
15from pandas._config.config import options
16
17
@contextmanager
def set_locale(
    new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
) -> Generator[str | tuple[str, str], None, None]:
    """
    Context manager for temporarily setting a locale.

    Parameters
    ----------
    new_locale : str or tuple
        A string of the form <language_country>.<encoding>. For example to set
        the current locale to US English with a UTF8 encoding, you would pass
        "en_US.UTF-8".
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Yields
    ------
    str or tuple
        ``"<language_country>.<encoding>"`` as reported by
        ``locale.getlocale`` for the category that was just set, or
        `new_locale` unchanged when either part is reported as ``None``
        (as is the case for the "C" locale).

    Notes
    -----
    This is useful when you want to run a particular block of code under a
    particular locale, without globally setting the locale. This probably isn't
    thread-safe.
    """
    # getlocale is not always compliant with setlocale, use setlocale. GH#46595
    current_locale = locale.setlocale(lc_var)

    # Report on the category that is actually being changed. ``getlocale``
    # does not accept LC_ALL; setting LC_ALL also sets LC_CTYPE, so that
    # category is queried instead in the default case.
    query_category = locale.LC_CTYPE if lc_var == locale.LC_ALL else lc_var

    try:
        locale.setlocale(lc_var, new_locale)
        normalized_code, normalized_encoding = locale.getlocale(query_category)
        if normalized_code is not None and normalized_encoding is not None:
            yield f"{normalized_code}.{normalized_encoding}"
        else:
            yield new_locale
    finally:
        # Always restore the previous setting, even if the body (or the
        # ``setlocale`` call above) raised.
        locale.setlocale(lc_var, current_locale)
52
53
def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
    """
    Report whether a locale can be set, and then read back, without error.

    Parameters
    ----------
    lc : str
        The locale to attempt to set.
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Returns
    -------
    bool
        ``True`` when entering and leaving ``set_locale(lc, lc_var=lc_var)``
        raised neither ``ValueError`` nor ``locale.Error``, ``False``
        otherwise.
    """
    settable = True
    try:
        # The body is intentionally empty: only entering/leaving matters.
        with set_locale(lc, lc_var=lc_var):
            pass
    except (ValueError, locale.Error):
        # ``locale.Error``: horrible name for an Exception subclass
        settable = False
    return settable
79
80
81def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]:
82 """
83 Return a list of normalized locales that do not throw an ``Exception``
84 when set.
85
86 Parameters
87 ----------
88 locales : str
89 A string where each locale is separated by a newline.
90 normalize : bool
91 Whether to call ``locale.normalize`` on each locale.
92
93 Returns
94 -------
95 valid_locales : list
96 A list of valid locales.
97 """
98 return [
99 loc
100 for loc in (
101 locale.normalize(loc.strip()) if normalize else loc.strip()
102 for loc in locales
103 )
104 if can_set_locale(loc)
105 ]
106
107
def get_locales(
    prefix: str | None = None,
    normalize: bool = True,
) -> list[str]:
    """
    Get all the locales that are available on the system.

    Parameters
    ----------
    prefix : str
        If not ``None`` then return only those locales with the prefix
        provided. For example to get all English language locales (those that
        start with ``"en"``), pass ``prefix="en"``. The value is interpreted
        as a regular expression matched at the start of each locale name.
    normalize : bool
        Call ``locale.normalize`` on the resulting list of available locales.
        Whatever the value of this flag, only locales that can be set
        without throwing an ``Exception`` are returned.

    Returns
    -------
    locales : list of strings
        A list of locale strings that can be set with ``locale.setlocale()``.
        For example::

            locale.setlocale(locale.LC_ALL, locale_string)

        On error will return an empty list (no locale available, e.g. Windows)

    """
    if platform.system() not in ("Linux", "Darwin"):
        # Other platforms e.g. windows platforms don't define "locale -a"
        # Note: is_platform_windows causes circular import here
        return []

    try:
        raw_locales = subprocess.check_output(["locale", "-a"])
    except (OSError, subprocess.CalledProcessError):
        # The ``locale`` executable is missing or exited with an error;
        # honour the documented "empty list on error" contract.
        return []

    # raw_locales is a "\n" separated list of locales. It may contain
    # non-decodable parts, so split it and decode line by line.
    out_locales: list[str] = []
    try:
        for raw_name in raw_locales.split(b"\n"):
            if not raw_name:
                # e.g. the empty piece after the trailing newline
                continue
            try:
                out_locales.append(str(raw_name, encoding=options.display.encoding))
            except UnicodeError:
                # 'locale -a' is used to populate 'raw_locales' and on
                # Redhat 7 Linux (and maybe others) prints locale names
                # using windows-1252 encoding. Bug only triggered by
                # a few special characters and when there is an
                # extensive list of installed locales.
                out_locales.append(str(raw_name, encoding="windows-1252"))
    except TypeError:
        # Best effort: carry on with whatever was decoded before the failure.
        pass

    if prefix is None:
        return _valid_locales(out_locales, normalize)

    # Anchor the match at the start of each name so that a hit in the middle
    # of a name does not produce a truncated pseudo-locale.
    pattern = re.compile(prefix)
    found = [name for name in out_locales if pattern.match(name)]
    return _valid_locales(found, normalize)