1"""
2Helpers for configuring locale settings.
3
4Name `localization` is chosen to avoid overlap with builtin `locale` module.
5"""
6from __future__ import annotations
7
8from contextlib import contextmanager
9import locale
10import platform
11import re
12import subprocess
13from typing import TYPE_CHECKING
14
15from pandas._config.config import options
16
17if TYPE_CHECKING:
18 from collections.abc import Generator
19
20
@contextmanager
def set_locale(
    new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
) -> Generator[str | tuple[str, str], None, None]:
    """
    Context manager for temporarily setting a locale.

    Parameters
    ----------
    new_locale : str or tuple
        A string of the form <language_country>.<encoding>. For example to set
        the current locale to US English with a UTF8 encoding, you would pass
        "en_US.UTF-8".
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Yields
    ------
    str or tuple
        ``"<code>.<encoding>"`` as reported by ``locale.getlocale`` for the
        category that was set, when both parts are available; otherwise
        ``new_locale`` exactly as it was passed in.

    Raises
    ------
    locale.Error
        If ``locale.setlocale`` rejects ``new_locale``.
    ValueError
        If ``locale.getlocale`` cannot parse the resulting locale name.

    Notes
    -----
    This is useful when you want to run a particular block of code under a
    particular locale, without globally setting the locale. This probably isn't
    thread-safe.
    """
    # getlocale is not always compliant with setlocale, use setlocale. GH#46595
    current_locale = locale.setlocale(lc_var)

    # Read back the category that is actually being changed. ``getlocale``
    # does not support LC_ALL, so fall back to LC_CTYPE, which is one of the
    # categories that LC_ALL sets.
    query_category = locale.LC_CTYPE if lc_var == locale.LC_ALL else lc_var

    try:
        locale.setlocale(lc_var, new_locale)
        normalized_code, normalized_encoding = locale.getlocale(query_category)
        if normalized_code is not None and normalized_encoding is not None:
            yield f"{normalized_code}.{normalized_encoding}"
        else:
            yield new_locale
    finally:
        # Always restore the previous setting, even if the body (or the
        # read-back above) raised.
        locale.setlocale(lc_var, current_locale)
55
56
def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
    """
    Report whether a locale can be set, and then read back, without an
    Exception being raised.

    Parameters
    ----------
    lc : str
        The locale to attempt to set.
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Returns
    -------
    bool
        ``True`` if the passed locale can be set, ``False`` otherwise.
    """
    settable = True
    try:
        # Entering the context sets the locale; leaving it restores the
        # previous one, so this probe leaves no lasting change on success.
        with set_locale(lc, lc_var=lc_var):
            pass
    except (ValueError, locale.Error):
        # ``locale.Error`` is (despite its generic name) the exception
        # subclass raised by the ``locale`` module.
        settable = False
    return settable
82
83
84def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]:
85 """
86 Return a list of normalized locales that do not throw an ``Exception``
87 when set.
88
89 Parameters
90 ----------
91 locales : str
92 A string where each locale is separated by a newline.
93 normalize : bool
94 Whether to call ``locale.normalize`` on each locale.
95
96 Returns
97 -------
98 valid_locales : list
99 A list of valid locales.
100 """
101 return [
102 loc
103 for loc in (
104 locale.normalize(loc.strip()) if normalize else loc.strip()
105 for loc in locales
106 )
107 if can_set_locale(loc)
108 ]
109
110
def get_locales(
    prefix: str | None = None,
    normalize: bool = True,
) -> list[str]:
    """
    Get all the locales that are available on the system.

    Parameters
    ----------
    prefix : str
        If not ``None`` then return only those locales with the prefix
        provided. For example to get all English language locales (those that
        start with ``"en"``), pass ``prefix="en"``. The prefix is interpreted
        as a regular expression matched at the start of each locale name.
    normalize : bool
        Call ``locale.normalize`` on the resulting list of available locales.
        Whether or not this is set, only locales that can be set without
        throwing an ``Exception`` are returned.

    Returns
    -------
    locales : list of strings
        A list of locale strings that can be set with ``locale.setlocale()``.
        For example::

            locale.setlocale(locale.LC_ALL, locale_string)

    On error will return an empty list (no locale available, e.g. Windows)

    """
    if platform.system() not in ("Linux", "Darwin"):
        # Other platforms e.g. windows platforms don't define "locale -a"
        # Note: is_platform_windows causes circular import here
        return []

    try:
        raw_locales = subprocess.check_output(["locale", "-a"])
    except (OSError, subprocess.CalledProcessError):
        # The ``locale`` executable is missing or exited with an error;
        # honor the documented "empty list on error" contract.
        return []

    # Bound before the ``try`` so it is always defined afterwards.
    out_locales = []
    try:
        # raw_locales is "\n" separated list of locales
        # it may contain non-decodable parts, so split
        # and decode what we can line by line.
        for x in raw_locales.split(b"\n"):
            if not x.strip():
                # Skip blank lines (e.g. after the trailing newline): an
                # empty name is not a locale reported by the system.
                continue
            try:
                out_locales.append(str(x, encoding=options.display.encoding))
            except UnicodeError:
                # 'locale -a' is used to populate 'raw_locales' and on
                # Redhat 7 Linux (and maybe others) prints locale names
                # using windows-1252 encoding. Bug only triggered by
                # a few special characters and when there is an
                # extensive list of installed locales.
                out_locales.append(str(x, encoding="windows-1252"))
    except TypeError:
        # Best effort: keep whatever was decoded before the failure.
        # NOTE(review): presumably guards against a non-str
        # ``display.encoding`` option -- confirm.
        pass

    if prefix is None:
        return _valid_locales(out_locales, normalize)

    # Anchor the match at the start of each name so that only real prefixes
    # match, rather than fragments from the middle of a locale name.
    pattern = re.compile(prefix)
    found = [loc for loc in out_locales if pattern.match(loc)]
    return _valid_locales(found, normalize)