1"""Translation helper functions."""
2
3import functools
4import gettext as gettext_module
5import os
6import re
7import sys
8import warnings
9
10from asgiref.local import Local
11
12from django.apps import apps
13from django.conf import settings
14from django.conf.locale import LANG_INFO
15from django.core.exceptions import AppRegistryNotReady
16from django.core.signals import setting_changed
17from django.dispatch import receiver
18from django.utils.regex_helper import _lazy_re_compile
19from django.utils.safestring import SafeData, mark_safe
20
21from . import to_language, to_locale
22
# Cache of translation objects, keyed by language code. translation() fills
# it lazily and never evicts entries.
_translations = {}
# Holder for the currently active translation object, stored on its `value`
# attribute by activate()/deactivate_all() and removed by deactivate().
# NOTE(review): asgiref's Local is presumably thread-/async-task-local, which
# is what makes the active translation per-thread; confirm against asgiref.
_active = Local()

# The default translation is based on the settings file. It is created on
# first use from settings.LANGUAGE_CODE (see catalog(), gettext() and
# do_ntranslate()).
_default = None

# Magic gettext character that separates the context from the message in
# contextual lookups (see pgettext() and npgettext()).
CONTEXT_SEPARATOR = "\x04"

# Maximum number of characters that will be parsed from the Accept-Language
# header or cookie to prevent possible denial of service or memory exhaustion
# attacks. About 10x longer than the longest value shown on MDN’s
# Accept-Language page.
LANGUAGE_CODE_MAX_LENGTH = 500
39
# Format of Accept-Language header values. From RFC 9110 Sections 12.4.2 and
# 12.5.4, and RFC 5646 Section 2.1.
# The pattern has two capturing groups (the language range and the optional
# q-value), which is why _parse_accept_lang_header() consumes the result of
# split() in steps of three. It is compiled with re.VERBOSE, so the
# whitespace and "#" comments inside the pattern are not part of the match.
accept_language_re = _lazy_re_compile(
    r"""
        # "en", "en-au", "x-y-z", "es-419", "*"
        ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*)
        # Optional "q=1.00", "q=0.8"
        (?:\s*;\s*q=(0(?:\.[0-9]{,3})?|1(?:\.0{,3})?))?
        # Multiple accepts per header.
        (?:\s*,\s*|$)
    """,
    re.VERBOSE,
)

# A well-formed language code: a 1-8 letter primary tag, any number of
# "-"-separated alphanumeric subtags of 1-8 characters, and an optional
# "@modifier" of 1-20 alphanumeric characters. Matching is case-insensitive.
language_code_re = _lazy_re_compile(
    r"^[a-z]{1,8}(?:-[a-z0-9]{1,8})*(?:@[a-z0-9]{1,20})?$", re.IGNORECASE
)

# A language prefix at the start of a URL path: "/" followed by word
# characters with at most two "-" or "@" separated extensions, terminated by
# "/" or the end of the path. Group 1 is the candidate language code.
language_code_prefix_re = _lazy_re_compile(r"^/(\w+([@-]\w+){0,2})(/|$)")
59
60
@receiver(setting_changed)
def reset_cache(*, setting, **kwargs):
    """
    Drop the cached language lookups when a language-related setting changes.

    Connected to the ``setting_changed`` signal. Only a change to LANGUAGES or
    LANGUAGE_CODE invalidates the caches, since previously accepted languages
    may no longer be valid; any other setting is ignored.
    """
    if setting not in ("LANGUAGES", "LANGUAGE_CODE"):
        return
    for cached_lookup in (
        check_for_language,
        get_languages,
        get_supported_language_variant,
    ):
        cached_lookup.cache_clear()
71
72
class TranslationCatalog:
    """
    Simulate a dict for DjangoTranslation._catalog so as multiple catalogs
    with different plural equations are kept separate.

    Internally this keeps two parallel lists: ``_catalogs`` (plain dicts) and
    ``_plurals`` (the plural function belonging to the catalog at the same
    index). Index 0 is the most recently added catalog and is searched first,
    so later updates take precedence over earlier ones.
    """

    def __init__(self, trans=None):
        """
        Start from a copy of `trans`'s catalog and its plural function, or
        from an empty catalog using the Germanic plural rule (anything except
        one is plural) when `trans` is not given.
        """
        self._catalogs = [trans._catalog.copy()] if trans else [{}]
        self._plurals = [trans.plural] if trans else [lambda n: int(n != 1)]

    @staticmethod
    def _same_plural(first, second):
        """
        Return True if two plural functions implement the same rule.

        Two functions are considered the same if they are the same object or
        if their code objects compare equal. Callables without a ``__code__``
        attribute only match by identity.
        """
        if first is second:
            return True
        first_code = getattr(first, "__code__", None)
        second_code = getattr(second, "__code__", None)
        return first_code is not None and first_code == second_code

    def __getitem__(self, key):
        """Return the first match for `key`, searching newest catalog first."""
        for cat in self._catalogs:
            try:
                return cat[key]
            except KeyError:
                pass
        raise KeyError(key)

    def __setitem__(self, key, value):
        """Store `value` in the top (highest-priority) catalog."""
        self._catalogs[0][key] = value

    def __contains__(self, key):
        """Return True if any of the catalogs contains `key`."""
        return any(key in cat for cat in self._catalogs)

    def items(self):
        """Yield the items of every catalog; shadowed keys may repeat."""
        for cat in self._catalogs:
            yield from cat.items()

    def keys(self):
        """Yield the keys of every catalog; shadowed keys may repeat."""
        for cat in self._catalogs:
            yield from cat.keys()

    def update(self, trans):
        """
        Add the messages of `trans`, giving them precedence over existing ones.

        If `trans` uses the same plural rule as the top catalog, its messages
        are merged into that catalog. Otherwise a copy of its catalog is
        prepended together with its plural function, so that plural lookups
        for its messages keep using the matching rule.
        """
        # Compare plural rules on both sides. Comparing a code object with
        # the function object itself is never equal and would prepend a new
        # catalog on every call.
        if self._same_plural(trans.plural, self._plurals[0]):
            self._catalogs[0].update(trans._catalog)
        else:
            self._catalogs.insert(0, trans._catalog.copy())
            self._plurals.insert(0, trans.plural)

    def get(self, key, default=None):
        """Return the first match for `key`, or `default` if there is none."""
        # A private sentinel lets a stored None be told apart from a miss.
        missing = object()
        for cat in self._catalogs:
            result = cat.get(key, missing)
            if result is not missing:
                return result
        return default

    def plural(self, msgid, num):
        """
        Return the plural form of `msgid` for `num`.

        Each catalog is queried with the index computed by its own plural
        function. Raise KeyError if no catalog has a (non-None) entry.
        """
        for cat, plural in zip(self._catalogs, self._plurals):
            tmsg = cat.get((msgid, plural(num)))
            if tmsg is not None:
                return tmsg
        raise KeyError
127
128
class DjangoTranslation(gettext_module.GNUTranslations):
    """
    Set up the GNUTranslations context with regard to output charset.

    This translation object will be constructed out of multiple GNUTranslations
    objects by merging their catalogs. It will construct an object for the
    requested language and add a fallback to the default language, if it's
    different from the requested language.

    Catalogs merged later take precedence over catalogs merged earlier, so
    the order of the merge steps in __init__() defines which source wins.
    """

    # gettext domain used when none is passed to __init__().
    domain = "django"

    def __init__(self, language, domain=None, localedirs=None):
        """
        Create a GNUTranslations() using many locale directories.

        `language` is the language to load. `domain` overrides the class-level
        gettext domain. `localedirs` is a list of locale directories to load
        from instead of the installed apps; it is ignored (with a
        RuntimeWarning) for the 'django' domain.

        Raise OSError if no catalog at all is found for the default language
        in the 'django' domain.
        """
        gettext_module.GNUTranslations.__init__(self)
        if domain is not None:
            self.domain = domain

        self.__language = language
        self.__to_language = to_language(language)
        self.__locale = to_locale(language)
        # Stays None until the first non-empty catalog is merged; see merge().
        self._catalog = None
        # If a language doesn't have a catalog, use the Germanic default for
        # pluralization: anything except one is pluralized.
        self.plural = lambda n: int(n != 1)

        # Merge order, from lowest to highest precedence: the base catalog
        # ('django' domain only), then either the given localedirs or the
        # installed apps, then settings.LOCALE_PATHS.
        if self.domain == "django":
            if localedirs is not None:
                # A module-level cache is used for caching 'django' translations
                warnings.warn(
                    "localedirs is ignored when domain is 'django'.", RuntimeWarning
                )
                localedirs = None
            self._init_translation_catalog()

        if localedirs:
            for localedir in localedirs:
                translation = self._new_gnu_trans(localedir)
                self.merge(translation)
        else:
            self._add_installed_apps_translations()

        self._add_local_translations()
        if (
            self.__language == settings.LANGUAGE_CODE
            and self.domain == "django"
            and self._catalog is None
        ):
            # default lang should have at least one translation file available.
            raise OSError(
                "No translation files found for default language %s."
                % settings.LANGUAGE_CODE
            )
        self._add_fallback(localedirs)
        if self._catalog is None:
            # No catalogs found for this language, set an empty catalog.
            self._catalog = TranslationCatalog()

    def __repr__(self):
        """Return a short representation that includes the language."""
        return "<DjangoTranslation lang:%s>" % self.__language

    def _new_gnu_trans(self, localedir, use_null_fallback=True):
        """
        Return a mergeable gettext.GNUTranslations instance.

        A convenience wrapper. By default gettext uses 'fallback=False'.
        Using param `use_null_fallback` to avoid confusion with any other
        references to 'fallback'.

        The lookup is restricted to this object's domain and locale inside
        `localedir`.
        """
        return gettext_module.translation(
            domain=self.domain,
            localedir=localedir,
            languages=[self.__locale],
            fallback=use_null_fallback,
        )

    def _init_translation_catalog(self):
        """Create a base catalog using global django translations."""
        # The "locale" directory is looked up next to the file of the module
        # that defines the settings object's class.
        settingsfile = sys.modules[settings.__module__].__file__
        localedir = os.path.join(os.path.dirname(settingsfile), "locale")
        translation = self._new_gnu_trans(localedir)
        self.merge(translation)

    def _add_installed_apps_translations(self):
        """
        Merge translations from each installed app.

        Raise AppRegistryNotReady, with a hint about non-lazy gettext calls,
        if the app registry has not been populated yet.
        """
        try:
            # Reversed, so that apps listed first are merged last and
            # therefore take precedence.
            app_configs = reversed(apps.get_app_configs())
        except AppRegistryNotReady:
            raise AppRegistryNotReady(
                "The translation infrastructure cannot be initialized before the "
                "apps registry is ready. Check that you don't make non-lazy "
                "gettext calls at import time."
            )
        for app_config in app_configs:
            localedir = os.path.join(app_config.path, "locale")
            if os.path.exists(localedir):
                translation = self._new_gnu_trans(localedir)
                self.merge(translation)

    def _add_local_translations(self):
        """Merge translations defined in LOCALE_PATHS."""
        # Reversed, so that paths listed first are merged last and therefore
        # take precedence.
        for localedir in reversed(settings.LOCALE_PATHS):
            translation = self._new_gnu_trans(localedir)
            self.merge(translation)

    def _add_fallback(self, localedirs=None):
        """Set the GNUTranslations() fallback with the default language."""
        # Don't set a fallback for the default language or any English variant
        # (as it's empty, so it'll ALWAYS fall back to the default language)
        if self.__language == settings.LANGUAGE_CODE or self.__language.startswith(
            "en"
        ):
            return
        if self.domain == "django":
            # Get from cache
            default_translation = translation(settings.LANGUAGE_CODE)
        else:
            # Other domains are not cached at module level, so build the
            # default-language translation for this domain directly.
            default_translation = DjangoTranslation(
                settings.LANGUAGE_CODE, domain=self.domain, localedirs=localedirs
            )
        self.add_fallback(default_translation)

    def merge(self, other):
        """
        Merge another translation into this catalog.

        Translations without a catalog, or with an empty one, are skipped.
        The first merged translation also provides this object's plural
        function and _info.
        """
        if not getattr(other, "_catalog", None):
            return  # NullTranslations() has no _catalog
        if self._catalog is None:
            # Take plural and _info from first catalog found (generally Django's).
            self.plural = other.plural
            self._info = other._info.copy()
            self._catalog = TranslationCatalog(other)
        else:
            self._catalog.update(other)
        # Keep any fallback chain attached to the merged translation.
        if other._fallback:
            self.add_fallback(other._fallback)

    def language(self):
        """Return the translation language."""
        return self.__language

    def to_language(self):
        """Return the translation language name."""
        return self.__to_language

    def ngettext(self, msgid1, msgid2, n):
        """
        Return the plural-aware translation of `msgid1` for the count `n`.

        If the catalog has no entry, defer to the fallback translation when
        there is one; otherwise return `msgid1` for n == 1 and `msgid2` for
        any other count.
        """
        try:
            tmsg = self._catalog.plural(msgid1, n)
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = msgid1
            else:
                tmsg = msgid2
        return tmsg
284
285
def translation(language):
    """
    Return the translation object for `language` in the default 'django'
    domain, building it on first use and caching it for later calls.
    """
    try:
        return _translations[language]
    except KeyError:
        pass
    # Built outside the except block so that an error raised while creating
    # the translation is not chained to the cache miss.
    created = DjangoTranslation(language)
    _translations[language] = created
    return created
294
295
def activate(language):
    """
    Look up the translation object for `language` and make it the active
    translation for the current thread. A falsy `language` is a no-op.
    """
    if language:
        _active.value = translation(language)
304
305
def deactivate():
    """
    Remove the active translation object, if any, so that subsequent _()
    calls are resolved by the default translation object again.
    """
    if not hasattr(_active, "value"):
        return
    del _active.value
313
314
def deactivate_all():
    """
    Install a NullTranslations() instance as the active translation object.
    Useful when delayed translations should come out as the original string.
    """
    null_translation = gettext_module.NullTranslations()
    _active.value = null_translation
    # Give the null object a to_language() that reports "no language".
    null_translation.to_language = lambda *args: None
323
324
def get_language():
    """
    Return the currently selected language.

    Fall back to settings.LANGUAGE_CODE when no translation is active or the
    active object cannot report its language.
    """
    active = getattr(_active, "value", None)
    if active is None:
        return settings.LANGUAGE_CODE
    try:
        return active.to_language()
    except AttributeError:
        # Not a real translation object: assume the default language.
        return settings.LANGUAGE_CODE
335
336
def get_language_bidi():
    """
    Return selected language's BiDi layout.

    * False = left-to-right layout
    * True = right-to-left layout

    The decision is made on the base language (the part before the first
    "-"), checked against settings.LANGUAGES_BIDI. Return False when
    get_language() reports no language.
    """
    lang = get_language()
    if lang is None:
        return False
    # Reuse the value that was just checked instead of asking again.
    base_lang = lang.split("-")[0]
    return base_lang in settings.LANGUAGES_BIDI
350
351
def catalog():
    """
    Return the translation object currently in effect.

    That is the active translation if one is installed, otherwise the default
    translation for settings.LANGUAGE_CODE (created on first use). Handy when
    the whole message catalog is needed rather than a single translated
    string, or when the catalog has to be modified.
    """
    global _default

    active = getattr(_active, "value", None)
    if active is None:
        if _default is None:
            _default = translation(settings.LANGUAGE_CODE)
        return _default
    return active
366
367
def gettext(message):
    """
    Translate `message` with the translation object that is active for the
    current thread, or with the default translation object if none has been
    activated. Line endings are normalized to "\\n" before the lookup.
    """
    global _default

    normalized = message.replace("\r\n", "\n").replace("\r", "\n")

    if not normalized:
        # An empty msgid would make gettext return the catalog metadata, so
        # return an empty value of the message's own type instead.
        result = type(message)("")
    else:
        if not _default:
            _default = translation(settings.LANGUAGE_CODE)
        active = getattr(_active, "value", _default)
        result = active.gettext(normalized)

    # Keep the safe-string marker of the original message.
    return mark_safe(result) if isinstance(message, SafeData) else result
392
393
def pgettext(context, message):
    """
    Translate `message` within `context`.

    The lookup key is the context and the message joined by
    CONTEXT_SEPARATOR. If the separator is still present in the result, no
    translation was found and `message` is returned unchanged.
    """
    translated = gettext("%s%s%s" % (context, CONTEXT_SEPARATOR, message))
    if CONTEXT_SEPARATOR in translated:
        # Translation not found
        return message
    if isinstance(message, SafeData):
        return mark_safe(translated)
    return translated
403
404
def gettext_noop(message):
    """
    Mark `message` as translatable without translating it.

    The message is returned unchanged. Use this for strings kept in global
    variables that must stay in the base language (for instance because they
    are used externally) and that will be translated at a later point.
    """
    return message
413
414
def do_ntranslate(singular, plural, number, translation_function):
    """
    Call the method named `translation_function` with
    (singular, plural, number) on the active translation object, or on the
    default translation object (created on first use) if none is active.
    """
    global _default

    target = getattr(_active, "value", None)
    if target is None:
        if _default is None:
            _default = translation(settings.LANGUAGE_CODE)
        target = _default
    return getattr(target, translation_function)(singular, plural, number)
424
425
def ngettext(singular, plural, number):
    """
    Return the translation of either `singular` or `plural`, chosen
    according to `number`.
    """
    translated = do_ntranslate(singular, plural, number, "ngettext")
    return translated
432
433
def npgettext(context, singular, plural, number):
    """
    Plural-aware translation of a message within `context`.

    Both forms are looked up with the context prepended. If the result still
    contains CONTEXT_SEPARATOR, no contextual translation exists and the
    lookup is repeated without the context.
    """
    contextual_singular = "%s%s%s" % (context, CONTEXT_SEPARATOR, singular)
    contextual_plural = "%s%s%s" % (context, CONTEXT_SEPARATOR, plural)
    result = ngettext(contextual_singular, contextual_plural, number)
    if CONTEXT_SEPARATOR not in result:
        return result
    # Translation not found
    return ngettext(singular, plural, number)
445
446
def all_locale_paths():
    """
    Return a list of paths to user-provided language files.

    The list holds the "locale" directory next to the settings module, then
    every entry of settings.LOCALE_PATHS, then the existing "locale"
    directories of the installed apps.
    """
    settings_module = sys.modules[settings.__module__]
    global_path = os.path.join(os.path.dirname(settings_module.__file__), "locale")
    candidates = (
        os.path.join(app_config.path, "locale")
        for app_config in apps.get_app_configs()
    )
    existing_app_paths = [path for path in candidates if os.path.exists(path)]
    return [global_path, *settings.LOCALE_PATHS, *existing_app_paths]
460
461
@functools.lru_cache(maxsize=1000)
def check_for_language(lang_code):
    """
    Return True if a 'django' language file exists for `lang_code` in any of
    the locale paths. Used to decide whether a user-provided language is
    available.

    The lru_cache needs a maxsize to guard against memory exhaustion attacks,
    because the language codes come from the HTTP request. See also
    <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
    """
    # Reject anything that is not a well-formed language code first (#21458).
    if lang_code is None or not language_code_re.search(lang_code):
        return False
    for path in all_locale_paths():
        if gettext_module.find("django", path, [to_locale(lang_code)]) is not None:
            return True
    return False
480
481
@functools.lru_cache
def get_languages():
    """
    Return settings.LANGUAGES as a cached dictionary for quick lookups.
    Keys are lowercased because language codes are case-insensitive.
    """
    languages = {}
    for code, name in dict(settings.LANGUAGES).items():
        languages[code.lower()] = name
    return languages
489
490
@functools.lru_cache(maxsize=1000)
def get_supported_language_variant(lang_code, strict=False):
    """
    Return the supported language code that best matches `lang_code`,
    possibly a more generic variant. Raise LookupError if there is none.

    Unless `strict` is True, a country-specific variant of the generic
    language is accepted when neither the code itself nor one of its generic
    variants is supported.

    Over-long language codes are cut back to a maximum length to avoid
    potential denial of service attacks.

    The lru_cache needs a maxsize to guard against memory exhaustion attacks,
    because the language codes come from the HTTP request. See also
    <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
    """
    if not lang_code:
        raise LookupError(lang_code)

    if len(lang_code) > LANGUAGE_CODE_MAX_LENGTH:
        # In strict mode an over-long code is rejected outright. Otherwise it
        # is cut at the last "-" found within the allowed length, if any.
        cut = -1 if strict else lang_code.rfind("-", 0, LANGUAGE_CODE_MAX_LENGTH)
        if cut <= 0:
            raise LookupError(lang_code)
        lang_code = lang_code[:cut]

    # Candidates in priority order: the code itself, its special fallbacks,
    # then ever shorter prefixes, e.g. 'zh-hant-tw' -> 'zh-hant' -> 'zh'.
    try:
        special_fallbacks = LANG_INFO[lang_code]["fallback"]
    except KeyError:
        special_fallbacks = ()
    candidates = [lang_code, *special_fallbacks]
    dash = lang_code.rfind("-")
    while dash > -1:
        candidates.append(lang_code[:dash])
        dash = lang_code.rfind("-", 0, dash)
    generic_lang_code = candidates[-1]

    supported = get_languages()
    for candidate in candidates:
        if candidate.lower() in supported and check_for_language(candidate):
            return candidate

    if not strict:
        # If fr-fr is not supported, try fr-ca.
        for supported_code in supported:
            if supported_code.startswith(generic_lang_code + "-"):
                return supported_code

    raise LookupError(lang_code)
541
542
def get_language_from_path(path, strict=False):
    """
    Return the supported language code found at the start of `path`, or None
    if the path has no language prefix or the prefix is not supported.

    Unless `strict` is True, a country-specific variant is accepted when
    neither the language code nor its generic variant is supported.
    """
    prefix_match = language_code_prefix_re.match(path)
    if prefix_match:
        candidate = prefix_match[1]
        try:
            return get_supported_language_variant(candidate, strict=strict)
        except LookupError:
            pass
    return None
558
559
def get_language_from_request(request, check_path=False):
    """
    Work out which language the user wants from the request. Only languages
    listed in settings.LANGUAGES are considered; a request for a sublanguage
    is answered with the main language when only that one is available.

    Sources are tried in this order: the URL path prefix (only when
    `check_path` is True; skipped otherwise for backwards compatibility), the
    language cookie, the Accept-Language header, and finally
    settings.LANGUAGE_CODE.
    """
    if check_path:
        path_language = get_language_from_path(request.path_info)
        if path_language is not None:
            return path_language

    cookie_language = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
    cookie_is_exact_match = (
        cookie_language is not None
        and cookie_language in get_languages()
        and check_for_language(cookie_language)
    )
    if cookie_is_exact_match:
        return cookie_language

    # Not an exact match (or no cookie at all): try a supported variant.
    try:
        return get_supported_language_variant(cookie_language)
    except LookupError:
        pass

    header = request.META.get("HTTP_ACCEPT_LANGUAGE", "")
    for candidate, _quality in parse_accept_lang_header(header):
        if candidate == "*":
            break
        if language_code_re.search(candidate):
            try:
                return get_supported_language_variant(candidate)
            except LookupError:
                pass

    try:
        return get_supported_language_variant(settings.LANGUAGE_CODE)
    except LookupError:
        return settings.LANGUAGE_CODE
605
606
@functools.lru_cache(maxsize=1000)
def _parse_accept_lang_header(lang_string):
    """
    Parse `lang_string`, the body of an HTTP Accept-Language header, into a
    tuple of (lang, q-value) pairs ordered by descending q-value.

    Return an empty tuple if `lang_string` contains any format error.
    """
    pieces = accept_language_re.split(lang_string.lower())
    # Anything left after the last match is unparseable trailing text.
    if pieces[-1]:
        return ()

    parsed = []
    # split() yields triples: text before the match, language, q-value.
    for start in range(0, len(pieces) - 1, 3):
        leading_text, lang, quality = pieces[start : start + 3]
        if leading_text:
            return ()
        parsed.append((lang, float(quality) if quality else 1.0))
    return tuple(sorted(parsed, key=lambda pair: pair[1], reverse=True))
630
631
def parse_accept_lang_header(lang_string):
    """
    Parse the value of the Accept-Language header up to a maximum length.

    The header value is cut back to a maximum length to avoid potential
    denial of service and memory exhaustion attacks. A very large raw value
    could use excessive memory because parsed values are cached with
    functools.lru_cache() to avoid re-parsing common header values.
    """
    if len(lang_string) > LANGUAGE_CODE_MAX_LENGTH:
        # Keep everything up to the last comma within the allowed length, so
        # that a language range cut in half at the end is dropped.
        last_comma = lang_string.rfind(",", 0, LANGUAGE_CODE_MAX_LENGTH)
        if last_comma <= 0:
            # A single over-long language range: don't try to parse it.
            return ()
        lang_string = lang_string[:last_comma]
    return _parse_accept_lang_header(lang_string)