Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/_utils.py: 74%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Internal shared utilities for chardet."""
3from __future__ import annotations
5import warnings
#: Default maximum number of bytes to examine during detection.
DEFAULT_MAX_BYTES: int = 200_000

#: Default minimum confidence threshold for filtering results.
MINIMUM_THRESHOLD: float = 0.20

#: Default chunk_size value (deprecated, kept for backward-compat signatures).
#: Equal to 64 KiB (2**16). ``_warn_deprecated_chunk_size`` emits a
#: DeprecationWarning whenever a caller passes any other value.
_DEFAULT_CHUNK_SIZE: int = 65_536
def _warn_deprecated_chunk_size(chunk_size: int, stacklevel: int = 3) -> None:
    """Warn that *chunk_size* is ignored when a non-default value is passed.

    Nothing happens when *chunk_size* equals ``_DEFAULT_CHUNK_SIZE``; any
    other value triggers a single ``DeprecationWarning``.

    Args:
        chunk_size: The value the caller supplied for the legacy parameter.
        stacklevel: Forwarded to ``warnings.warn``. With the default of 3 the
            warning is attributed to the caller of the function that invoked
            this helper.
    """
    # Guard clause: the default value means the caller did not opt in to the
    # legacy parameter, so there is nothing to report.
    if chunk_size == _DEFAULT_CHUNK_SIZE:
        return

    message = "chunk_size is not used in this version of chardet and will be ignored"
    warnings.warn(message, category=DeprecationWarning, stacklevel=stacklevel)
27def _validate_max_bytes(max_bytes: int) -> None:
28 """Raise ValueError if *max_bytes* is not a positive integer."""
29 if isinstance(max_bytes, bool) or not isinstance(max_bytes, int) or max_bytes < 1:
30 msg = "max_bytes must be a positive integer"
31 raise ValueError(msg)
34def _resolve_prefer_superset(
35 should_rename_legacy: bool, prefer_superset: bool, stacklevel: int = 3
36) -> bool:
37 """Resolve the deprecated *should_rename_legacy* into *prefer_superset*."""
38 if should_rename_legacy:
39 warnings.warn(
40 "should_rename_legacy is deprecated, use prefer_superset instead",
41 DeprecationWarning,
42 stacklevel=stacklevel,
43 )
44 return True
45 return prefer_superset
#: Mapping from ISO 639-1 language codes to English names.
#: Includes ``"und"`` (ISO 639-3 "Undetermined") for use when language is unknown.
#: Keys and values are all lowercase, and entries are listed in alphabetical
#: order of their code; ``"und"`` is the only key that is not two letters long.
ISO_TO_LANGUAGE: dict[str, str] = {
    "ar": "arabic",
    "be": "belarusian",
    "bg": "bulgarian",
    "br": "breton",
    "cs": "czech",
    "cy": "welsh",
    "da": "danish",
    "de": "german",
    "el": "greek",
    "en": "english",
    "eo": "esperanto",
    "es": "spanish",
    "et": "estonian",
    "fa": "farsi",
    "fi": "finnish",
    "fr": "french",
    "ga": "irish",
    "gd": "gaelic",
    "he": "hebrew",
    "hr": "croatian",
    "hu": "hungarian",
    "id": "indonesian",
    "is": "icelandic",
    "it": "italian",
    "ja": "japanese",
    "kk": "kazakh",
    "ko": "korean",
    "lt": "lithuanian",
    "lv": "latvian",
    "mk": "macedonian",
    "ms": "malay",
    "mt": "maltese",
    "nl": "dutch",
    "no": "norwegian",
    "pl": "polish",
    "pt": "portuguese",
    "ro": "romanian",
    "ru": "russian",
    "sk": "slovak",
    "sl": "slovene",
    "sr": "serbian",
    "sv": "swedish",
    "tg": "tajik",
    "th": "thai",
    "tr": "turkish",
    "uk": "ukrainian",
    # ISO 639-3 placeholder, not a 639-1 code (see the header comment).
    "und": "undetermined",
    "ur": "urdu",
    "vi": "vietnamese",
    "zh": "chinese",
}