Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/_utils.py: 74%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

19 statements  

1"""Internal shared utilities for chardet.""" 

2 

3from __future__ import annotations 

4 

5import warnings 

6 

#: Default maximum number of bytes to examine during detection.
DEFAULT_MAX_BYTES: int = 200_000

#: Default minimum confidence threshold for filtering results.
MINIMUM_THRESHOLD: float = 0.20

#: Default chunk_size value (deprecated, kept for backward-compat signatures).
_DEFAULT_CHUNK_SIZE: int = 65_536

15 

16 

def _warn_deprecated_chunk_size(chunk_size: int, stacklevel: int = 3) -> None:
    """Emit a deprecation warning if *chunk_size* differs from the default.

    Nothing happens when *chunk_size* equals ``_DEFAULT_CHUNK_SIZE``; any
    other value triggers a single :class:`DeprecationWarning`.

    :param chunk_size: The ``chunk_size`` value received from the caller.
    :param stacklevel: Forwarded unchanged to :func:`warnings.warn` so the
        warning can be attributed to a frame above this helper (default 3).
    """
    if chunk_size != _DEFAULT_CHUNK_SIZE:
        warnings.warn(
            "chunk_size is not used in this version of chardet and will be ignored",
            DeprecationWarning,
            stacklevel=stacklevel,
        )

25 

26 

def _validate_max_bytes(max_bytes: int) -> None:
    """Raise ValueError if *max_bytes* is not a positive integer.

    :param max_bytes: Candidate byte limit to validate.
    :raises ValueError: If *max_bytes* is a ``bool``, is not an ``int``, or
        is less than 1.
    """
    # bool is a subclass of int, so it has to be rejected explicitly before
    # the generic isinstance(int) check would let True/False through.
    if isinstance(max_bytes, bool) or not isinstance(max_bytes, int) or max_bytes < 1:
        msg = "max_bytes must be a positive integer"
        raise ValueError(msg)

32 

33 

def _resolve_prefer_superset(
    should_rename_legacy: bool, prefer_superset: bool, stacklevel: int = 3
) -> bool:
    """Resolve the deprecated *should_rename_legacy* into *prefer_superset*.

    :param should_rename_legacy: Deprecated flag; when truthy it wins over
        *prefer_superset* and a :class:`DeprecationWarning` is emitted.
    :param prefer_superset: The replacement flag, returned unchanged when
        *should_rename_legacy* is falsy.
    :param stacklevel: Forwarded unchanged to :func:`warnings.warn`
        (default 3).
    :returns: ``True`` if *should_rename_legacy* is truthy, otherwise
        *prefer_superset*.
    """
    if should_rename_legacy:
        warnings.warn(
            "should_rename_legacy is deprecated, use prefer_superset instead",
            DeprecationWarning,
            stacklevel=stacklevel,
        )
        return True
    return prefer_superset

46 

47 

#: Mapping from ISO 639-1 language codes to English names.
#: Includes ``"und"`` (ISO 639-3 "Undetermined") for use when language is unknown.
ISO_TO_LANGUAGE: dict[str, str] = {
    "ar": "arabic",
    "be": "belarusian",
    "bg": "bulgarian",
    "br": "breton",
    "cs": "czech",
    "cy": "welsh",
    "da": "danish",
    "de": "german",
    "el": "greek",
    "en": "english",
    "eo": "esperanto",
    "es": "spanish",
    "et": "estonian",
    "fa": "farsi",
    "fi": "finnish",
    "fr": "french",
    "ga": "irish",
    "gd": "gaelic",
    "he": "hebrew",
    "hr": "croatian",
    "hu": "hungarian",
    "id": "indonesian",
    "is": "icelandic",
    "it": "italian",
    "ja": "japanese",
    "kk": "kazakh",
    "ko": "korean",
    "lt": "lithuanian",
    "lv": "latvian",
    "mk": "macedonian",
    "ms": "malay",
    "mt": "maltese",
    "nl": "dutch",
    "no": "norwegian",
    "pl": "polish",
    "pt": "portuguese",
    "ro": "romanian",
    "ru": "russian",
    "sk": "slovak",
    "sl": "slovene",
    "sr": "serbian",
    "sv": "swedish",
    "tg": "tajik",
    "th": "thai",
    "tr": "turkish",
    "uk": "ukrainian",
    "und": "undetermined",
    "ur": "urdu",
    "vi": "vietnamese",
    "zh": "chinese",
}