Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/enums.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
All of the Enums that are used throughout the chardet package.

:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
from enum import Flag, IntEnum, auto
class InputState(IntEnum):
    """
    This enum represents the different states a universal detector can be in.

    Members are plain integers (``IntEnum``), so they compare equal to the
    raw values ``0``, ``1`` and ``2``.
    """

    # NOTE(review): the per-member meanings below are inferred from the
    # names only -- confirm against the detector that consumes them.
    PURE_ASCII = 0  # presumably: nothing but ASCII seen so far
    ESC_ASCII = 1  # presumably: ASCII plus an escape sequence
    HIGH_BYTE = 2  # presumably: at least one byte with the high bit set
class LanguageFilter(Flag):
    """
    This enum represents the different language filters we can apply to a
    ``UniversalDetector``.

    It is a ``Flag``, so members can be combined with ``|`` and tested with
    ``in`` / ``&``.
    """

    # Single-bit flags: ``auto()`` hands out 1, 2, 4, 8, 16 in this order.
    CHINESE_SIMPLIFIED = auto()
    CHINESE_TRADITIONAL = auto()
    JAPANESE = auto()
    KOREAN = auto()
    NON_CJK = auto()

    # Composite masks built from the single-bit flags above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
    ALL = NON_CJK | CJK
class ProbingState(IntEnum):
    """
    This enum represents the different states a prober can be in.

    Members are plain integers (``IntEnum``), so they compare equal to the
    raw values ``0``, ``1`` and ``2``.
    """

    # NOTE(review): the per-member meanings below are inferred from the
    # names only -- confirm against the probers that return them.
    DETECTING = 0  # presumably: no verdict yet
    FOUND_IT = 1  # presumably: positive match
    NOT_ME = 2  # presumably: ruled out
class MachineState(IntEnum):
    """
    This enum represents the different states a state machine can be in.

    Members are plain integers (``IntEnum``), so they compare equal to the
    raw values ``0``, ``1`` and ``2``.
    """

    START = 0
    ERROR = 1
    ITS_ME = 2
class SequenceLikelihood(IntEnum):
    """
    This enum represents the likelihood of a character following the previous one.

    Values ascend from ``NEGATIVE`` (0) to ``POSITIVE`` (3); being an
    ``IntEnum``, members can be ordered and compared with plain integers.
    """

    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3
class CharacterCategory(IntEnum):
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than DIGIT is considered a letter.
    """

    # Non-letter categories occupy the top of the 0-255 range, in
    # descending order, so every value below DIGIT (251) is left for letters.
    UNDEFINED = 255
    CONTROL = 254
    SYMBOL = 253
    LINE_BREAK = 252
    DIGIT = 251
class EncodingEra(Flag):
    """
    This enum represents different eras of character encodings, used to filter
    which encodings are considered during detection.

    The numeric values also serve as preference tiers for tie-breaking when
    confidence scores are very close. Lower values = more preferred/modern.
    """

    # Declaration order matters: ``auto()`` assigns ascending powers of two
    # (1, 2, 4, 8, 16), and those values double as the preference tiers
    # described above. Do not reorder members.
    MODERN_WEB = auto()  # UTF-8/16/32, Windows-125x, modern multibyte (widely used)
    LEGACY_ISO = auto()  # ISO-8859-x (legacy but common)
    LEGACY_MAC = auto()  # Mac encodings (less common)
    DOS = auto()  # CP437, CP850, CP852, etc. (very legacy)
    MAINFRAME = auto()  # EBCDIC variants (CP037, CP500, etc.)

    # Union of every era above.
    ALL = MODERN_WEB | LEGACY_ISO | LEGACY_MAC | DOS | MAINFRAME