Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/__init__.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

28 statements  

1"""Detection pipeline stages and shared types.""" 

2 

3from __future__ import annotations 

4 

5import dataclasses 

6from dataclasses import field 

7from typing import TypedDict 

8 

#: Confidence for deterministic (non-BOM) detection stages.
#: Used by escape, markup, and utf1632 stages (and by the orchestrator for
#: the binary-detection result).
DETERMINISTIC_CONFIDENCE: float = 0.95

#: Byte table for fast non-ASCII counting (C-speed via bytes.translate).
#: Deleting all bytes >= 0x80 and comparing lengths gives the non-ASCII count,
#: e.g. ``len(data) - len(data.translate(None, HIGH_BYTES))``.
HIGH_BYTES: bytes = bytes(range(0x80, 0x100))

#: Bytes considered valid in ASCII text: tab (0x09), newline (0x0A),
#: carriage return (0x0D), and printable ASCII (0x20-0x7E).
#: Used by ``ascii.py`` directly and by ``utf1632.py`` (with null added).
ASCII_TEXT_BYTES: bytes = bytes([0x09, 0x0A, 0x0D, *range(0x20, 0x7F)])

22 

23 

24class DetectionDict(TypedDict): 

25 """Dictionary representation of a detection result. 

26 

27 Returned by :func:`chardet.detect`, :func:`chardet.detect_all`, 

28 and :attr:`chardet.UniversalDetector.result`. 

29 """ 

30 

31 encoding: str | None 

32 confidence: float 

33 language: str | None 

34 mime_type: str | None 

35 

36 

37@dataclasses.dataclass(frozen=True, slots=True) 

38class DetectionResult: 

39 """A single encoding detection result. 

40 

41 Frozen dataclass holding the encoding name, confidence score, and 

42 optional language identifier returned by the detection pipeline. 

43 """ 

44 

45 encoding: str | None 

46 confidence: float 

47 language: str | None 

48 mime_type: str | None = None 

49 

50 def to_dict(self) -> DetectionDict: 

51 """Convert this result to a plain dict. 

52 

53 :returns: A dict with ``'encoding'``, ``'confidence'``, ``'language'``, and ``'mime_type'`` keys. 

54 """ 

55 return { 

56 "encoding": self.encoding, 

57 "confidence": self.confidence, 

58 "language": self.language, 

59 "mime_type": self.mime_type, 

60 } 

61 

62 

#: Sentinel result for "no detection" — used by the orchestrator for
#: filtered-out fallbacks and by UniversalDetector before close().
_NONE_RESULT = DetectionResult(encoding=None, confidence=0.0, language=None)

66 

67 

68@dataclasses.dataclass(slots=True) 

69class PipelineContext: 

70 """Per-run mutable state for a single pipeline invocation. 

71 

72 Created once at the start of ``run_pipeline()`` and threaded through 

73 the call chain via function parameters. Each concurrent ``detect()`` 

74 call gets its own context, eliminating the need for module-level 

75 mutable caches. 

76 """ 

77 

78 analysis_cache: dict[str, tuple[float, int, int]] = field(default_factory=dict) 

79 non_ascii_count: int | None = None 

80 mb_scores: dict[str, float] = field(default_factory=dict) 

81 mb_coverage: dict[str, float] = field(default_factory=dict)