Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/__init__.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

24 statements  

1"""Detection pipeline stages and shared types.""" 

2 

3from __future__ import annotations 

4 

5import dataclasses 

6from dataclasses import field 

7from typing import TypedDict 

8 

#: Confidence score reported by deterministic detection stages that are
#: not BOM-based: the escape, markup, and utf1632 stages, plus the
#: orchestrator when it emits the binary-detection result.
DETERMINISTIC_CONFIDENCE: float = 0.95

13 

#: Every byte value from 0x80 through 0xFF, in ascending order.
#: Passing this as the ``delete`` argument of ``bytes.translate`` strips
#: all non-ASCII bytes at C speed, so
#: ``len(data) - len(data.translate(None, HIGH_BYTES))`` is the number of
#: non-ASCII bytes in ``data``.
HIGH_BYTES: bytes = bytes(range(0x80, 0x100))

17 

18 

class DetectionDict(TypedDict):
    """Plain-dict shape of one detection result.

    This is the mapping handed back by :func:`chardet.detect`,
    :func:`chardet.detect_all`, and
    :attr:`chardet.UniversalDetector.result`.  All three keys are
    required; ``encoding`` and ``language`` may be ``None``.
    """

    encoding: str | None
    confidence: float
    language: str | None

29 

30 

31@dataclasses.dataclass(frozen=True, slots=True) 

32class DetectionResult: 

33 """A single encoding detection result. 

34 

35 Frozen dataclass holding the encoding name, confidence score, and 

36 optional language identifier returned by the detection pipeline. 

37 """ 

38 

39 encoding: str | None 

40 confidence: float 

41 language: str | None 

42 

43 def to_dict(self) -> DetectionDict: 

44 """Convert this result to a plain dict. 

45 

46 :returns: A dict with ``'encoding'``, ``'confidence'``, and ``'language'`` keys. 

47 """ 

48 return { 

49 "encoding": self.encoding, 

50 "confidence": self.confidence, 

51 "language": self.language, 

52 } 

53 

54 

55@dataclasses.dataclass(slots=True) 

56class PipelineContext: 

57 """Per-run mutable state for a single pipeline invocation. 

58 

59 Created once at the start of ``run_pipeline()`` and threaded through 

60 the call chain via function parameters. Each concurrent ``detect()`` 

61 call gets its own context, eliminating the need for module-level 

62 mutable caches. 

63 """ 

64 

65 analysis_cache: dict[str, tuple[float, int, int]] = field(default_factory=dict) 

66 non_ascii_count: int | None = None 

67 mb_scores: dict[str, float] = field(default_factory=dict) 

68 mb_coverage: dict[str, float] = field(default_factory=dict)