"""Detection pipeline stages and shared types."""
2
from __future__ import annotations

import dataclasses
from dataclasses import field
from typing import TypedDict
8
#: Confidence for deterministic (non-BOM) detection stages.
#: Used by escape, markup, and utf1632 stages (and by the orchestrator for
#: the binary-detection result).
DETERMINISTIC_CONFIDENCE: float = 0.95

#: Byte table for fast non-ASCII counting (C-speed via bytes.translate).
#: Deleting all bytes >= 0x80 and comparing lengths gives the non-ASCII count,
#: e.g. ``len(data) - len(data.translate(None, HIGH_BYTES))``.
HIGH_BYTES: bytes = bytes(range(0x80, 0x100))
17
18
class DetectionDict(TypedDict):
    """Dictionary representation of a detection result.

    Returned by :func:`chardet.detect`, :func:`chardet.detect_all`,
    and :attr:`chardet.UniversalDetector.result`.
    """

    # Encoding name, or None.
    encoding: str | None
    # Confidence score for the result.
    confidence: float
    # Language identifier, or None.
    language: str | None
29
30
31@dataclasses.dataclass(frozen=True, slots=True)
32class DetectionResult:
33 """A single encoding detection result.
34
35 Frozen dataclass holding the encoding name, confidence score, and
36 optional language identifier returned by the detection pipeline.
37 """
38
39 encoding: str | None
40 confidence: float
41 language: str | None
42
43 def to_dict(self) -> DetectionDict:
44 """Convert this result to a plain dict.
45
46 :returns: A dict with ``'encoding'``, ``'confidence'``, and ``'language'`` keys.
47 """
48 return {
49 "encoding": self.encoding,
50 "confidence": self.confidence,
51 "language": self.language,
52 }
53
54
55@dataclasses.dataclass(slots=True)
56class PipelineContext:
57 """Per-run mutable state for a single pipeline invocation.
58
59 Created once at the start of ``run_pipeline()`` and threaded through
60 the call chain via function parameters. Each concurrent ``detect()``
61 call gets its own context, eliminating the need for module-level
62 mutable caches.
63 """
64
65 analysis_cache: dict[str, tuple[float, int, int]] = field(default_factory=dict)
66 non_ascii_count: int | None = None
67 mb_scores: dict[str, float] = field(default_factory=dict)
68 mb_coverage: dict[str, float] = field(default_factory=dict)