1"""Detection pipeline stages and shared types."""
2
3from __future__ import annotations
4
5import dataclasses
6from dataclasses import field
7from typing import TypedDict
8
#: Confidence assigned by deterministic (non-BOM) detection stages.
#: Used by the escape, markup, and utf1632 stages, and by the orchestrator
#: for the binary-detection result.
DETERMINISTIC_CONFIDENCE: float = 0.95
13
#: Every byte value outside 7-bit ASCII (0x80-0xFF), intended as the
#: ``delete`` argument of ``bytes.translate`` for C-speed counting:
#: ``len(data) - len(data.translate(None, HIGH_BYTES))`` is the number of
#: non-ASCII bytes in ``data``.
HIGH_BYTES: bytes = bytes(range(256))[0x80:]
17
#: Byte values accepted as plain ASCII text: the whitespace controls tab
#: (0x09), newline (0x0A), and carriage return (0x0D), followed by the
#: printable range 0x20-0x7E.
#: Used by ``ascii.py`` directly and by ``utf1632.py`` (with null added).
ASCII_TEXT_BYTES: bytes = b"\t\n\r" + bytes(range(0x20, 0x7F))
22
23
class DetectionDict(TypedDict):
    """Dictionary representation of a detection result.

    Returned by :func:`chardet.detect`, :func:`chardet.detect_all`,
    and :attr:`chardet.UniversalDetector.result`.

    All four keys are required; ``encoding``, ``language``, and
    ``mime_type`` may hold ``None``.
    """

    encoding: str | None
    confidence: float
    language: str | None
    mime_type: str | None
35
36
37@dataclasses.dataclass(frozen=True, slots=True)
38class DetectionResult:
39 """A single encoding detection result.
40
41 Frozen dataclass holding the encoding name, confidence score, and
42 optional language identifier returned by the detection pipeline.
43 """
44
45 encoding: str | None
46 confidence: float
47 language: str | None
48 mime_type: str | None = None
49
50 def to_dict(self) -> DetectionDict:
51 """Convert this result to a plain dict.
52
53 :returns: A dict with ``'encoding'``, ``'confidence'``, ``'language'``, and ``'mime_type'`` keys.
54 """
55 return {
56 "encoding": self.encoding,
57 "confidence": self.confidence,
58 "language": self.language,
59 "mime_type": self.mime_type,
60 }
61
62
#: Shared "no detection" sentinel: no encoding, zero confidence, no language,
#: no MIME type. Used by the orchestrator for filtered-out fallbacks and by
#: UniversalDetector before close().
_NONE_RESULT: DetectionResult = DetectionResult(
    encoding=None,
    confidence=0.0,
    language=None,
    mime_type=None,
)
66
67
68@dataclasses.dataclass(slots=True)
69class PipelineContext:
70 """Per-run mutable state for a single pipeline invocation.
71
72 Created once at the start of ``run_pipeline()`` and threaded through
73 the call chain via function parameters. Each concurrent ``detect()``
74 call gets its own context, eliminating the need for module-level
75 mutable caches.
76 """
77
78 analysis_cache: dict[str, tuple[float, int, int]] = field(default_factory=dict)
79 non_ascii_count: int | None = None
80 mb_scores: dict[str, float] = field(default_factory=dict)
81 mb_coverage: dict[str, float] = field(default_factory=dict)