1"""Stage 1c: Pure ASCII detection (with null-separator tolerance).
2
3Note: ``from __future__ import annotations`` is intentionally omitted because
4this module is compiled with mypyc, which does not support PEP 563 string
5annotations.
6"""
7
8from chardet.pipeline import ASCII_TEXT_BYTES, DetectionResult
9
# Upper bound (inclusive) on the fraction of null bytes that ``detect_ascii``
# tolerates while still classifying data as ASCII.
# Null-separated CLI output (find -print0, git ls-tree -z) typically has
# 1-3.5% nulls. 5% covers all realistic cases while staying well below
# the UTF-16 guard threshold (15%).
_MAX_NULL_FRACTION = 0.05
15
16
def detect_ascii(data: bytes) -> DetectionResult | None:
    r"""Classify ``data`` as ASCII when it holds only allowed text bytes.

    Every byte listed in ``ASCII_TEXT_BYTES`` is stripped from the input.
    If nothing is left, the data is reported as ASCII with confidence 1.0.
    If the only leftover bytes are nulls (``\x00``) and they make up at most
    ``_MAX_NULL_FRACTION`` of the input, the data is still reported as ASCII
    but with confidence 0.99, so callers can tell it apart from the
    null-free case.

    :param data: The raw byte data to examine.
    :returns: A :class:`DetectionResult` with encoding ``"ascii"``, or
        ``None`` when the input is empty, contains any other disallowed
        byte, or contains too many nulls.
    """
    if not data:
        return None

    # Delete every allowed byte; whatever survives is disallowed.
    leftover = data.translate(None, ASCII_TEXT_BYTES)

    if leftover:
        null_count = leftover.count(b"\x00")
        if null_count != len(leftover):
            # Something other than a null separator is present.
            return None
        if null_count / len(data) > _MAX_NULL_FRACTION:
            # Only nulls, but too dense to be treated as separators.
            return None
        confidence = 0.99
    else:
        confidence = 1.0

    return DetectionResult(encoding="ascii", confidence=confidence, language=None)