Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/ascii.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

15 statements  

1"""Stage 1c: Pure ASCII detection (with null-separator tolerance). 

2 

3Note: ``from __future__ import annotations`` is intentionally omitted because 

4this module is compiled with mypyc, which does not support PEP 563 string 

5annotations. 

6""" 

7 

8from chardet.pipeline import ASCII_TEXT_BYTES, DetectionResult 

9 

# Maximum fraction of null bytes to still classify data as ASCII.
# Null-separated CLI output (find -print0, git ls-tree -z) typically has
# 1-3.5% nulls. 5% covers all realistic cases while staying well below
# the UTF-16 guard threshold (15%).
# The limit is inclusive: detect_ascii() compares the null fraction with ``<=``.
_MAX_NULL_FRACTION = 0.05

15 

16 

def detect_ascii(data: bytes) -> DetectionResult | None:
    r"""Classify *data* as ASCII when it contains only allowed text bytes.

    Every byte listed in ``ASCII_TEXT_BYTES`` is allowed. In addition, null
    bytes (``\x00``) are tolerated as long as they make up no more than
    ``_MAX_NULL_FRACTION`` of the input; such input is reported with
    confidence 0.99 rather than 1.0 so callers can tell it apart from
    input made up solely of allowed bytes.

    :param data: The raw byte data to examine.
    :returns: A :class:`DetectionResult` with ``encoding="ascii"``, or
        ``None`` when the input is empty, contains any other disallowed
        byte, or has too many nulls.
    """
    if not data:
        return None

    # Strip out every allowed byte; what is left over is disallowed.
    disallowed = data.translate(None, ASCII_TEXT_BYTES)
    null_count = disallowed.count(b"\x00")

    # Any leftover byte that is not a null rules out ASCII outright.
    if null_count != len(disallowed):
        return None

    # Nothing left over at all: the input is entirely allowed bytes.
    if null_count == 0:
        return DetectionResult(encoding="ascii", confidence=1.0, language=None)

    # Only null separators remain -- reject when they are too dense.
    if null_count / len(data) > _MAX_NULL_FRACTION:
        return None
    return DetectionResult(encoding="ascii", confidence=0.99, language=None)