Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/binary.py: 50%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

12 statements  

1"""Stage 0: Binary content detection.""" 

2 

3from __future__ import annotations 

4 

5from chardet._utils import DEFAULT_MAX_BYTES 

6 

# Data is classified as binary once the share of binary-indicator bytes
# strictly exceeds this fraction.
_BINARY_THRESHOLD = 0.01

# Byte values counted as binary indicators: the control bytes 0x00-0x08 and
# 0x0E-0x1F. The whitespace controls \t \n \v \f \r (0x09-0x0D) are left out.
# Passed as the ``delete`` argument of ``bytes.translate(None, ...)`` so that
# len(data) - len(translated) yields the indicator count in a single
# C-level pass.
_BINARY_DELETE = bytes([*range(0x00, 0x09), *range(0x0E, 0x20)])

15 

16 

def is_binary(data: bytes, max_bytes: int = DEFAULT_MAX_BYTES) -> bool:
    """Decide whether *data* looks like binary rather than textual content.

    Only the leading ``data[:max_bytes]`` slice is inspected. The slice is
    classified as binary when the fraction of its bytes that appear in
    ``_BINARY_DELETE`` is strictly greater than ``_BINARY_THRESHOLD``.
    An empty slice is never classified as binary.

    :param data: The raw byte data to examine.
    :param max_bytes: Upper bound on how many leading bytes are scanned.
    :returns: ``True`` if the inspected bytes are classified as binary,
        ``False`` otherwise (including for empty input).
    """
    sample = data[:max_bytes]
    total = len(sample)
    if total == 0:
        # Nothing to inspect; this also keeps the division below safe.
        return False

    # Dropping every indicator byte in one translate() call and comparing
    # lengths gives the indicator count without a Python-level loop.
    stripped = sample.translate(None, _BINARY_DELETE)
    indicator_ratio = (total - len(stripped)) / total
    return indicator_ratio > _BINARY_THRESHOLD