Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_internal/utils/encoding.py: 78%

18 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 06:33 +0000

1import codecs 

2import locale 

3import re 

4import sys 

5from typing import List, Tuple 

6 

# Byte-order marks paired with the codec used to decode the bytes that
# follow the mark.
#
# Order matters: auto_decode() uses the first entry whose BOM is a prefix of
# the data. The UTF-32 little-endian BOM (FF FE 00 00) begins with the UTF-16
# little-endian BOM (FF FE), so the UTF-32 entries must come before the
# UTF-16 ones; otherwise UTF-32 input is misdetected as UTF-16.
# NOTE: the flip side is that UTF-16-LE text whose first character is U+0000
# is indistinguishable from UTF-32-LE and will be treated as the latter.
BOMS: List[Tuple[bytes, str]] = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32, "utf-32"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF16, "utf-16"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
]

# Finds a "coding:<name>" / "coding=<name>" declaration (the PEP 263 form)
# and captures the encoding name.
ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)")


def auto_decode(data: bytes) -> str:
    """Decode *data* to text, detecting the encoding from its content.

    Detection is attempted in this order:

    1. A leading byte-order mark listed in ``BOMS``; the mark is stripped
       and the remainder decoded with the associated codec.
    2. A ``coding:``/``coding=`` declaration on one of the first two lines,
       as in PEP 263. Only lines whose first byte is ``#`` are considered.
    3. Fallback to ``locale.getpreferredencoding(False)`` like ``open()``
       on Python 3, or ``sys.getdefaultencoding()`` if that is empty.

    Raises ``UnicodeDecodeError`` if the bytes are not valid in the chosen
    encoding, and ``LookupError`` if a declared encoding name is unknown.
    """
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom) :].decode(encoding)

    # Only the first two lines can carry a declaration, so cap the split
    # instead of breaking the whole payload into lines.
    for line in data.split(b"\n", 2)[:2]:
        if not line.startswith(b"#"):
            continue
        declaration = ENCODING_RE.search(line)
        if declaration is not None:
            # The pattern is a bytes regex, so the captured name is ASCII.
            return data.decode(declaration.group(1).decode("ascii"))

    return data.decode(
        locale.getpreferredencoding(False) or sys.getdefaultencoding(),
    )