1import unicodedata
2import sys
3from configparser import ConfigParser
4
5from .compat import py39
6from .warnings import SetuptoolsDeprecationWarning
7
8
9# HFS Plus uses decomposed UTF-8
def decompose(path):
    """
    Return ``path`` in Unicode canonical decomposed form (NFD).

    Text is normalized directly. Anything else is treated as UTF-8 encoded
    bytes: it is decoded, normalized and encoded back to UTF-8. Input that
    does not round-trip through UTF-8 is returned untouched.
    """
    if not isinstance(path, str):
        try:
            text = path.decode('utf-8')
            return unicodedata.normalize('NFD', text).encode('utf-8')
        except UnicodeError:
            # Not UTF-8: hand the original value back unchanged.
            return path
    return unicodedata.normalize('NFD', path)
20
21
def filesys_decode(path):
    """
    Return ``path`` as text.

    ``str`` input is returned as-is. Other input is decoded with the
    filesystem encoding first (``'utf-8'`` when that is unset) and then with
    UTF-8; the first successful decoding wins. ``None`` is returned when
    neither encoding can decode the value.
    """
    if isinstance(path, str):
        return path

    for codec in (sys.getfilesystemencoding() or 'utf-8', 'utf-8'):
        try:
            decoded = path.decode(codec)
        except UnicodeDecodeError:
            # This candidate does not fit; move on to the next one.
            pass
        else:
            return decoded

    return None
41
42
def try_encode(string, enc):
    """
    Encode ``string`` with the encoding ``enc``.

    Returns the encoded bytes, or ``None`` when the text cannot be
    represented in that encoding (instead of raising ``UnicodeEncodeError``).
    """
    try:
        encoded = string.encode(enc)
    except UnicodeEncodeError:
        return None
    else:
        return encoded
49
50
def _read_utf8_with_fallback(file: str, fallback_encoding=py39.LOCALE_ENCODING) -> str:
    """
    Return the text content of ``file``, preferring UTF-8.

    The file is first read as UTF-8. If that raises ``UnicodeDecodeError``,
    a ``_Utf8EncodingNeeded`` warning is emitted and the file is read again
    with ``fallback_encoding`` (``py39.LOCALE_ENCODING`` by default).

    Also useful when reading files that might have been produced by an older
    version of setuptools.
    """
    try:
        with open(file, encoding="utf-8") as stream:
            content = stream.read()
    except UnicodeDecodeError:  # pragma: no cover
        _Utf8EncodingNeeded.emit(file=file, fallback_encoding=fallback_encoding)
        with open(file, encoding=fallback_encoding) as stream:
            content = stream.read()
    return content
65
66
def _cfg_read_utf8_with_fallback(
    cfg: ConfigParser, file: str, fallback_encoding=py39.LOCALE_ENCODING
) -> None:
    """Load ``file`` into ``cfg`` via :meth:`ConfigParser.read`, preferring UTF-8.

    Mirrors :func:`_read_utf8_with_fallback`: when reading as UTF-8 raises
    ``UnicodeDecodeError``, a ``_Utf8EncodingNeeded`` warning is emitted and
    the file is read again with ``fallback_encoding``.

    Note that the fallback path calls ``cfg.clear()`` before re-reading.
    """
    try:
        cfg.read(file, encoding="utf-8")
    except UnicodeDecodeError:  # pragma: no cover
        warning_context = {"file": file, "fallback_encoding": fallback_encoding}
        _Utf8EncodingNeeded.emit(**warning_context)
        # Start from an emptied parser so that any sections a partially
        # completed UTF-8 pass may have added do not linger.
        cfg.clear()
        cfg.read(file, encoding=fallback_encoding)
81
82
class _Utf8EncodingNeeded(SetuptoolsDeprecationWarning):
    # Warning emitted by the UTF-8-with-fallback readers in this module when a
    # file cannot be decoded as UTF-8 and a fallback encoding is tried instead.
    # Callers pass ``file`` and ``fallback_encoding`` to ``emit``; the templates
    # below reference those two names as placeholders (presumably filled in by
    # the base class -- confirm against ``SetuptoolsDeprecationWarning``).

    # Short message: which file failed and which encoding is being tried next.
    _SUMMARY = """
 `encoding="utf-8"` fails with {file!r}, trying `encoding={fallback_encoding!r}`.
 """

    # Longer explanation: the fallback is deprecated, plus remediation advice.
    _DETAILS = """
 Fallback behaviour for UTF-8 is considered **deprecated** and future versions of
 `setuptools` may not implement it.

 Please encode {file!r} with "utf-8" to ensure future builds will succeed.

 If this file was produced by `setuptools` itself, cleaning up the cached files
 and re-building/re-installing the package with a newer version of `setuptools`
 (e.g. by updating `build-system.requires` in its `pyproject.toml`)
 might solve the problem.
 """
    # TODO: Add a deadline?
    #       Will we be able to remove this?
    #       The question comes to mind mainly because of sdists that have been
    #       produced by old versions of setuptools and published to PyPI...