Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/utils.py: 24%

97 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1import re 

2import sys 

3from ast import literal_eval 

4from functools import total_ordering 

5from typing import NamedTuple, Sequence, Union 

6 

# Characters that :py:meth:`str.splitlines` treats as line boundaries but that
# do not terminate a line in Python source code. In Python source only \r
# (Carriage Return, 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    '\v',      # 0x0B: Vertical Tabulation
    '\f',      # 0x0C: Form Feed
    '\x1C',    # File Separator
    '\x1D',    # Group Separator
    '\x1E',    # Record Separator
    '\x85',    # Next Line (NEL), equivalent to CR+LF; used to mark
               # end-of-line on some IBM mainframes
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
)

21 

22 

class Version(NamedTuple):
    """Three-part version record with integer-annotated fields."""

    # Leading version component.
    major: int
    # Second version component.
    minor: int
    # Third version component.
    micro: int

27 

28 

def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines.

    Unlike :py:meth:`str.splitlines`, only ``\n``, ``\r\n`` and ``\r`` count
    as line endings here; form feeds and the other characters listed in
    ``_NON_LINE_BREAKS`` are treated as ordinary text. An empty input yields
    ``[""]`` and input ending in a line break yields a trailing ``""``.

    :param string: The text to split.
    :param keepends: When true, every returned line keeps its line ending.
    :return: The list of lines.
    """
    if not keepends:
        return re.split(r'\n|\r\n|\r', string)

    pieces = string.splitlines(True)

    # str.splitlines also cuts after form feeds and similar characters. Find
    # every piece that ends in such a character so it can be glued back onto
    # the piece that follows it.
    glue_points = [
        position
        for position, piece in enumerate(pieces)
        if piece and piece[-1] in _NON_LINE_BREAKS
    ]

    # Walk backwards so that removing a successor never shifts an index that
    # still has to be processed.
    for position in reversed(glue_points):
        successor = position + 1
        if successor < len(pieces):
            pieces[position:successor + 1] = [pieces[position] + pieces[successor]]
        # Otherwise the piece is the last one and there is nothing to merge.

    # With keepends the stdlib omits the trailing empty string that the
    # regex-based split (keepends=False) produces, so add it here to keep
    # both modes consistent.
    if string == '' or string.endswith(('\n', '\r')):
        pieces.append('')
    return pieces

71 

72 

def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
) -> str:
    """
    Decode Python source bytes, honouring a UTF-8 byte-order mark and PEP 263
    encoding declarations. ``str`` input is returned unchanged.

    :param source: The source code, either already decoded or as bytes.
    :param encoding: Fallback encoding used when the source declares none.
        See :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the detected encoding is unknown and ``errors`` is
        not ``'replace'``.
    """
    if isinstance(source, str):
        # Only bytes need decoding.
        return source

    def detect_encoding():
        """
        Work out which encoding the byte source asks for. For the rules, see:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if source.startswith(b'\xef\xbb\xbf'):
            # UTF-8 byte-order mark
            return 'utf-8'

        # A declaration is only looked for in the first two lines.
        head = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
        declaration = re.search(br"coding[=:]\s*([-\w.]+)", head)
        if declaration is None:
            # Nothing declared: use the caller-supplied default (PEP 263).
            return encoding
        return declaration.group(1).decode('ascii', 'replace')

    detected = detect_encoding()
    try:
        return str(source, detected, errors)
    except LookupError:
        if errors != 'replace':
            raise
        # The declared encoding does not exist (e.g. `# coding: foo-8`).
        # In 'replace' mode fall back to UTF-8 instead of failing.
        return str(source, 'utf-8', errors)

123 

124 

def version_info() -> Version:
    """
    Build a :class:`Version` namedtuple from parso's ``__version__`` string,
    similar to Python's ``sys.version_info``.
    """
    from parso import __version__

    # Break the version string into runs of digits and runs of lowercase
    # letters, in order of appearance.
    components = re.findall(r'[a-z]+|\d+', __version__)
    # The component at index 3 is passed through as text; every other
    # component is converted to an integer.
    return Version(*(
        component if position == 3 else int(component)
        for position, component in enumerate(components)
    ))

133 

134 

class _PythonVersionInfo(NamedTuple):
    """Plain ``(major, minor)`` record that :class:`PythonVersionInfo` extends."""

    major: int
    minor: int


@total_ordering
class PythonVersionInfo(_PythonVersionInfo):
    """
    A ``(major, minor)`` Python version that compares against 2-tuples.

    ``>``, ``==`` and ``!=`` raise :class:`ValueError` when the other operand
    is a tuple whose length is not 2. For operands that are not tuples these
    methods return ``NotImplemented`` so that Python applies its normal
    fallback rules (``==`` is ``False``, ``!=`` is ``True``, ``>`` raises
    :class:`TypeError`).

    NOTE(review): ``<``, ``<=`` and ``>=`` are already inherited from
    ``tuple``, so ``total_ordering`` most likely does not replace them and
    they do not perform the length check -- confirm before relying on it.
    """

    def __gt__(self, other):
        """Return whether this version is greater than the 2-tuple ``other``."""
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        # Propagate the base-class result instead of discarding it and
        # returning a (truthy) tuple.
        return super().__gt__(other)

    def __eq__(self, other):
        """Return whether this version equals the 2-tuple ``other``."""
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        # Propagate the base-class result instead of implicitly returning None.
        return super().__eq__(other)

    def __ne__(self, other):
        """Return the negation of :meth:`__eq__`, passing ``NotImplemented`` through."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # Negating NotImplemented would turn "don't know" into a definite
            # answer (and is rejected by newer Python versions).
            return outcome
        return not outcome

    # Defining __eq__ would otherwise set __hash__ to None. Reuse the tuple
    # hash so that equal 2-tuples and version objects hash alike.
    __hash__ = _PythonVersionInfo.__hash__

160 

161 

def _parse_version(version) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3.8"``, ``"3.10.1"`` or ``"3"`` into a
    :class:`PythonVersionInfo`.

    :raises ValueError: If the string does not look like a version.
    :raises NotImplementedError: If only a major version other than 2 or 3
        is given.
    """
    parsed = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
    if parsed is None:
        raise ValueError('The given version is not in the right format. '
                         'Use something like "3.8" or "3".')

    major = int(parsed.group(1))
    minor_text = parsed.group(2)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Only the major version was given. Use the latest Python in case it's
    # not exactly defined, because the grammars are typically backwards
    # compatible?
    default_minor = {2: 7, 3: 6}
    if major not in default_minor:
        raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
    return PythonVersionInfo(major, default_minor[major])

181 

182 

def parse_version_string(version: str = None) -> PythonVersionInfo:
    """
    Validate a version number (e.g. `3.8` or `3.10.1` or `3`) and return the
    matching ``(major, minor)`` version info.

    :param version: The version string. ``None`` means the version of the
        running interpreter.
    :raises TypeError: If ``version`` is neither ``None`` nor a string.
    """
    if version is None:
        # Default to the interpreter that is executing this code.
        return _parse_version('%s.%s' % sys.version_info[:2])
    if isinstance(version, str):
        return _parse_version(version)
    raise TypeError('version must be a string like "3.8"')