Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/utils.py: 41%

99 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:43 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3# 

4# Modifications: 

5# Copyright David Halter and Contributors 

6# Modifications are dual-licensed: MIT and PSF. 

7# 99% of the code is different from pgen2, now. 

8# 

9# A fork of `parso.utils`. 

10# https://github.com/davidhalter/parso/blob/master/parso/utils.py 

11# 

12# The following changes were made: 

13# - Drop Python 2 compatibility layer 

14# - Use dataclasses instead of namedtuple 

15# - Apply type hints directly to files 

16# - Make PythonVersionInfo directly usable in hashmaps 

17# - Unroll total ordering because Pyre doesn't understand it 

18 

19 

20import re 

21import sys 

22from ast import literal_eval 

23from dataclasses import dataclass 

24from typing import Optional, Sequence, Tuple, Union 

25 

26# The following is a list in Python that are line breaks in str.splitlines, but 

27# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed, 

28# 0xA) are allowed to split lines. 

29_NON_LINE_BREAKS = ( 

30 "\v", # Vertical Tabulation 0xB 

31 "\f", # Form Feed 0xC 

32 "\x1C", # File Separator 

33 "\x1D", # Group Separator 

34 "\x1E", # Record Separator 

35 "\x85", # Next Line (NEL - Equivalent to CR+LF. 

36 # Used to mark end-of-line on some IBM mainframes.) 

37 "\u2028", # Line Separator 

38 "\u2029", # Paragraph Separator 

39) 

40 

41 

@dataclass(frozen=True)
class Version:
    """Immutable three-part version number (``major.minor.micro``)."""

    # First component of the version, e.g. the ``3`` in ``3.8.1``.
    major: int
    # Second component of the version, e.g. the ``8`` in ``3.8.1``.
    minor: int
    # Third component of the version, e.g. the ``1`` in ``3.8.1``.
    micro: int

47 

48 

def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines.

    Unlike :py:meth:`str.splitlines`, only ``\n``, ``\r\n`` and ``\r`` are
    treated as line endings; form feeds and the other characters listed in
    ``_NON_LINE_BREAKS`` stay inside the line they appear in.

    The result always contains at least one element: an empty input yields
    ``[""]``, and input ending in a line break yields a trailing ``""``.

    :param string: The text to split.
    :param keepends: When true, every returned line keeps its line ending.
    """
    if not keepends:
        return re.split(r"\n|\r\n|\r", string)

    pieces = string.splitlines(True)

    # str.splitlines also breaks after characters that are not line endings
    # in Python source. Glue such a piece back onto its successor. Walking
    # from the end keeps the indices that are still to be visited stable.
    for pos in range(len(pieces) - 1, -1, -1):
        piece = pieces[pos]
        has_successor = pos + 1 < len(pieces)
        if piece and piece[-1] in _NON_LINE_BREAKS and has_successor:
            pieces[pos] = piece + pieces[pos + 1]
            del pieces[pos + 1]

    # With keepends, str.splitlines emits no trailing empty string after a
    # final line break (and nothing at all for ""); add it so both modes of
    # this function return the same number of lines.
    if string == "" or string.endswith(("\n", "\r")):
        pieces.append("")
    return pieces

91 

92 

def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = "utf-8", errors: str = "strict"
) -> str:
    """
    Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`.

    ``str`` input is returned unchanged. For ``bytes`` input the encoding is
    chosen in this order: a UTF-8 byte-order mark, a ``coding:``/``coding=``
    declaration found in the first two lines, and finally ``encoding``.
    A leading byte-order mark is not stripped from the decoded result.

    :param source: The source code, either already decoded or as raw bytes.
    :param encoding: See :py:meth:`bytes.decode` documentation. Only used
        when the source itself does not declare an encoding.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the selected encoding is unknown to Python.
    :raises UnicodeDecodeError: If decoding fails and ``errors`` is
        ``'strict'``.
    """
    if isinstance(source, str):
        # only cast bytes
        return source

    def detect_encoding() -> str:
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if source.startswith(b"\xef\xbb\xbf"):
            # UTF-8 byte-order mark
            return "utf-8"

        # Only the first two lines may carry an encoding declaration.
        # pyre-ignore Pyre can't see that Union[str, bytes] conforms to AnyStr.
        first_two_match = re.match(rb"(?:[^\n]*\n){0,2}", source)
        if first_two_match is None:
            return encoding
        declaration = re.search(
            rb"coding[=:]\s*([-\w.]+)", first_two_match.group(0)
        )
        if declaration is None:
            # the default if nothing else has been set -> PEP 263
            return encoding
        # The declared name is captured as bytes; turn it into the ``str``
        # that ``bytes.decode`` expects.
        return declaration.group(1).decode("utf-8", "replace")

    # Cast to str
    return source.decode(detect_encoding(), errors)

138 

139 

@dataclass(frozen=True)
class PythonVersionInfo:
    """
    Immutable ``major.minor`` Python version.

    Instances can be compared with each other, with any object exposing
    ``major`` and ``minor`` attributes, and with 2-tuples such as ``(3, 6)``.
    Comparing with a tuple of any other length raises :class:`ValueError`.
    Operands that are neither are unsupported: ``==`` is false, ``!=`` is
    true, and the ordering operators raise :class:`TypeError`.

    The comparison methods are written out one by one (rather than generated)
    and the hash equals the hash of ``(major, minor)``, so an instance and the
    equivalent tuple address the same dictionary slot.
    """

    major: int
    minor: int

    @staticmethod
    def _comparison_key(other: object) -> Optional[Tuple[int, int]]:
        """
        Return the ``(major, minor)`` pair ``other`` stands for, or ``None``
        when ``other`` is not something a version can be compared with.

        :raises ValueError: If ``other`` is a tuple whose length is not 2.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return other
        try:
            # Duck-typed on purpose: anything with ``major`` and ``minor``
            # attributes is accepted, not only PythonVersionInfo instances.
            return (other.major, other.minor)  # pyre-ignore[16]
        except AttributeError:
            return None

    def __gt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) > key

    def __ge__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) >= key

    def __lt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) < key

    def __le__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) <= key

    def __eq__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            # Let Python fall back to its default, which yields False for an
            # unrelated object instead of failing on a missing attribute.
            return NotImplemented
        return (self.major, self.minor) == key

    def __ne__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) != key

    def __hash__(self) -> int:
        # Must stay in sync with __eq__: equal to the matching 2-tuple, so
        # hash like it.
        return hash((self.major, self.minor))

180 

181 

def _parse_version(version: str) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3"``, ``"3.2"`` or ``"2.7.1"`` into a
    :class:`PythonVersionInfo`. A micro component is accepted but discarded.

    :raises ValueError: If ``version`` does not look like a version number.
    :raises NotImplementedError: If only a major version is given and it is
        neither 2 nor 3.
    """
    parsed = re.match(r"(\d+)(?:\.(\d+)(?:\.\d+)?)?$", version)
    if parsed is None:
        raise ValueError(
            (
                "The given version is not in the right format. "
                + 'Use something like "3.2" or "3".'
            )
        )

    major_text, minor_text = parsed.groups()
    major = int(major_text)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Only a major version was given. Pick a fixed minor version for it,
    # because the grammars are typically backwards compatible.
    default_minors = {2: 7, 3: 6}
    if major not in default_minors:
        raise NotImplementedError(
            "Sorry, no support yet for those fancy new/old versions."
        )
    return PythonVersionInfo(major, default_minors[major])

207 

208 

def parse_version_string(version: Optional[str] = None) -> PythonVersionInfo:
    """
    Validate a version string (e.g. `3.2` or `2.7.1` or `3`) and return the
    matching :class:`PythonVersionInfo`, which carries only the major and
    minor components.

    :param version: The version to parse. When omitted or ``None``, the
        ``major.minor`` version of the running interpreter is used.
    """
    requested = (
        version if version is not None else "%s.%s" % sys.version_info[:2]
    )
    return _parse_version(requested)