Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/utils.py: 24%

1import re

2import sys

3from ast import literal_eval

4from functools import total_ordering

5from typing import NamedTuple, Sequence, Union

7# The following is a list in Python that are line breaks in str.splitlines, but

8# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,

9# 0xA) are allowed to split lines.

10_NON_LINE_BREAKS = (

11 '\v', # Vertical Tabulation 0xB

12 '\f', # Form Feed 0xC

13 '\x1C', # File Separator

14 '\x1D', # Group Separator

15 '\x1E', # Record Separator

16 '\x85', # Next Line (NEL - Equivalent to CR+LF.

17 # Used to mark end-of-line on some IBM mainframes.)

18 '\u2028', # Line Separator

19 '\u2029', # Paragraph Separator

20)

class Version(NamedTuple):
    """
    Three-component version number (``major``, ``minor``, ``micro``), used as
    the return type of :func:`version_info`.
    """
    major: int
    minor: int
    micro: int

def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
    looks at form feeds and other special characters as normal text. Just
    splits ``\n``, ``\r\n`` and ``\r``.
    Also different: Returns ``[""]`` for an empty string input.

    In Python 2.7 form feeds are used as normal characters when using
    str.splitlines. However in Python 3 somewhere there was a decision to split
    also on form feeds.

    :param string: The text to split.
    :param keepends: If true, every returned line keeps its line ending.
    :return: The list of lines. If the input is empty or ends with a line
        break, the last element is an empty string.
    """
    if not keepends:
        return re.split(r'\n|\r\n|\r', string)

    lst = string.splitlines(True)

    # str.splitlines also breaks after the characters in _NON_LINE_BREAKS
    # (form feed and friends), so glue such a line back onto its successor.
    # Walking backwards keeps the not-yet-visited indexes valid while the
    # list shrinks; the last line has no successor and is never merged.
    for index in reversed(range(len(lst) - 1)):
        if lst[index][-1] in _NON_LINE_BREAKS:
            lst[index:index + 2] = [lst[index] + lst[index + 1]]

    # The stdlib's implementation of the end is inconsistent when calling
    # it with/without keepends. One time there's an empty string in the
    # end, one time there's none.
    if string == '' or string.endswith(('\n', '\r')):
        lst.append('')
    return lst

def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
) -> str:
    """
    Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`.

    A ``str`` input is returned unchanged. A UTF-8 byte-order mark selects
    UTF-8 but is not removed, so the result then starts with ``'\\ufeff'``.

    :param source: The source code, either already decoded or as bytes.
    :param encoding: See :py:meth:`bytes.decode` documentation. Only used if
        the source has neither a BOM nor an encoding declaration.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the declared encoding is unknown and ``errors`` is
        not ``'replace'``.
    """
    def detect_encoding() -> str:
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if source.startswith(b'\xef\xbb\xbf'):
            # UTF-8 byte-order mark
            return 'utf-8'

        # Only the first two lines may carry a declaration. ``\Z`` lets a
        # final line without a trailing line break count as a line as well.
        first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n|\Z)){0,2}', source).group(0)
        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
            # The character class of a bytes pattern is ASCII-only, so this
            # decode cannot lose information.
            return str(possible_encoding.group(1), 'ascii', 'replace')
        # the default if nothing else has been set -> PEP 263
        return encoding

    if isinstance(source, str):
        # only cast str/bytes
        return source

    encoding = detect_encoding()
    try:
        # Cast to unicode
        return str(source, encoding, errors)
    except LookupError:
        if errors == 'replace':
            # This is a weird case that can happen if the given encoding is not
            # a valid encoding. This usually shouldn't happen with provided
            # encodings, but can happen if somebody uses encoding declarations
            # like `# coding: foo-8`.
            return str(source, 'utf-8', errors)
        raise

123

124

def version_info() -> Version:
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.

    Only the leading numeric components of ``parso.__version__`` are used; a
    suffix such as ``rc1`` or ``.post1`` is ignored because :class:`Version`
    has exactly three fields.

    :raises TypeError: If the version string has fewer than three leading
        numeric components.
    """
    from parso import __version__

    numbers = []
    for part in re.findall(r'[a-z]+|\d+', __version__):
        if not part.isdigit():
            # First alphabetic tag (e.g. "rc"): everything from here on is a
            # pre-/post-release marker, not part of major.minor.micro.
            break
        numbers.append(int(part))
    return Version(*numbers[:3])

133

134

class _PythonVersionInfo(NamedTuple):
    major: int
    minor: int


@total_ordering
class PythonVersionInfo(_PythonVersionInfo):
    """
    A ``(major, minor)`` Python version that can be compared with plain
    tuples of length 2, e.g. ``version > (3, 7)`` or ``version == (3, 8)``.

    ``>``, ``==`` and ``!=`` raise :class:`ValueError` when compared with a
    tuple of any other length and return ``NotImplemented`` for operands that
    are not tuples.
    """
    # NOTE: tuple already implements every ordering operator, so
    # ``total_ordering`` finds nothing to synthesize here; ``<``, ``<=`` and
    # ``>=`` are tuple's own comparisons and do not perform the length check.

    # Defining __eq__ in a class body resets __hash__ to None. Restore the
    # tuple hash, which is consistent with equality against 2-tuples.
    __hash__ = _PythonVersionInfo.__hash__

    def __gt__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        # Not a tuple: defer to tuple, which answers NotImplemented so that
        # Python can try the reflected operation or raise TypeError.
        return super().__gt__(other)

    def __eq__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        return super().__eq__(other)

    def __ne__(self, other):
        result = self.__eq__(other)
        # NotImplemented must be passed through, never negated.
        if result is NotImplemented:
            return result
        return not result

160

161

def _parse_version(version) -> PythonVersionInfo:
    """
    Turns a version string into a :class:`PythonVersionInfo`.

    Accepted are a major version alone (``"3"``), ``major.minor`` with a one
    or two digit minor (``"3.8"``), and ``major.minor.micro`` (``"3.10.1"``),
    each optionally followed by ``a``, ``b`` or ``rc`` plus a single digit.
    The micro part and the pre-release tag are validated but discarded.

    :param version: The version string to parse.
    :raises ValueError: If the string does not have the format above.
    :raises NotImplementedError: If only a major version other than 2 or 3 is
        given.
    """
    match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
    if match is None:
        raise ValueError('The given version is not in the right format. '
                         'Use something like "3.8" or "3".')

    major = int(match.group(1))
    minor = match.group(2)
    if minor is None:
        # Use the latest Python in case it's not exactly defined, because the
        # grammars are typically backwards compatible?
        if major == 2:
            minor = "7"
        elif major == 3:
            minor = "6"
        else:
            raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
    minor = int(minor)
    return PythonVersionInfo(major, minor)

181

182

def parse_version_string(version: Union[str, None] = None) -> PythonVersionInfo:
    """
    Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
    returns the corresponding version info, which always consists of exactly
    a major and a minor number.

    :param version: The version string. ``None`` selects the version of the
        running interpreter (``sys.version_info``).
    :raises TypeError: If ``version`` is neither ``None`` nor a string.
    """
    if version is None:
        version = '%s.%s' % sys.version_info[:2]
    if not isinstance(version, str):
        raise TypeError('version must be a string like "3.8"')

    return _parse_version(version)

193

194 return _parse_version(version)