Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/utils.py: 41%
99 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3#
4# Modifications:
5# Copyright David Halter and Contributors
6# Modifications are dual-licensed: MIT and PSF.
7# 99% of the code is different from pgen2, now.
8#
9# A fork of `parso.utils`.
10# https://github.com/davidhalter/parso/blob/master/parso/utils.py
11#
12# The following changes were made:
13# - Drop Python 2 compatibility layer
14# - Use dataclasses instead of namedtuple
15# - Apply type hints directly to files
16# - Make PythonVersionInfo directly usable in hashmaps
17# - Unroll total ordering because Pyre doesn't understand it
20import re
21import sys
22from ast import literal_eval
23from dataclasses import dataclass
24from typing import Optional, Sequence, Tuple, Union
# Characters that :py:meth:`str.splitlines` treats as line boundaries although
# they do not end a line in Python source code. In Python source only \r
# (Carriage Return, 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    "\v",  # 0x0B: Vertical Tabulation
    "\f",  # 0x0C: Form Feed
    "\x1C",  # File Separator
    "\x1D",  # Group Separator
    "\x1E",  # Record Separator
    # Next Line (NEL), equivalent to CR+LF; used to mark end-of-line on some
    # IBM mainframes.
    "\x85",
    "\u2028",  # Line Separator
    "\u2029",  # Paragraph Separator
)
@dataclass(frozen=True)
class Version:
    """
    Immutable three-component version number (``major.minor.micro``).
    """

    # Leading version component.
    major: int
    # Second version component.
    minor: int
    # Third version component.
    micro: int
def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines.

    Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
    form feeds and the other characters in ``_NON_LINE_BREAKS`` are treated as
    normal text; only ``\n``, ``\r\n`` and ``\r`` separate lines.

    Also different: an empty string yields ``[""]``, and text that ends with a
    line break yields a trailing ``""`` entry, with or without ``keepends``.

    :param string: The text to split.
    :param keepends: If true, every returned line keeps its line terminator.
    :return: The list of lines.
    """
    if not keepends:
        return re.split(r"\n|\r\n|\r", string)

    # ``str.splitlines`` also breaks on form feeds and similar characters.
    # Glue such fragments back together in one pass: a fragment that ends in
    # one of those characters is not a finished line yet, so keep collecting
    # until a fragment ends in a real line break (or the input runs out).
    lines = []
    pending = []
    for fragment in string.splitlines(True):
        pending.append(fragment)
        # Fragments produced by ``splitlines`` are never empty.
        if fragment[-1] not in _NON_LINE_BREAKS:
            lines.append("".join(pending))
            pending = []
    if pending:
        lines.append("".join(pending))

    # The stdlib's implementation of the end is inconsistent when calling it
    # with/without keepends. One time there's an empty string in the end, one
    # time there's none.
    if string == "" or string.endswith(("\n", "\r")):
        lines.append("")
    return lines
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = "utf-8", errors: str = "strict"
) -> str:
    """
    Checks for a UTF-8 BOM and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`. ``str`` input is returned
    unchanged.

    :param source: The source code, either already decoded or as raw bytes.
    :param encoding: Encoding used when neither a BOM nor an encoding
        declaration is found. See :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the selected encoding is unknown and ``errors`` is
        not ``'replace'``.
    """
    if isinstance(source, str):
        # only cast bytes
        return source

    def detect_encoding(data: bytes) -> str:
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if data.startswith(b"\xef\xbb\xbf"):
            # UTF-8 byte-order mark
            return "utf-8"

        # A declaration is only looked for in the first two lines.
        first_two_match = re.match(rb"(?:[^\n]*\n){0,2}", data)
        if first_two_match is None:
            return encoding
        possible_encoding = re.search(
            rb"coding[=:]\s*([-\w.]+)", first_two_match.group(0)
        )
        if possible_encoding is None:
            # the default if nothing else has been set -> PEP 263
            return encoding
        return possible_encoding.group(1).decode("utf-8", "replace")

    actual_encoding = detect_encoding(source)
    try:
        # Cast to str
        return source.decode(actual_encoding, errors)
    except LookupError:
        if errors == "replace":
            # The declared encoding does not exist (e.g. `# coding: foo-8`).
            # The caller asked for lenient decoding, so fall back to UTF-8
            # instead of failing on the bogus declaration.
            return source.decode("utf-8", errors)
        raise
@dataclass(frozen=True)
class PythonVersionInfo:
    """
    A ``(major, minor)`` Python version.

    Instances can be compared with other instances and with plain tuples of
    length 2, and hash like the equivalent ``(major, minor)`` tuple, so they
    can be used as dictionary keys.

    Comparing against a tuple whose length is not 2 raises :py:exc:`ValueError`.
    Comparing against any other unsupported object returns ``NotImplemented``,
    so ``==`` / ``!=`` fall back to Python's default result and the ordering
    operators raise :py:exc:`TypeError`.
    """

    major: int
    minor: int

    def _comparison_key(self, other: Union["PythonVersionInfo", Tuple[int, int]]):
        """
        Return ``other`` as a ``(major, minor)`` tuple, or ``NotImplemented``
        if it is neither a tuple nor an object with ``major``/``minor``.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return other
        try:
            return (other.major, other.minor)
        except AttributeError:
            # Not a version-like object; let Python handle the comparison.
            return NotImplemented

    def __gt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) > key

    def __ge__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) >= key

    def __lt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) < key

    def __le__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) <= key

    def __eq__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) == key

    def __ne__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is NotImplemented:
            return NotImplemented
        return (self.major, self.minor) != key

    def __hash__(self) -> int:
        # Same hash as the equivalent tuple, since instances compare equal to it.
        return hash((self.major, self.minor))
def _parse_version(version: str) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3"``, ``"3.2"`` or ``"2.7.1"`` into a
    :class:`PythonVersionInfo`. A third component is accepted but discarded.

    :raises ValueError: If ``version`` does not look like a version number.
    :raises NotImplementedError: If only a major version is given and it is
        neither 2 nor 3.
    """
    parsed = re.match(r"(\d+)(?:\.(\d+)(?:\.\d+)?)?$", version)
    if parsed is None:
        raise ValueError(
            (
                "The given version is not in the right format. "
                + 'Use something like "3.2" or "3".'
            )
        )

    major_text, minor_text = parsed.groups()
    major = int(major_text)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Use the latest Python in case it's not exactly defined, because the
    # grammars are typically backwards compatible?
    default_minor_by_major = {2: 7, 3: 6}
    if major not in default_minor_by_major:
        raise NotImplementedError(
            "Sorry, no support yet for those fancy new/old versions."
        )
    return PythonVersionInfo(major, default_minor_by_major[major])
def parse_version_string(version: Optional[str] = None) -> PythonVersionInfo:
    """
    Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
    returns the corresponding two-component (major, minor) version info.

    When ``version`` is ``None``, the version of the running interpreter
    (``sys.version_info``) is used.
    """
    if version is not None:
        return _parse_version(version)

    running_version = "%s.%s" % sys.version_info[:2]
    return _parse_version(running_version)