1# Copyright (c) Meta Platforms, Inc. and affiliates.
2#
3# This source code is licensed under the MIT license found in the
4# LICENSE file in the root directory of this source tree.
5
6import codecs
7import re
8import sys
9from dataclasses import dataclass, field, fields
10from enum import Enum
11from typing import Any, Callable, FrozenSet, List, Mapping, Optional, Pattern, Union
12
13from libcst._add_slots import add_slots
14from libcst._nodes.whitespace import NEWLINE_RE
15from libcst._parser.parso.utils import parse_version_string, PythonVersionInfo
16
# A valid indent is a non-empty run made up solely of spaces and/or tabs. This
# pattern is used with ``fullmatch`` to validate a caller-supplied
# ``default_indent``.
_INDENT_RE: Pattern[str] = re.compile(
    r"[ \t]+",
)
18
# Select the module that provides the parser configuration types: use
# ``libcst_native.parser_config`` when it can be imported, and otherwise fall
# back to ``libcst._parser.types.py_config``.
try:
    from libcst_native import parser_config as config_mod
except ImportError:
    from libcst._parser.types import py_config as config_mod

    MockWhitespaceParserConfig = config_mod.MockWhitespaceParserConfig
else:
    # With ``libcst_native`` the mock name is simply an alias of that module's
    # ``BaseWhitespaceParserConfig``.
    MockWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig

# Re-export the remaining names from whichever module was selected above.
BaseWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig
ParserConfig = config_mod.ParserConfig
parser_config_asdict: Callable[
    [ParserConfig], Mapping[str, Any]
] = config_mod.parser_config_asdict
33
34
class AutoConfig(Enum):
    """
    Sentinel used as the default for :class:`PartialParserConfig` fields.

    A field still holding ``AutoConfig.token`` was not supplied by the caller.
    """

    token: int = 0

    def __repr__(self) -> str:
        # Use the ``str()`` form (``AutoConfig.token``) instead of Enum's default
        # ``<AutoConfig.token: 0>`` representation.
        return f"{self!s}"
44
45
# The "major.minor" grammar versions that may be selected for parsing.
# This list should be kept in sorted order: the version picker in this module
# walks it from the end to find the newest compatible entry.
KNOWN_PYTHON_VERSION_STRINGS: List[str] = [
    "3.0",
    "3.1",
    "3.3",
    "3.5",
    "3.6",
    "3.7",
    "3.8",
]
48
49
@add_slots
@dataclass(frozen=True)
class PartialParserConfig:
    r"""
    An optional object that can be supplied to the parser entrypoints (e.g.
    :func:`parse_module`) to configure the parser.

    Unspecified fields will be inferred from the input source code or from the execution
    environment.

    Values that are supplied are validated when the object is constructed; an
    unknown ``python_version``, an unsupported ``encoding``, or a malformed
    ``default_newline`` / ``default_indent`` raises :class:`ValueError`.

    >>> import libcst as cst
    >>> tree = cst.parse_module("abc")
    >>> tree.bytes
    b'abc'
    >>> # override the default utf-8 encoding
    ... tree = cst.parse_module("abc", cst.PartialParserConfig(encoding="utf-32"))
    >>> tree.bytes
    b'\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00'
    """

    #: The version of Python that the input source code is expected to be syntactically
    #: compatible with. This may be different from the Python interpreter being used to
    #: run LibCST. For example, you can parse code as 3.7 with a CPython 3.6
    #: interpreter.
    #:
    #: If unspecified, it will default to the syntax of the running interpreter
    #: (rounding down from among the following list).
    #:
    #: Currently, only Python 3.0, 3.1, 3.3, 3.5, 3.6, 3.7 and 3.8 syntax is supported.
    #: The gaps did not have any syntax changes from the version prior.
    python_version: Union[str, AutoConfig] = AutoConfig.token

    #: A named tuple with the ``major`` and ``minor`` Python version numbers. This is
    #: derived from :attr:`python_version` and should not be supplied to the
    #: :class:`PartialParserConfig` constructor.
    parsed_python_version: PythonVersionInfo = field(init=False)

    #: The file's encoding format. When parsing a ``bytes`` object, this value may be
    #: inferred from the contents of the parsed source code. When parsing a ``str``,
    #: this value defaults to ``"utf-8"``.
    encoding: Union[str, AutoConfig] = AutoConfig.token

    #: Detected ``__future__`` import names
    future_imports: Union[FrozenSet[str], AutoConfig] = AutoConfig.token

    #: The indentation of the file, expressed as a series of tabs and/or spaces. This
    #: value is inferred from the contents of the parsed source code by default.
    default_indent: Union[str, AutoConfig] = AutoConfig.token

    #: The newline of the file, expressed as ``\n``, ``\r\n``, or ``\r``. This value is
    #: inferred from the contents of the parsed source code by default.
    default_newline: Union[str, AutoConfig] = AutoConfig.token

    def __post_init__(self) -> None:
        """
        Validate the supplied fields and derive ``parsed_python_version``.

        Fields still set to ``AutoConfig.token`` are not validated here.

        Raises a :class:`ValueError` if ``python_version`` is not one of
        ``KNOWN_PYTHON_VERSION_STRINGS``, if ``encoding`` cannot be found by
        :func:`codecs.lookup`, or if ``default_newline`` / ``default_indent`` do
        not fully match ``NEWLINE_RE`` / ``_INDENT_RE`` respectively.
        """
        raw_python_version = self.python_version

        if isinstance(raw_python_version, AutoConfig):
            # If unspecified, we'll try to pick the same as the running
            # interpreter. There will always be at least one entry.
            parsed_python_version = _pick_compatible_python_version()
        else:
            # If the caller specified a version, we require that to be a known
            # version (because we don't want to encourage doing duplicate work
            # when there weren't syntax changes).

            # `parse_version_string` will raise a ValueError if the version is
            # invalid.
            parsed_python_version = parse_version_string(raw_python_version)

            if not any(
                parsed_python_version == parse_version_string(v)
                for v in KNOWN_PYTHON_VERSION_STRINGS
            ):
                comma_versions = ", ".join(KNOWN_PYTHON_VERSION_STRINGS)
                raise ValueError(
                    "LibCST can only parse code using one of the following versions of "
                    + f"Python's grammar: {comma_versions}. More versions may be "
                    + "supported by future releases."
                )

        # We use object.__setattr__ because the dataclass is frozen. See:
        # https://docs.python.org/3/library/dataclasses.html#frozen-instances
        # This should be safe behavior inside of `__post_init__`.
        object.__setattr__(self, "parsed_python_version", parsed_python_version)

        encoding = self.encoding
        if not isinstance(encoding, AutoConfig):
            try:
                codecs.lookup(encoding)
            except LookupError as err:
                # Chain explicitly so the traceback presents the codec lookup
                # failure as the direct cause instead of as an unrelated error
                # that happened "during handling" of another one.
                raise ValueError(
                    f"{repr(encoding)} is not a supported encoding"
                ) from err

        newline = self.default_newline
        if (
            not isinstance(newline, AutoConfig)
            and NEWLINE_RE.fullmatch(newline) is None
        ):
            raise ValueError(
                f"Got an invalid value for default_newline: {repr(newline)}"
            )

        indent = self.default_indent
        if not isinstance(indent, AutoConfig) and _INDENT_RE.fullmatch(indent) is None:
            raise ValueError(f"Got an invalid value for default_indent: {repr(indent)}")

    def __repr__(self) -> str:
        """
        Return a constructor-style representation listing only the fields whose
        value is not ``AutoConfig.token``.
        """
        init_keys: List[str] = []

        for f in fields(self):
            # We don't display the parsed_python_version attribute because it contains
            # the same value as python_version, only parsed.
            if f.name == "parsed_python_version":
                continue
            value = getattr(self, f.name)
            if not isinstance(value, AutoConfig):
                init_keys.append(f"{f.name}={value!r}")

        return f"{self.__class__.__name__}({', '.join(init_keys)})"
168
169
def _pick_compatible_python_version(version: Optional[str] = None) -> PythonVersionInfo:
    """
    Return the newest known grammar version that does not exceed ``version``.

    ``version`` is handed to ``parse_version_string`` unchanged, including the
    default ``None`` (presumably that resolves to the running interpreter's
    version -- confirm against ``parse_version_string``).

    Raises a ``ValueError`` when every entry of ``KNOWN_PYTHON_VERSION_STRINGS``
    is newer than the requested version.
    """
    max_version = parse_version_string(version)

    # The known versions are kept in sorted order, so scanning them in reverse
    # yields the newest acceptable entry first. ``map`` is lazy, so parsing stops
    # as soon as a match is found.
    newest_first = map(parse_version_string, reversed(KNOWN_PYTHON_VERSION_STRINGS))
    chosen = next(
        (candidate for candidate in newest_first if candidate <= max_version),
        None,
    )
    if chosen is not None:
        return chosen

    raise ValueError(
        f"No version found older than {version} ({max_version}) while "
        + f"running on {sys.version_info}"
    )