# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


"""
Parso's tokenize doesn't give us tokens in the format that we'd ideally like, so this
performs a small number of transformations to the token stream:

- `end_pos` is precomputed as a property, instead of lazily as a method, for more
  efficient access.
- `whitespace_before` and `whitespace_after` have been added. These include the correct
  indentation information.
- `prefix` is removed, since we don't use it anywhere.
- `ERRORTOKEN` and `ERROR_DEDENT` have been removed, because we don't intend to support
  error recovery. If we encounter token errors, we'll raise a ParserSyntaxError instead.

If performance becomes a concern, we can rewrite this later as a fork of the original
tokenize module, instead of as a wrapper.
"""

from dataclasses import dataclass, field
from enum import Enum
from typing import Generator, Iterator, List, Optional, Sequence

from libcst._add_slots import add_slots
from libcst._exceptions import ParserSyntaxError
from libcst._parser.parso.python.token import PythonTokenTypes, TokenType
from libcst._parser.parso.python.tokenize import (
    Token as OrigToken,
    tokenize_lines as orig_tokenize_lines,
)
from libcst._parser.parso.utils import PythonVersionInfo, split_lines
from libcst._parser.types.token import Token
from libcst._parser.types.whitespace_state import WhitespaceState

_ERRORTOKEN: TokenType = PythonTokenTypes.ERRORTOKEN
_ERROR_DEDENT: TokenType = PythonTokenTypes.ERROR_DEDENT

_INDENT: TokenType = PythonTokenTypes.INDENT
_DEDENT: TokenType = PythonTokenTypes.DEDENT
_ENDMARKER: TokenType = PythonTokenTypes.ENDMARKER

_FSTRING_START: TokenType = PythonTokenTypes.FSTRING_START
_FSTRING_END: TokenType = PythonTokenTypes.FSTRING_END

_OP: TokenType = PythonTokenTypes.OP


class _ParenthesisOrFStringStackEntry(Enum):
    PARENTHESIS = 0
    FSTRING = 1


_PARENTHESIS_STACK_ENTRY: _ParenthesisOrFStringStackEntry = (
    _ParenthesisOrFStringStackEntry.PARENTHESIS
)
_FSTRING_STACK_ENTRY: _ParenthesisOrFStringStackEntry = (
    _ParenthesisOrFStringStackEntry.FSTRING
)
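# Note: the two enum values must stay distinct. With equal values, Enum would
# treat FSTRING as an alias of PARENTHESIS, and the is_parenthesized comparison
# below could never tell the two stack entries apart.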


@add_slots
@dataclass(frozen=False)
class _TokenizeState:
    lines: Sequence[str]
    previous_whitespace_state: WhitespaceState = field(
        default_factory=lambda: WhitespaceState(
            line=1, column=0, absolute_indent="", is_parenthesized=False
        )
    )
    indents: List[str] = field(default_factory=lambda: [""])
    parenthesis_or_fstring_stack: List[_ParenthesisOrFStringStackEntry] = field(
        default_factory=list
    )


def tokenize(code: str, version_info: PythonVersionInfo) -> Iterator[Token]:
    try:
        # Prefer the optional native tokenizer extension when it's installed;
        # fall back to the pure-Python wrapper otherwise.
        from libcst_native import tokenize as native_tokenize

        return native_tokenize.tokenize(code)
    except ImportError:
        lines = split_lines(code, keepends=True)
        return tokenize_lines(code, lines, version_info)


def tokenize_lines(
    code: str, lines: Sequence[str], version_info: PythonVersionInfo
) -> Iterator[Token]:
    try:
        from libcst_native import tokenize as native_tokenize

        # TODO: pass through version_info
        return native_tokenize.tokenize(code)
    except ImportError:
        return tokenize_lines_py(code, lines, version_info)


def tokenize_lines_py(
    code: str, lines: Sequence[str], version_info: PythonVersionInfo
) -> Generator[Token, None, None]:
    state = _TokenizeState(lines)
    orig_tokens_iter = iter(orig_tokenize_lines(lines, version_info))

    # Iterate over the tokens and pass them to _convert_token, providing a one-token
    # lookahead, to enable proper indent handling.
    try:
        curr_token = next(orig_tokens_iter)
    except StopIteration:
        pass  # empty file
    else:
        for next_token in orig_tokens_iter:
            yield _convert_token(state, curr_token, next_token)
            curr_token = next_token
        yield _convert_token(state, curr_token, None)
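
# The loop above is the classic one-token-lookahead (pairwise) idiom. A rough
# equivalent sketch, assuming Python 3.10+ for `itertools.pairwise` (newer than
# what this module targets):
#
#   from itertools import chain, pairwise
#
#   for curr, nxt in pairwise(chain(orig_tokens_iter, [None])):
#       yield _convert_token(state, curr, nxt)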


def _convert_token(  # noqa: C901: too complex
    state: _TokenizeState, curr_token: OrigToken, next_token: Optional[OrigToken]
) -> Token:
    ct_type = curr_token.type
    ct_string = curr_token.string
    ct_start_pos = curr_token.start_pos
    if ct_type is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )
    if ct_type is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )

    # Compute relative indent changes for indent/dedent nodes
    relative_indent: Optional[str] = None
    if ct_type is _INDENT:
        old_indent = "" if len(state.indents) < 2 else state.indents[-2]
        new_indent = state.indents[-1]
        relative_indent = new_indent[len(old_indent) :]
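    # Worked example (illustrative): if state.indents is ["", "    ", "        "]
    # when an INDENT token arrives, old_indent is "    ", new_indent is "        ",
    # and relative_indent is the newly added "    ".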

    # Use the one-token lookahead to maintain the indentation stack: an upcoming
    # INDENT pushes the new line's whitespace prefix, an upcoming DEDENT pops it.
    if next_token is not None:
        nt_type = next_token.type
        if nt_type is _INDENT:
            nt_line, nt_column = next_token.start_pos
            state.indents.append(state.lines[nt_line - 1][:nt_column])
        elif nt_type is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER:
        # Don't update whitespace state for these dummy tokens. This makes it possible
        # to partially parse whitespace for IndentedBlock footers, and then parse the
        # rest of the whitespace in the following statement's leading_lines.
        # Unfortunately, that means that the indentation is either wrong for the footer
        # comments, or for the next line. We've chosen to allow it to be wrong for the
        # IndentedBlock footer and manually override the state when parsing whitespace
        # in that particular node.
        whitespace_after = whitespace_before
        ct_end_pos = ct_start_pos
    else:
        # Not a dummy token, so update the whitespace state.

        # Compute our own end_pos, since parso's end_pos is wrong for triple-strings.
        lines = split_lines(ct_string)
        if len(lines) > 1:
            ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1])
        else:
            ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string))
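        # Worked example (illustrative): a '"""a\nb"""' token starting at (1, 0)
        # splits into ['"""a', 'b"""'], giving ct_end_pos == (1 + 2 - 1, 4) == (2, 4).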

        # Figure out what mode the whitespace parser should use. If we're inside
        # parentheses, certain whitespace (e.g. newlines) are allowed where they would
        # otherwise not be. f-strings override and disable this behavior, however.
        #
        # Parso's tokenizer tracks this internally, but doesn't expose it, so we have
        # to duplicate that logic here.

        pof_stack = state.parenthesis_or_fstring_stack
        try:
            if ct_type is _FSTRING_START:
                pof_stack.append(_FSTRING_STACK_ENTRY)
            elif ct_type is _FSTRING_END:
                pof_stack.pop()
            elif ct_type is _OP:
                if ct_string in "([{":
                    pof_stack.append(_PARENTHESIS_STACK_ENTRY)
                elif ct_string in ")]}":
                    pof_stack.pop()
        except IndexError:
            # pof_stack may be empty by the time we need to read from it due to
            # mismatched braces.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_start_pos[0],
                raw_column=ct_start_pos[1],
            )
        is_parenthesized = (
            len(pof_stack) > 0 and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY
        )
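        # Example (illustrative): in `(f"{x}")`, the stack is [PARENTHESIS] outside
        # the f-string but [PARENTHESIS, FSTRING] while inside it, so the `x` token
        # is not treated as parenthesized until FSTRING_END pops the entry.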

        whitespace_after = WhitespaceState(
            ct_end_pos[0], ct_end_pos[1], state.indents[-1], is_parenthesized
        )

    # Hold onto whitespace_after, so we can use it as whitespace_before in the next
    # node.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_type,
        ct_string,
        ct_start_pos,
        ct_end_pos,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )