Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/entrypoints.py: 61%

49 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:43 +0000

1# Copyright (c) Meta Platforms, Inc. and affiliates. 

2# 

3# This source code is licensed under the MIT license found in the 

4# LICENSE file in the root directory of this source tree. 

5 

6""" 

7Parser entrypoints define the way users of our API are allowed to interact with the 

8parser. A parser entrypoint should take the source code and some configuration 

9information 

10""" 

11 

12import os 

13from functools import partial 

14from typing import Union 

15 

16from libcst._nodes.base import CSTNode 

17from libcst._nodes.expression import BaseExpression 

18from libcst._nodes.module import Module 

19from libcst._nodes.statement import BaseCompoundStatement, SimpleStatementLine 

20from libcst._parser.detect_config import convert_to_utf8, detect_config 

21from libcst._parser.grammar import get_grammar, validate_grammar 

22from libcst._parser.python_parser import PythonCSTParser 

23from libcst._parser.types.config import PartialParserConfig 

24 

# Configuration used when a caller of parse_module / parse_statement /
# parse_expression does not pass ``config``. It is created once at import time
# and that single instance is the default argument of all three functions.
_DEFAULT_PARTIAL_PARSER_CONFIG: PartialParserConfig = PartialParserConfig()

26 

27 

def is_native() -> bool:
    """
    Report whether the native parser should be used.

    The ``LIBCST_PARSER_TYPE`` environment variable is read on every call.
    The result is ``False`` only when the variable is set to exactly
    ``"pure"``; any other value, or the variable being unset, gives ``True``.
    """
    typ = os.environ.get("LIBCST_PARSER_TYPE")
    return typ != "pure"

31 

32 

def _parse(
    entrypoint: str,
    source: Union[str, bytes],
    config: PartialParserConfig,
    *,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> CSTNode:
    """
    Parse ``source`` starting from the grammar rule named by ``entrypoint``.

    When :func:`is_native` returns true, ``source`` is passed through
    ``convert_to_utf8`` and the resulting string is handed to the matching
    function from ``libcst.native``. The two ``detect_*`` flags are not used on
    that path. Otherwise every argument is forwarded unchanged to
    :func:`_pure_python_parse`.

    Raises ``ValueError`` on the native path when ``entrypoint`` is not one of
    ``"file_input"``, ``"stmt_input"`` or ``"expression_input"``.
    """
    if is_native():
        # Imported only when the native branch runs. Inside this function these
        # names shadow this module's public functions of the same name.
        from libcst.native import parse_expression, parse_module, parse_statement

        encoding, source_str = convert_to_utf8(source, partial=config)

        if entrypoint == "file_input":
            # Only the module parser is given the encoding.
            parse = partial(parse_module, encoding=encoding)
        elif entrypoint == "stmt_input":
            parse = parse_statement
        elif entrypoint == "expression_input":
            parse = parse_expression
        else:
            raise ValueError(f"Unknown parser entry point: {entrypoint}")

        return parse(source_str)
    return _pure_python_parse(
        entrypoint,
        source,
        config,
        detect_trailing_newline=detect_trailing_newline,
        detect_default_newline=detect_default_newline,
    )

63 

64 

def _pure_python_parse(
    entrypoint: str,
    source: Union[str, bytes],
    config: PartialParserConfig,
    *,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> CSTNode:
    """
    Parse ``source`` with ``PythonCSTParser``, starting at ``entrypoint``.

    ``detect_config`` is run first on the source; the tokens and config it
    returns are what the parser consumes. The grammar is obtained from
    ``get_grammar`` using ``parsed_python_version`` and ``future_imports`` read
    from the ``config`` argument. The parser's result is asserted to be a
    ``CSTNode`` before it is returned.
    """
    detection_result = detect_config(
        source,
        partial=config,
        detect_trailing_newline=detect_trailing_newline,
        detect_default_newline=detect_default_newline,
    )
    validate_grammar()
    grammar = get_grammar(config.parsed_python_version, config.future_imports)

    parser = PythonCSTParser(
        tokens=detection_result.tokens,
        config=detection_result.config,
        pgen_grammar=grammar,
        start_nonterminal=entrypoint,
    )
    # The parser has an Any return type, we can at least refine it to CSTNode here.
    result = parser.parse()
    assert isinstance(result, CSTNode)
    return result

92 

93 

def parse_module(
    source: Union[str, bytes],  # the only entrypoint that accepts bytes
    config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG,
) -> Module:
    """
    Accepts an entire python module, including all leading and trailing whitespace.

    If source is ``bytes``, the encoding will be inferred and preserved. If
    the source is a ``string``, we will default to assuming UTF-8 encoding if the
    module is rendered back out to source as bytes. It is recommended that when
    calling :func:`~libcst.parse_module` with a string you access the serialized
    code using :class:`~libcst.Module`'s code attribute, and when calling it with
    bytes you access the serialized code using :class:`~libcst.Module`'s bytes
    attribute.
    """
    result = _parse(
        "file_input",
        source,
        config,
        detect_trailing_newline=True,
        detect_default_newline=True,
    )
    # _parse is annotated as returning CSTNode; narrow it to Module for callers.
    assert isinstance(result, Module)
    return result

118 

119 

def parse_statement(
    source: str, config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG
) -> Union[SimpleStatementLine, BaseCompoundStatement]:
    """
    Accepts a statement followed by a trailing newline. If a trailing newline is not
    provided, one will be added. :func:`parse_statement` is provided mainly as a
    convenience function to generate semi-complex trees from code snippets. If you
    need to represent a statement exactly, including all leading/trailing comments,
    you should instead use :func:`parse_module`.

    Leading comments and trailing comments (on the same line) are accepted, but
    whitespace (or anything else) after the statement's trailing newline is not valid
    (there's nowhere to store it on the statement node). Note that since there is
    nowhere to store leading and trailing comments/empty lines, code rendered out
    from a parsed statement using ``cst.Module([]).code_for_node(statement)`` will
    not include leading/trailing comments.
    """
    # use detect_trailing_newline to insert a newline
    result = _parse(
        "stmt_input",
        source,
        config,
        detect_trailing_newline=True,
        detect_default_newline=False,
    )
    # _parse is annotated as returning CSTNode; narrow it to the statement types.
    assert isinstance(result, (SimpleStatementLine, BaseCompoundStatement))
    return result

147 

148 

def parse_expression(
    source: str, config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG
) -> BaseExpression:
    """
    Accepts an expression on a single line. Leading and trailing whitespace is not
    valid (there's nowhere to store it on the expression node).
    :func:`parse_expression` is provided mainly as a convenience function to generate
    semi-complex trees from code snippets. If you need to represent an expression
    exactly, including all leading/trailing comments, you should instead use
    :func:`parse_module`.
    """
    result = _parse(
        "expression_input",
        source,
        config,
        detect_trailing_newline=False,
        detect_default_newline=False,
    )
    # _parse is annotated as returning CSTNode; narrow it to BaseExpression.
    assert isinstance(result, BaseExpression)
    return result

168 return result