1# Copyright (c) Meta Platforms, Inc. and affiliates.
2#
3# This source code is licensed under the MIT license found in the
4# LICENSE file in the root directory of this source tree.
5
6"""
7Parser entrypoints define the way users of our API are allowed to interact with the
8parser. A parser entrypoint should take the source code and some configuration
information.
10"""
11
12from functools import partial
13from typing import Union
14
15from libcst._nodes.base import CSTNode
16from libcst._nodes.expression import BaseExpression
17from libcst._nodes.module import Module
18from libcst._nodes.statement import BaseCompoundStatement, SimpleStatementLine
19from libcst._parser.detect_config import convert_to_utf8
20from libcst._parser.types.config import PartialParserConfig
21
# Default value for the ``config`` parameter of the public ``parse_*`` entrypoints
# in this module. It is built once at import time, so every call that omits
# ``config`` receives this same instance.
_DEFAULT_PARTIAL_PARSER_CONFIG: PartialParserConfig = PartialParserConfig()
23
24
def _parse(
    entrypoint: str,
    source: Union[str, bytes],
    config: PartialParserConfig,
    *,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> CSTNode:
    """
    Shared implementation behind the public ``parse_*`` entrypoints.

    ``source`` is first passed through ``convert_to_utf8`` (with ``config`` as its
    ``partial`` argument), which yields an encoding and a string form of the
    source. That string is then handed to the native parser function selected by
    ``entrypoint``:

    * ``"file_input"`` -> ``native.parse_module`` (also receives the encoding)
    * ``"stmt_input"`` -> ``native.parse_statement``
    * ``"expression_input"`` -> ``native.parse_expression``

    Any other ``entrypoint`` raises :class:`ValueError`.

    ``detect_trailing_newline`` and ``detect_default_newline`` are accepted as
    keyword-only arguments but are not read anywhere in this function.
    """
    encoding, source_str = convert_to_utf8(source, partial=config)

    # Imported inside the function rather than at module import time.
    from libcst import native

    if entrypoint == "file_input":
        # Only the module parser is given the encoding.
        return native.parse_module(source_str, encoding=encoding)
    if entrypoint == "stmt_input":
        return native.parse_statement(source_str)
    if entrypoint == "expression_input":
        return native.parse_expression(source_str)

    raise ValueError(f"Unknown parser entry point: {entrypoint}")
48
49
def parse_module(
    source: Union[str, bytes],  # the only entrypoint that accepts bytes
    config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG,
) -> Module:
    """
    Parse a complete Python module, leading and trailing whitespace included.

    When ``source`` is ``bytes``, the encoding is inferred and preserved. When it
    is a ``str``, UTF-8 is assumed if the module is later rendered back out as
    bytes. The recommended pairing is therefore: after calling
    :func:`~libcst.parse_module` with a string, read the serialized code from
    :class:`~libcst.Module`'s code attribute; after calling it with bytes, read it
    from :class:`~libcst.Module`'s bytes attribute.
    """
    module = _parse(
        "file_input",
        source,
        config,
        detect_default_newline=True,
        detect_trailing_newline=True,
    )
    # Narrow the generic CSTNode returned by _parse to Module.
    assert isinstance(module, Module)
    return module
74
75
def parse_statement(
    source: str,
    config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG,
) -> Union[SimpleStatementLine, BaseCompoundStatement]:
    """
    Parse a single statement followed by a trailing newline; if the trailing
    newline is missing, one will be added.

    :func:`parse_statement` exists mainly as a convenience for building
    semi-complex trees from code snippets. To represent a statement exactly,
    including every leading/trailing comment, use :func:`parse_module` instead.

    Leading comments and trailing comments on the same line are accepted.
    Whitespace (or anything else) after the statement's trailing newline is not
    valid, because the statement node has nowhere to store it. For the same
    reason, code rendered from a parsed statement via
    ``cst.Module([]).code_for_node(statement)`` will not include leading/trailing
    comments.
    """
    # Statements request trailing-newline detection but not default-newline
    # detection.
    statement = _parse(
        "stmt_input",
        source,
        config,
        detect_default_newline=False,
        detect_trailing_newline=True,
    )
    # Narrow the generic CSTNode returned by _parse to the statement node types.
    assert isinstance(statement, (SimpleStatementLine, BaseCompoundStatement))
    return statement
103
104
def parse_expression(
    source: str,
    config: PartialParserConfig = _DEFAULT_PARTIAL_PARSER_CONFIG,
) -> BaseExpression:
    """
    Parse an expression written on a single line.

    Leading and trailing whitespace is not valid, because the expression node has
    nowhere to store it. :func:`parse_expression` exists mainly as a convenience
    for building semi-complex trees from code snippets. To represent an expression
    exactly, including every leading/trailing comment, use :func:`parse_module`
    instead.
    """
    expression = _parse(
        "expression_input",
        source,
        config,
        detect_default_newline=False,
        detect_trailing_newline=False,
    )
    # Narrow the generic CSTNode returned by _parse to BaseExpression.
    assert isinstance(expression, BaseExpression)
    return expression