/src/llvm-project/clang/lib/Format/FormatTokenLexer.h
Line | Count | Source |
1 | | //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains FormatTokenLexer, which tokenizes a source file |
11 | | /// into a token stream suitable for ClangFormat. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H |
17 | | |
18 | | #include "Encoding.h" |
19 | | #include "FormatToken.h" |
20 | | #include "clang/Basic/LangOptions.h" |
21 | | #include "clang/Basic/SourceLocation.h" |
22 | | #include "clang/Basic/SourceManager.h" |
23 | | #include "clang/Format/Format.h" |
24 | | #include "llvm/ADT/MapVector.h" |
25 | | #include "llvm/ADT/SmallPtrSet.h" |
26 | | #include "llvm/ADT/StringSet.h" |
27 | | #include "llvm/Support/Regex.h" |
28 | | |
29 | | #include <stack> |
30 | | |
31 | | namespace clang { |
32 | | namespace format { |
33 | | |
// States stored on FormatTokenLexer::StateStack (a std::stack<LexerState>).
// NOTE(review): the per-enumerator meanings below are inferred from the names
// and from the member declarations of FormatTokenLexer; confirm against the
// implementation file before relying on them.
34 | | enum LexerState { |
// Presumably the default state for ordinary lexing -- TODO confirm.
35 | | NORMAL, |
// Presumably set while inside a JavaScript template string (see the comment
// on FormatTokenLexer::handleTemplateStrings) -- TODO confirm.
36 | | TEMPLATE_STRING, |
// Presumably indicates a token is waiting to be returned by
// FormatTokenLexer::getStashedToken -- TODO confirm.
37 | | TOKEN_STASHED, |
38 | | }; |
39 | | |
// Tokenizes a source file into a stream of FormatToken pointers suitable for
// ClangFormat (see the \file comment at the top of this header). Apart from
// the inline getKeywords() accessor, only declarations are visible here; the
// member definitions live outside this header.
40 | | class FormatTokenLexer { |
41 | | public: |
// The parameter names mirror data members declared in the private section
// (SourceMgr, ID, Column, Style, Encoding, Allocator, IdentTable).
// NOTE(review): presumably each argument is stored in the matching member;
// the constructor body is not in this header -- confirm.
42 | | FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, |
43 | | const FormatStyle &Style, encoding::Encoding Encoding, |
44 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
45 | | IdentifierTable &IdentTable); |
46 | | |
// Returns the tokens as a non-owning ArrayRef of FormatToken pointers.
// NOTE(review): presumably a view over the 'Tokens' member, in which case it
// is only valid while this object is alive and unmodified -- confirm.
47 | | ArrayRef<FormatToken *> lex(); |
48 | | |
// Read-only access to the Keywords member.
49 | 2.76k | const AdditionalKeywords &getKeywords() { return Keywords; } |
50 | | |
51 | | private: |
// NOTE(review): presumably the driver that invokes the tryMerge*/tryTransform*
// helpers declared below -- confirm in the implementation.
52 | | void tryMergePreviousTokens(); |
53 | | |
// Construct- and language-specific merge/transform helpers. Each returns bool.
// NOTE(review): presumably 'true' means the token stream was changed -- confirm.
54 | | bool tryMergeLessLess(); |
55 | | bool tryMergeGreaterGreater(); |
56 | | bool tryMergeNSStringLiteral(); |
57 | | bool tryMergeJSPrivateIdentifier(); |
58 | | bool tryMergeCSharpStringLiteral(); |
59 | | bool tryMergeCSharpKeywordVariables(); |
60 | | bool tryMergeNullishCoalescingEqual(); |
61 | | bool tryTransformCSharpForEach(); |
62 | | bool tryMergeForEach(); |
63 | | bool tryTransformTryUsageForC(); |
64 | | |
65 | | // Merge the most recently lexed tokens into a single token if their kinds are |
66 | | // correct. |
67 | | bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); |
68 | | // Merge without checking their kinds. |
69 | | bool tryMergeTokens(size_t Count, TokenType NewType); |
70 | | // Merge if their kinds match any one of Kinds. |
71 | | bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds, |
72 | | TokenType NewType); |
73 | | |
74 | | // Returns \c true if \p Tok can only be followed by an operand in JavaScript. |
75 | | bool precedesOperand(FormatToken *Tok); |
76 | | |
// NOTE(review): presumably reports whether a JavaScript regex literal may
// start right after \p Prev (used by tryParseJSRegexLiteral) -- confirm.
77 | | bool canPrecedeRegexLiteral(FormatToken *Prev); |
78 | | |
79 | | // Tries to parse a JavaScript Regex literal starting at the current token, |
80 | | // if that begins with a slash and is in a location where JavaScript allows |
81 | | // regex literals. Changes the current token to a regex literal and updates |
82 | | // its text if successful. |
83 | | void tryParseJSRegexLiteral(); |
84 | | |
85 | | // Handles JavaScript template strings. |
86 | | // |
87 | | // JavaScript template strings use backticks ('`') as delimiters, and allow |
88 | | // embedding expressions nested in ${expr-here}. Template strings can be |
89 | | // nested recursively, i.e. expressions can contain template strings in turn. |
90 | | // |
91 | | // The code below parses starting from a backtick, up to a closing backtick or |
92 | | // an opening ${. It also maintains a stack of lexing contexts to handle |
93 | | // nested template parts by balancing curly braces. |
94 | | void handleTemplateStrings(); |
95 | | |
// Language-specific token handlers; behavior is defined outside this header.
96 | | void handleCSharpVerbatimAndInterpolatedStrings(); |
97 | | |
98 | | void tryParsePythonComment(); |
99 | | |
100 | | bool tryMerge_TMacro(); |
101 | | |
102 | | bool tryMergeConflictMarkers(); |
103 | | |
// NOTE(review): presumably shortens the current token to \p NewLen -- the unit
// (bytes vs. characters) is not visible here; confirm.
104 | | void truncateToken(size_t NewLen); |
105 | | |
// Token producers returning FormatToken pointers.
// NOTE(review): getStashedToken presumably pairs with the TOKEN_STASHED
// lexer state -- confirm.
106 | | FormatToken *getStashedToken(); |
107 | | |
108 | | FormatToken *getNextToken(); |
109 | | |
// --- Lexing state ---
// NOTE(review): FormatTok is presumably the token currently being produced.
110 | | FormatToken *FormatTok; |
111 | | bool IsFirstToken; |
// Stack of LexerState values. NOTE(review): presumably the "stack of lexing
// contexts" mentioned in the handleTemplateStrings comment -- confirm.
112 | | std::stack<LexerState> StateStack; |
113 | | unsigned Column; |
114 | | unsigned TrailingWhitespace; |
// Owning pointer to the underlying Lexer.
115 | | std::unique_ptr<Lexer> Lex; |
116 | | LangOptions LangOpts; |
// SourceMgr, Style, IdentTable and Allocator are reference members: the
// referenced objects are not owned here and must outlive this lexer.
117 | | const SourceManager &SourceMgr; |
118 | | FileID ID; |
119 | | const FormatStyle &Style; |
120 | | IdentifierTable &IdentTable; |
// Exposed read-only through getKeywords().
121 | | AdditionalKeywords Keywords; |
122 | | encoding::Encoding Encoding; |
123 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; |
124 | | // Index (in 'Tokens') of the last token that starts a new line. |
125 | | unsigned FirstInLineIndex; |
// Pointers to the tokens collected so far (inline storage for 16 entries).
126 | | SmallVector<FormatToken *, 16> Tokens; |
127 | | |
// Maps an identifier to the TokenType to assign to it.
// NOTE(review): presumably filled from macro lists in Style -- confirm.
128 | | llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; |
129 | | |
// NOTE(review): presumably identifiers to be treated as type names -- confirm.
130 | | llvm::SmallPtrSet<IdentifierInfo *, 8> TypeNames; |
131 | | |
132 | | bool FormattingDisabled; |
133 | | |
// NOTE(review): presumably compiled from the style's macro-block begin/end
// patterns -- confirm where they are initialized.
134 | | llvm::Regex MacroBlockBeginRegex; |
135 | | llvm::Regex MacroBlockEndRegex; |
136 | | |
137 | | // Targets that may appear inside a C# attribute. |
138 | | static const llvm::StringSet<> CSharpAttributeTargets; |
139 | | |
140 | | /// Handle Verilog-specific tokens. |
141 | | bool readRawTokenVerilogSpecific(Token &Tok); |
142 | | |
// Low-level helpers operating on the underlying lexer.
// NOTE(review): resetLexer presumably re-creates 'Lex' so that lexing resumes
// at \p Offset -- confirm.
143 | | void readRawToken(FormatToken &Tok); |
144 | | |
145 | | void resetLexer(unsigned Offset); |
146 | | }; |
147 | | |
148 | | } // namespace format |
149 | | } // namespace clang |
150 | | |
151 | | #endif |