/src/llvm-project/clang/lib/Format/TokenAnnotator.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file declares the token annotator, i.e. the classes that build |
11 | | /// \c AnnotatedLines out of \c UnwrappedLines and annotate their \c FormatTokens with required extra information. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
17 | | |
18 | | #include "UnwrappedLineParser.h" |
19 | | #include "clang/Format/Format.h" |
20 | | |
21 | | namespace clang { |
22 | | namespace format { |
23 | | |
24 | | enum LineType { /* Category assigned to an annotated line; stored in AnnotatedLine::Type. */ |
25 | | LT_Invalid, |
26 | | LT_ImportStatement, |
27 | | LT_ObjCDecl, // An Objective-C @interface, @implementation, or @protocol line. |
28 | | LT_ObjCMethodDecl, |
29 | | LT_ObjCProperty, // An Objective-C @property line. |
30 | | LT_Other, |
31 | | LT_PreprocessorDirective, |
32 | | LT_VirtualFunctionDecl, |
33 | | LT_ArrayOfStructInitializer, |
34 | | LT_CommentAbovePPDirective, |
35 | | }; |
36 | | |
37 | | enum ScopeType { /* Kind of enclosing scope; TokenAnnotator keeps a SmallVector of these in its Scopes member. */ |
38 | | // Inside a class declaration or definition. |
39 | | ST_Class, |
40 | | // Inside a function definition. |
41 | | ST_Function, |
42 | | // Inside any other scope block (loop, if/else, etc). |
43 | | ST_Other, |
44 | | }; |
45 | | |
46 | | class AnnotatedLine { /* One UnwrappedLine prepared for annotation: its tokens linked via Next and Previous, plus the child lines it owns. */ |
47 | | public: |
48 | | AnnotatedLine(const UnwrappedLine &Line) |
49 | | : First(Line.Tokens.front().Tok), Level(Line.Level), |
50 | | PPLevel(Line.PPLevel), |
51 | | MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), |
52 | | MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), |
53 | | InPPDirective(Line.InPPDirective), |
54 | | InPragmaDirective(Line.InPragmaDirective), |
55 | | InMacroBody(Line.InMacroBody), |
56 | | MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), |
57 | | IsMultiVariableDeclStmt(false), Affected(false), |
58 | | LeadingEmptyLinesAffected(false), ChildrenAffected(false), |
59 | | ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), |
60 | 1.59M | FirstStartColumn(Line.FirstStartColumn) { |
61 | 1.59M | assert(!Line.Tokens.empty()); /* NOTE(review): Line.Tokens.front() is already evaluated in the initializer list above, so this check runs after the first access. */ |
62 | | |
63 | | // Calculate Next and Previous for all tokens. Every token's Next and |
64 | | // Previous must be overwritten here, because previous formatting runs |
65 | | // might have left them in a different state. |
66 | 0 | First->Previous = nullptr; |
67 | 1.59M | FormatToken *Current = First; |
68 | 1.59M | addChildren(Line.Tokens.front(), Current); |
69 | 50.6M | for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { |
70 | 50.6M | if (Node.Tok->MacroParent) |
71 | 0 | ContainsMacroCall = true; |
72 | 50.6M | Current->Next = Node.Tok; |
73 | 50.6M | Node.Tok->Previous = Current; |
74 | 50.6M | Current = Current->Next; |
75 | 50.6M | addChildren(Node, Current); |
76 | | // FIXME: if we add children, previous will point to the token before |
77 | | // the children; changing this requires significant changes across |
78 | | // clang-format. |
79 | 50.6M | } |
80 | 1.59M | Last = Current; |
81 | 1.59M | Last->Next = nullptr; |
82 | 1.59M | } |
83 | | |
84 | 52.2M | void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { /* Rebuilds Current->Children from Node.Children; each new child line is allocated here, stored in this line's Children and deleted in the destructor. */ |
85 | 52.2M | Current->Children.clear(); |
86 | 52.2M | for (const auto &Child : Node.Children) { |
87 | 270k | Children.push_back(new AnnotatedLine(Child)); |
88 | 270k | if (Children.back()->ContainsMacroCall) |
89 | 0 | ContainsMacroCall = true; |
90 | 270k | Current->Children.push_back(Children.back()); |
91 | 270k | } |
92 | 52.2M | } |
93 | | |
94 | 0 | size_t size() const { /* Counts this line plus all child lines, recursively. */ |
95 | 0 | size_t Size = 1; |
96 | 0 | for (const auto *Child : Children) |
97 | 0 | Size += Child->size(); |
98 | 0 | return Size; |
99 | 0 | } |
100 | | |
101 | 1.59M | ~AnnotatedLine() { /* Deletes the owned child lines, then clears Children and resets Role on every token reachable from First via Next. */ |
102 | 1.59M | for (AnnotatedLine *Child : Children) |
103 | 270k | delete Child; |
104 | 1.59M | FormatToken *Current = First; |
105 | 53.8M | while (Current) { |
106 | 52.2M | Current->Children.clear(); |
107 | 52.2M | Current->Role.reset(); |
108 | 52.2M | Current = Current->Next; |
109 | 52.2M | } |
110 | 1.59M | } |
111 | | |
112 | 381k | bool isComment() const { /* True if First is a comment and no non-comment token follows it. */ |
113 | 381k | return First && First->is(tok::comment) && !First->getNextNonComment(); |
114 | 381k | } |
115 | | |
116 | | /// \c true if this line starts with the given tokens in order, ignoring |
117 | | /// comments. Returns \c false if the line has no first token. |
118 | 42.2M | template <typename... Ts> bool startsWith(Ts... Tokens) const { |
119 | 42.2M | return First && First->startsSequence(Tokens...); |
120 | 42.2M | } bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind>(clang::tok::TokenKind) const Line | Count | Source | 118 | 34.9M | template <typename... Ts> bool startsWith(Ts... Tokens) const { | 119 | 34.9M | return First && First->startsSequence(Tokens...); | 120 | 34.9M | } |
bool clang::format::AnnotatedLine::startsWith<clang::format::TokenType>(clang::format::TokenType) const Line | Count | Source | 118 | 6.89M | template <typename... Ts> bool startsWith(Ts... Tokens) const { | 119 | 6.89M | return First && First->startsSequence(Tokens...); | 120 | 6.89M | } |
bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind) const Line | Count | Source | 118 | 368k | template <typename... Ts> bool startsWith(Ts... Tokens) const { | 119 | 368k | return First && First->startsSequence(Tokens...); | 120 | 368k | } |
Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::IdentifierInfo*, clang::tok::TokenKind>(clang::IdentifierInfo*, clang::tok::TokenKind) const Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::IdentifierInfo*, clang::tok::TokenKind>(clang::tok::TokenKind, clang::IdentifierInfo*, clang::tok::TokenKind) const Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::IdentifierInfo*>(clang::IdentifierInfo*) const Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind) const |
121 | | |
122 | | /// \c true if this line ends with the given tokens, listed last token |
123 | | /// first, ignoring comments. Returns \c false if the line has no last token. |
124 | | /// For example, given tokens [T1, T2, T3, ...], the function returns true if |
125 | | /// this line is like "... T3 T2 T1". |
126 | 7.42k | template <typename... Ts> bool endsWith(Ts... Tokens) const { |
127 | 7.42k | return Last && Last->endsSequence(Tokens...); |
128 | 7.42k | } bool clang::format::AnnotatedLine::endsWith<clang::tok::TokenKind>(clang::tok::TokenKind) const Line | Count | Source | 126 | 2.58k | template <typename... Ts> bool endsWith(Ts... Tokens) const { | 127 | 2.58k | return Last && Last->endsSequence(Tokens...); | 128 | 2.58k | } |
bool clang::format::AnnotatedLine::endsWith<clang::format::TokenType>(clang::format::TokenType) const Line | Count | Source | 126 | 4.13k | template <typename... Ts> bool endsWith(Ts... Tokens) const { | 127 | 4.13k | return Last && Last->endsSequence(Tokens...); | 128 | 4.13k | } |
bool clang::format::AnnotatedLine::endsWith<clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind) const Line | Count | Source | 126 | 712 | template <typename... Ts> bool endsWith(Ts... Tokens) const { | 127 | 712 | return Last && Last->endsSequence(Tokens...); | 128 | 712 | } |
|
129 | | |
130 | | /// \c true if this line looks like a function definition instead of a |
131 | | /// function declaration. Asserts MightBeFunctionDecl. |
132 | 406 | bool mightBeFunctionDefinition() const { |
133 | 406 | assert(MightBeFunctionDecl); |
134 | | // Decide whether the end of the token stream belongs to a function |
135 | | // definition or a declaration. A declaration such as foo(); ends with |
136 | | // ';', so a line that does not end with ';' is treated as a |
137 | | // definition. The line could also end with a trailing comment: |
138 | | // foo(); /* comment */ |
139 | | // or |
140 | | // foo(); // comment |
141 | | // or |
142 | | // foo() // comment |
143 | | // endsWith() ignores the comment. |
144 | 0 | return !endsWith(tok::semi); |
145 | 406 | } |
146 | | |
147 | | /// \c true if this line starts a namespace definition: \c namespace, a namespace macro, \c inline \c namespace or \c export \c namespace. |
148 | 184k | bool startsWithNamespace() const { |
149 | 184k | return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || |
150 | 184k | startsWith(tok::kw_inline, tok::kw_namespace) || |
151 | 184k | startsWith(tok::kw_export, tok::kw_namespace); |
152 | 184k | } |
153 | | |
154 | 1.92M | FormatToken *getFirstNonComment() const { /* Returns First, or First->getNextNonComment() when First is itself a comment. */ |
155 | 1.92M | assert(First); |
156 | 1.92M | return First->is(tok::comment) ? First->getNextNonComment() : First; |
157 | 1.92M | } |
158 | | |
159 | 321k | FormatToken *getLastNonComment() const { /* Returns Last, or Last->getPreviousNonComment() when Last is itself a comment. */ |
160 | 321k | assert(Last); |
161 | 321k | return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last; |
162 | 321k | } |
163 | | |
164 | | FormatToken *First; |
165 | | FormatToken *Last; |
166 | | |
167 | | SmallVector<AnnotatedLine *, 0> Children; |
168 | | |
169 | | LineType Type; /* NOTE(review): not initialized by the constructor above; presumably assigned later during annotation -- confirm before reading. */ |
170 | | unsigned Level; |
171 | | unsigned PPLevel; |
172 | | size_t MatchingOpeningBlockLineIndex; |
173 | | size_t MatchingClosingBlockLineIndex; |
174 | | bool InPPDirective; |
175 | | bool InPragmaDirective; |
176 | | bool InMacroBody; |
177 | | bool MustBeDeclaration; |
178 | | bool MightBeFunctionDecl; |
179 | | bool IsMultiVariableDeclStmt; |
180 | | |
181 | | /// \c True if this line, or any of its child lines, contains a macro call for which an expansion exists. |
182 | | bool ContainsMacroCall = false; |
183 | | |
184 | | /// \c True if this line should be formatted, i.e. intersects directly or |
185 | | /// indirectly with one of the input ranges. |
186 | | bool Affected; |
187 | | |
188 | | /// \c True if the leading empty lines of this line intersect with one of the |
189 | | /// input ranges. |
190 | | bool LeadingEmptyLinesAffected; |
191 | | |
192 | | /// \c True if one of this line's children intersects with an input range. |
193 | | bool ChildrenAffected; |
194 | | |
195 | | /// \c True if breaking after last attribute group in function return type. |
196 | | bool ReturnTypeWrapped; |
197 | | |
198 | | /// \c True if this line should be indented by ContinuationIndent in addition |
199 | | /// to the normal indentation level. |
200 | | bool IsContinuation; |
201 | | |
202 | | unsigned FirstStartColumn; |
203 | | |
204 | | private: |
205 | | // Copying is disallowed: the line owns its child lines through raw pointers. |
206 | | AnnotatedLine(const AnnotatedLine &) = delete; |
207 | | void operator=(const AnnotatedLine &) = delete; |
208 | | }; |
209 | | |
210 | | /// Determines extra information about the tokens comprising an |
211 | | /// \c UnwrappedLine, operating on the corresponding \c AnnotatedLine. |
212 | | class TokenAnnotator { |
213 | | public: |
214 | | TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) |
215 | 1.05k | : Style(Style), Keywords(Keywords) {} /* Stores references only; Style and Keywords must outlive this annotator. */ |
216 | | |
217 | | /// Adapts the indent levels of comment lines to the indent of the |
218 | | /// subsequent line. |
219 | | // FIXME: Can/should this be done in the UnwrappedLineParser? |
220 | | void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; |
221 | | |
222 | | void annotate(AnnotatedLine &Line); /* The only non-const public entry point of this class. */ |
223 | | void calculateFormattingInformation(AnnotatedLine &Line) const; |
224 | | |
225 | | private: |
226 | | /// Calculates the penalty for splitting the line before \c Tok. |
227 | | unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, |
228 | | bool InFunctionDecl) const; |
229 | | |
230 | | bool spaceRequiredBeforeParens(const FormatToken &Right) const; |
231 | | |
232 | | bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, |
233 | | const FormatToken &Right) const; |
234 | | |
235 | | bool spaceRequiredBefore(const AnnotatedLine &Line, |
236 | | const FormatToken &Right) const; |
237 | | |
238 | | bool mustBreakBefore(const AnnotatedLine &Line, |
239 | | const FormatToken &Right) const; |
240 | | |
241 | | bool canBreakBefore(const AnnotatedLine &Line, |
242 | | const FormatToken &Right) const; |
243 | | |
244 | | bool mustBreakForReturnType(const AnnotatedLine &Line) const; |
245 | | |
246 | | void printDebugInfo(const AnnotatedLine &Line) const; |
247 | | |
248 | | void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; |
249 | | |
250 | | void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; |
251 | | |
252 | | FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, |
253 | | FormatToken *CurrentToken, |
254 | | unsigned Depth) const; |
255 | | FormatStyle::PointerAlignmentStyle |
256 | | getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; |
257 | | |
258 | | FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( |
259 | | const FormatToken &PointerOrReference) const; |
260 | | |
261 | | const FormatStyle &Style; |
262 | | |
263 | | const AdditionalKeywords &Keywords; |
264 | | |
265 | | SmallVector<ScopeType> Scopes; /* Presumably the scope kinds tracked while annotating -- TODO confirm; how it is maintained is not visible in this header. */ |
266 | | }; |
267 | | |
268 | | } // end namespace format |
269 | | } // end namespace clang |
270 | | |
271 | | #endif |