/src/llvm-project/clang/lib/Format/FormatToken.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- FormatToken.cpp - Format C++ code --------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements specific functions of \c FormatTokens and their |
11 | | /// roles. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "FormatToken.h" |
16 | | #include "ContinuationIndenter.h" |
17 | | #include "llvm/ADT/SmallVector.h" |
18 | | #include "llvm/Support/Debug.h" |
19 | | #include <climits> |
20 | | |
21 | | namespace clang { |
22 | | namespace format { |
23 | | |
24 | 0 | const char *getTokenTypeName(TokenType Type) { |
25 | 0 | static const char *const TokNames[] = { |
26 | 0 | #define TYPE(X) #X, |
27 | 0 | LIST_TOKEN_TYPES |
28 | 0 | #undef TYPE |
29 | 0 | nullptr}; |
30 | |
|
31 | 0 | if (Type < NUM_TOKEN_TYPES) |
32 | 0 | return TokNames[Type]; |
33 | 0 | llvm_unreachable("unknown TokenType"); |
34 | 0 | return nullptr; |
35 | 0 | } |
36 | | |
37 | | // FIXME: This is copy&pasted from Sema. Put it in a common place and remove |
38 | | // duplication. |
39 | 16.9M | bool FormatToken::isSimpleTypeSpecifier() const { |
40 | 16.9M | switch (Tok.getKind()) { |
41 | 0 | case tok::kw_short: |
42 | 39 | case tok::kw_long: |
43 | 39 | case tok::kw___int64: |
44 | 39 | case tok::kw___int128: |
45 | 40 | case tok::kw_signed: |
46 | 592 | case tok::kw_unsigned: |
47 | 2.54k | case tok::kw_void: |
48 | 3.86k | case tok::kw_char: |
49 | 8.88k | case tok::kw_int: |
50 | 8.88k | case tok::kw_half: |
51 | 8.94k | case tok::kw_float: |
52 | 8.94k | case tok::kw_double: |
53 | 8.94k | case tok::kw___bf16: |
54 | 8.94k | case tok::kw__Float16: |
55 | 8.94k | case tok::kw___float128: |
56 | 8.94k | case tok::kw___ibm128: |
57 | 8.94k | case tok::kw_wchar_t: |
58 | 8.99k | case tok::kw_bool: |
59 | 143k | #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: |
60 | 143k | #include "clang/Basic/TransformTypeTraits.def" |
61 | 143k | case tok::annot_typename: |
62 | 8.99k | case tok::kw_char8_t: |
63 | 8.99k | case tok::kw_char16_t: |
64 | 8.99k | case tok::kw_char32_t: |
65 | 9.01k | case tok::kw_typeof: |
66 | 9.01k | case tok::kw_decltype: |
67 | 9.01k | case tok::kw__Atomic: |
68 | 9.01k | return true; |
69 | 16.9M | default: |
70 | 16.9M | return false; |
71 | 16.9M | } |
72 | 16.9M | } |
73 | | |
74 | 22.5k | bool FormatToken::isTypeOrIdentifier() const { |
75 | 22.5k | return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier); |
76 | 22.5k | } |
77 | | |
78 | 50.3k | bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { |
79 | 50.3k | assert(is(tok::r_brace)); |
80 | 50.3k | if (!Style.Cpp11BracedListStyle || |
81 | 50.3k | Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { |
82 | 50.3k | return false; |
83 | 50.3k | } |
84 | 0 | const auto *LBrace = MatchingParen; |
85 | 0 | assert(LBrace && LBrace->is(tok::l_brace)); |
86 | 0 | if (LBrace->is(BK_BracedInit)) |
87 | 0 | return true; |
88 | 0 | if (LBrace->Previous && LBrace->Previous->is(tok::equal)) |
89 | 0 | return true; |
90 | 0 | return false; |
91 | 0 | } |
92 | | |
93 | 18.2M | bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { |
94 | | // C# Does not indent object initialisers as continuations. |
95 | 18.2M | if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) |
96 | 0 | return true; |
97 | 18.2M | if (is(TT_TemplateString) && opensScope()) |
98 | 0 | return true; |
99 | 18.2M | return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || |
100 | 18.2M | (is(tok::l_brace) && |
101 | 18.2M | (getBlockKind() == BK_Block || is(TT_DictLiteral) || |
102 | 608k | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || |
103 | 18.2M | (is(tok::less) && Style.isProto()); |
104 | 18.2M | } |
105 | | |
106 | 83.8k | TokenRole::~TokenRole() {} |
107 | | |
108 | 0 | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} |
109 | | |
110 | | unsigned CommaSeparatedList::formatAfterToken(LineState &State, |
111 | | ContinuationIndenter *Indenter, |
112 | 25.2k | bool DryRun) { |
113 | 25.2k | if (!State.NextToken || !State.NextToken->Previous) |
114 | 0 | return 0; |
115 | | |
116 | 25.2k | if (Formats.size() <= 1) |
117 | 25.1k | return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). |
118 | | |
119 | | // Ensure that we start on the opening brace. |
120 | 53 | const FormatToken *LBrace = |
121 | 53 | State.NextToken->Previous->getPreviousNonComment(); |
122 | 53 | if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || |
123 | 53 | LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || |
124 | 53 | LBrace->Next->is(TT_DesignatedInitializerPeriod)) { |
125 | 11 | return 0; |
126 | 11 | } |
127 | | |
128 | | // Calculate the number of code points we have to format this list. As the |
129 | | // first token is already placed, we have to subtract it. |
130 | 42 | unsigned RemainingCodePoints = |
131 | 42 | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; |
132 | | |
133 | | // Find the best ColumnFormat, i.e. the best number of columns to use. |
134 | 42 | const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); |
135 | | |
136 | | // If no ColumnFormat can be used, the braced list would generally be |
137 | | // bin-packed. Add a severe penalty to this so that column layouts are |
138 | | // preferred if possible. |
139 | 42 | if (!Format) |
140 | 0 | return 10000; |
141 | | |
142 | | // Format the entire list. |
143 | 42 | unsigned Penalty = 0; |
144 | 42 | unsigned Column = 0; |
145 | 42 | unsigned Item = 0; |
146 | 19.6k | while (State.NextToken != LBrace->MatchingParen) { |
147 | 19.6k | bool NewLine = false; |
148 | 19.6k | unsigned ExtraSpaces = 0; |
149 | | |
150 | | // If the previous token was one of our commas, we are now on the next item. |
151 | 19.6k | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { |
152 | 9.74k | if (!State.NextToken->isTrailingComment()) { |
153 | 9.74k | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; |
154 | 9.74k | ++Column; |
155 | 9.74k | } |
156 | 9.74k | ++Item; |
157 | 9.74k | } |
158 | | |
159 | 19.6k | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { |
160 | 1.31k | Column = 0; |
161 | 1.31k | NewLine = true; |
162 | 1.31k | } |
163 | | |
164 | | // Place token using the continuation indenter and store the penalty. |
165 | 19.6k | Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); |
166 | 19.6k | } |
167 | 42 | return Penalty; |
168 | 42 | } |
169 | | |
170 | | unsigned CommaSeparatedList::formatFromToken(LineState &State, |
171 | | ContinuationIndenter *Indenter, |
172 | 21.4k | bool DryRun) { |
173 | | // Formatting with 1 Column isn't really a column layout, so we don't need the |
174 | | // special logic here. We can just avoid bin packing any of the parameters. |
175 | 21.4k | if (Formats.size() == 1 || HasNestedBracedList) |
176 | 117 | State.Stack.back().AvoidBinPacking = true; |
177 | 21.4k | return 0; |
178 | 21.4k | } |
179 | | |
180 | | // Returns the lengths in code points between Begin and End (both included), |
181 | | // assuming that the entire sequence is put on a single line. |
182 | | static unsigned CodePointsBetween(const FormatToken *Begin, |
183 | 291k | const FormatToken *End) { |
184 | 291k | assert(End->TotalLength >= Begin->TotalLength); |
185 | 0 | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; |
186 | 291k | } |
187 | | |
188 | 21.2k | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { |
189 | | // FIXME: At some point we might want to do this for other lists, too. |
190 | 21.2k | if (!Token->MatchingParen || |
191 | 21.2k | !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { |
192 | 14.8k | return; |
193 | 14.8k | } |
194 | | |
195 | | // In C++11 braced list style, we should not format in columns unless they |
196 | | // have many items (20 or more) or we allow bin-packing of function call |
197 | | // arguments. |
198 | 6.37k | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && |
199 | 6.37k | Commas.size() < 19) { |
200 | 0 | return; |
201 | 0 | } |
202 | | |
203 | | // Limit column layout for JavaScript array initializers to 20 or more items |
204 | | // for now to introduce it carefully. We can become more aggressive if this |
205 | | // necessary. |
206 | 6.37k | if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) |
207 | 254 | return; |
208 | | |
209 | | // Column format doesn't really make sense if we don't align after brackets. |
210 | 6.12k | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) |
211 | 0 | return; |
212 | | |
213 | 6.12k | FormatToken *ItemBegin = Token->Next; |
214 | 6.12k | while (ItemBegin->isTrailingComment()) |
215 | 0 | ItemBegin = ItemBegin->Next; |
216 | 6.12k | SmallVector<bool, 8> MustBreakBeforeItem; |
217 | | |
218 | | // The lengths of an item if it is put at the end of the line. This includes |
219 | | // trailing comments which are otherwise ignored for column alignment. |
220 | 6.12k | SmallVector<unsigned, 8> EndOfLineItemLength; |
221 | 6.12k | MustBreakBeforeItem.reserve(Commas.size() + 1); |
222 | 6.12k | EndOfLineItemLength.reserve(Commas.size() + 1); |
223 | 6.12k | ItemLengths.reserve(Commas.size() + 1); |
224 | | |
225 | 6.12k | bool HasSeparatingComment = false; |
226 | 151k | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { |
227 | 145k | assert(ItemBegin); |
228 | | // Skip comments on their own line. |
229 | 145k | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { |
230 | 0 | ItemBegin = ItemBegin->Next; |
231 | 0 | HasSeparatingComment = i > 0; |
232 | 0 | } |
233 | | |
234 | 145k | MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); |
235 | 145k | if (ItemBegin->is(tok::l_brace)) |
236 | 25.5k | HasNestedBracedList = true; |
237 | 145k | const FormatToken *ItemEnd = nullptr; |
238 | 145k | if (i == Commas.size()) { |
239 | 6.11k | ItemEnd = Token->MatchingParen; |
240 | 6.11k | const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); |
241 | 6.11k | ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); |
242 | 6.11k | if (Style.Cpp11BracedListStyle && |
243 | 6.11k | !ItemEnd->Previous->isTrailingComment()) { |
244 | | // In Cpp11 braced list style, the } and possibly other subsequent |
245 | | // tokens will need to stay on a line with the last element. |
246 | 20.5k | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) |
247 | 14.4k | ItemEnd = ItemEnd->Next; |
248 | 6.11k | } else { |
249 | | // In other braced lists styles, the "}" can be wrapped to the new line. |
250 | 0 | ItemEnd = Token->MatchingParen->Previous; |
251 | 0 | } |
252 | 139k | } else { |
253 | 139k | ItemEnd = Commas[i]; |
254 | | // The comma is counted as part of the item when calculating the length. |
255 | 139k | ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); |
256 | | |
257 | | // Consume trailing comments so the are included in EndOfLineItemLength. |
258 | 139k | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && |
259 | 139k | ItemEnd->Next->isTrailingComment()) { |
260 | 0 | ItemEnd = ItemEnd->Next; |
261 | 0 | } |
262 | 139k | } |
263 | 145k | EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); |
264 | | // If there is a trailing comma in the list, the next item will start at the |
265 | | // closing brace. Don't create an extra item for this. |
266 | 145k | if (ItemEnd->getNextNonComment() == Token->MatchingParen) |
267 | 12 | break; |
268 | 145k | ItemBegin = ItemEnd->Next; |
269 | 145k | } |
270 | | |
271 | | // Don't use column layout for lists with few elements and in presence of |
272 | | // separating comments. |
273 | 6.12k | if (Commas.size() < 5 || HasSeparatingComment) |
274 | 5.52k | return; |
275 | | |
276 | 599 | if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) |
277 | 17 | return; |
278 | | |
279 | | // We can never place more than ColumnLimit / 3 items in a row (because of the |
280 | | // spaces and the comma). |
281 | 582 | unsigned MaxItems = Style.ColumnLimit / 3; |
282 | 582 | SmallVector<unsigned> MinSizeInColumn; |
283 | 582 | MinSizeInColumn.reserve(MaxItems); |
284 | 11.6k | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { |
285 | 11.1k | ColumnFormat Format; |
286 | 11.1k | Format.Columns = Columns; |
287 | 11.1k | Format.ColumnSizes.resize(Columns); |
288 | 11.1k | MinSizeInColumn.assign(Columns, UINT_MAX); |
289 | 11.1k | Format.LineCount = 1; |
290 | 11.1k | bool HasRowWithSufficientColumns = false; |
291 | 11.1k | unsigned Column = 0; |
292 | 2.69M | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { |
293 | 2.68M | assert(i < MustBreakBeforeItem.size()); |
294 | 2.68M | if (MustBreakBeforeItem[i] || Column == Columns) { |
295 | 477k | ++Format.LineCount; |
296 | 477k | Column = 0; |
297 | 477k | } |
298 | 2.68M | if (Column == Columns - 1) |
299 | 478k | HasRowWithSufficientColumns = true; |
300 | 2.68M | unsigned Length = |
301 | 2.68M | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; |
302 | 2.68M | Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); |
303 | 2.68M | MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); |
304 | 2.68M | ++Column; |
305 | 2.68M | } |
306 | | // If all rows are terminated early (e.g. by trailing comments), we don't |
307 | | // need to look further. |
308 | 11.1k | if (!HasRowWithSufficientColumns) |
309 | 45 | break; |
310 | 11.1k | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. |
311 | | |
312 | 125k | for (unsigned i = 0; i < Columns; ++i) |
313 | 114k | Format.TotalWidth += Format.ColumnSizes[i]; |
314 | | |
315 | | // Don't use this Format, if the difference between the longest and shortest |
316 | | // element in a column exceeds a threshold to avoid excessive spaces. |
317 | 11.1k | if ([&] { |
318 | 83.4k | for (unsigned i = 0; i < Columns - 1; ++i) |
319 | 77.3k | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) |
320 | 5.03k | return true; |
321 | 6.07k | return false; |
322 | 11.1k | }()) { |
323 | 5.03k | continue; |
324 | 5.03k | } |
325 | | |
326 | | // Ignore layouts that are bound to violate the column limit. |
327 | 6.07k | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) |
328 | 392 | continue; |
329 | | |
330 | 5.68k | Formats.push_back(Format); |
331 | 5.68k | } |
332 | 582 | } |
333 | | |
334 | | const CommaSeparatedList::ColumnFormat * |
335 | 42 | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { |
336 | 42 | const ColumnFormat *BestFormat = nullptr; |
337 | 309 | for (const ColumnFormat &Format : llvm::reverse(Formats)) { |
338 | 309 | if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { |
339 | 95 | if (BestFormat && Format.LineCount > BestFormat->LineCount) |
340 | 41 | break; |
341 | 54 | BestFormat = &Format; |
342 | 54 | } |
343 | 309 | } |
344 | 42 | return BestFormat; |
345 | 42 | } |
346 | | |
347 | | } // namespace format |
348 | | } // namespace clang |