Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/Format/FormatToken.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- FormatToken.cpp - Format C++ code --------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements specific functions of \c FormatTokens and their
11
/// roles.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#include "FormatToken.h"
16
#include "ContinuationIndenter.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/Support/Debug.h"
19
#include <climits>
20
21
namespace clang {
22
namespace format {
23
24
0
const char *getTokenTypeName(TokenType Type) {
25
0
  static const char *const TokNames[] = {
26
0
#define TYPE(X) #X,
27
0
      LIST_TOKEN_TYPES
28
0
#undef TYPE
29
0
      nullptr};
30
31
0
  if (Type < NUM_TOKEN_TYPES)
32
0
    return TokNames[Type];
33
0
  llvm_unreachable("unknown TokenType");
34
0
  return nullptr;
35
0
}
36
37
// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38
// duplication.
39
16.9M
bool FormatToken::isSimpleTypeSpecifier() const {
40
16.9M
  switch (Tok.getKind()) {
41
0
  case tok::kw_short:
42
39
  case tok::kw_long:
43
39
  case tok::kw___int64:
44
39
  case tok::kw___int128:
45
40
  case tok::kw_signed:
46
592
  case tok::kw_unsigned:
47
2.54k
  case tok::kw_void:
48
3.86k
  case tok::kw_char:
49
8.88k
  case tok::kw_int:
50
8.88k
  case tok::kw_half:
51
8.94k
  case tok::kw_float:
52
8.94k
  case tok::kw_double:
53
8.94k
  case tok::kw___bf16:
54
8.94k
  case tok::kw__Float16:
55
8.94k
  case tok::kw___float128:
56
8.94k
  case tok::kw___ibm128:
57
8.94k
  case tok::kw_wchar_t:
58
8.99k
  case tok::kw_bool:
59
143k
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
60
143k
#include "clang/Basic/TransformTypeTraits.def"
61
143k
  case tok::annot_typename:
62
8.99k
  case tok::kw_char8_t:
63
8.99k
  case tok::kw_char16_t:
64
8.99k
  case tok::kw_char32_t:
65
9.01k
  case tok::kw_typeof:
66
9.01k
  case tok::kw_decltype:
67
9.01k
  case tok::kw__Atomic:
68
9.01k
    return true;
69
16.9M
  default:
70
16.9M
    return false;
71
16.9M
  }
72
16.9M
}
73
74
22.5k
bool FormatToken::isTypeOrIdentifier() const {
75
22.5k
  return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier);
76
22.5k
}
77
78
50.3k
bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
79
50.3k
  assert(is(tok::r_brace));
80
50.3k
  if (!Style.Cpp11BracedListStyle ||
81
50.3k
      Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
82
50.3k
    return false;
83
50.3k
  }
84
0
  const auto *LBrace = MatchingParen;
85
0
  assert(LBrace && LBrace->is(tok::l_brace));
86
0
  if (LBrace->is(BK_BracedInit))
87
0
    return true;
88
0
  if (LBrace->Previous && LBrace->Previous->is(tok::equal))
89
0
    return true;
90
0
  return false;
91
0
}
92
93
18.2M
bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
94
  // C# Does not indent object initialisers as continuations.
95
18.2M
  if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
96
0
    return true;
97
18.2M
  if (is(TT_TemplateString) && opensScope())
98
0
    return true;
99
18.2M
  return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
100
18.2M
         (is(tok::l_brace) &&
101
18.2M
          (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
102
608k
           (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
103
18.2M
         (is(tok::less) && Style.isProto());
104
18.2M
}
105
106
83.8k
TokenRole::~TokenRole() {}
107
108
0
void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
109
110
unsigned CommaSeparatedList::formatAfterToken(LineState &State,
111
                                              ContinuationIndenter *Indenter,
112
25.2k
                                              bool DryRun) {
113
25.2k
  if (!State.NextToken || !State.NextToken->Previous)
114
0
    return 0;
115
116
25.2k
  if (Formats.size() <= 1)
117
25.1k
    return 0; // Handled by formatFromToken (1) or avoid severe penalty (0).
118
119
  // Ensure that we start on the opening brace.
120
53
  const FormatToken *LBrace =
121
53
      State.NextToken->Previous->getPreviousNonComment();
122
53
  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
123
53
      LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
124
53
      LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
125
11
    return 0;
126
11
  }
127
128
  // Calculate the number of code points we have to format this list. As the
129
  // first token is already placed, we have to subtract it.
130
42
  unsigned RemainingCodePoints =
131
42
      Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
132
133
  // Find the best ColumnFormat, i.e. the best number of columns to use.
134
42
  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
135
136
  // If no ColumnFormat can be used, the braced list would generally be
137
  // bin-packed. Add a severe penalty to this so that column layouts are
138
  // preferred if possible.
139
42
  if (!Format)
140
0
    return 10000;
141
142
  // Format the entire list.
143
42
  unsigned Penalty = 0;
144
42
  unsigned Column = 0;
145
42
  unsigned Item = 0;
146
19.6k
  while (State.NextToken != LBrace->MatchingParen) {
147
19.6k
    bool NewLine = false;
148
19.6k
    unsigned ExtraSpaces = 0;
149
150
    // If the previous token was one of our commas, we are now on the next item.
151
19.6k
    if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
152
9.74k
      if (!State.NextToken->isTrailingComment()) {
153
9.74k
        ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
154
9.74k
        ++Column;
155
9.74k
      }
156
9.74k
      ++Item;
157
9.74k
    }
158
159
19.6k
    if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
160
1.31k
      Column = 0;
161
1.31k
      NewLine = true;
162
1.31k
    }
163
164
    // Place token using the continuation indenter and store the penalty.
165
19.6k
    Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
166
19.6k
  }
167
42
  return Penalty;
168
42
}
169
170
unsigned CommaSeparatedList::formatFromToken(LineState &State,
171
                                             ContinuationIndenter *Indenter,
172
21.4k
                                             bool DryRun) {
173
  // Formatting with 1 Column isn't really a column layout, so we don't need the
174
  // special logic here. We can just avoid bin packing any of the parameters.
175
21.4k
  if (Formats.size() == 1 || HasNestedBracedList)
176
117
    State.Stack.back().AvoidBinPacking = true;
177
21.4k
  return 0;
178
21.4k
}
179
180
// Returns the lengths in code points between Begin and End (both included),
181
// assuming that the entire sequence is put on a single line.
182
static unsigned CodePointsBetween(const FormatToken *Begin,
183
291k
                                  const FormatToken *End) {
184
291k
  assert(End->TotalLength >= Begin->TotalLength);
185
0
  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
186
291k
}
187
188
21.2k
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
189
  // FIXME: At some point we might want to do this for other lists, too.
190
21.2k
  if (!Token->MatchingParen ||
191
21.2k
      !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
192
14.8k
    return;
193
14.8k
  }
194
195
  // In C++11 braced list style, we should not format in columns unless they
196
  // have many items (20 or more) or we allow bin-packing of function call
197
  // arguments.
198
6.37k
  if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
199
6.37k
      Commas.size() < 19) {
200
0
    return;
201
0
  }
202
203
  // Limit column layout for JavaScript array initializers to 20 or more items
204
  // for now to introduce it carefully. We can become more aggressive if this
205
  // necessary.
206
6.37k
  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
207
254
    return;
208
209
  // Column format doesn't really make sense if we don't align after brackets.
210
6.12k
  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
211
0
    return;
212
213
6.12k
  FormatToken *ItemBegin = Token->Next;
214
6.12k
  while (ItemBegin->isTrailingComment())
215
0
    ItemBegin = ItemBegin->Next;
216
6.12k
  SmallVector<bool, 8> MustBreakBeforeItem;
217
218
  // The lengths of an item if it is put at the end of the line. This includes
219
  // trailing comments which are otherwise ignored for column alignment.
220
6.12k
  SmallVector<unsigned, 8> EndOfLineItemLength;
221
6.12k
  MustBreakBeforeItem.reserve(Commas.size() + 1);
222
6.12k
  EndOfLineItemLength.reserve(Commas.size() + 1);
223
6.12k
  ItemLengths.reserve(Commas.size() + 1);
224
225
6.12k
  bool HasSeparatingComment = false;
226
151k
  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
227
145k
    assert(ItemBegin);
228
    // Skip comments on their own line.
229
145k
    while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
230
0
      ItemBegin = ItemBegin->Next;
231
0
      HasSeparatingComment = i > 0;
232
0
    }
233
234
145k
    MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
235
145k
    if (ItemBegin->is(tok::l_brace))
236
25.5k
      HasNestedBracedList = true;
237
145k
    const FormatToken *ItemEnd = nullptr;
238
145k
    if (i == Commas.size()) {
239
6.11k
      ItemEnd = Token->MatchingParen;
240
6.11k
      const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
241
6.11k
      ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
242
6.11k
      if (Style.Cpp11BracedListStyle &&
243
6.11k
          !ItemEnd->Previous->isTrailingComment()) {
244
        // In Cpp11 braced list style, the } and possibly other subsequent
245
        // tokens will need to stay on a line with the last element.
246
20.5k
        while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
247
14.4k
          ItemEnd = ItemEnd->Next;
248
6.11k
      } else {
249
        // In other braced lists styles, the "}" can be wrapped to the new line.
250
0
        ItemEnd = Token->MatchingParen->Previous;
251
0
      }
252
139k
    } else {
253
139k
      ItemEnd = Commas[i];
254
      // The comma is counted as part of the item when calculating the length.
255
139k
      ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
256
257
      // Consume trailing comments so the are included in EndOfLineItemLength.
258
139k
      if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
259
139k
          ItemEnd->Next->isTrailingComment()) {
260
0
        ItemEnd = ItemEnd->Next;
261
0
      }
262
139k
    }
263
145k
    EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
264
    // If there is a trailing comma in the list, the next item will start at the
265
    // closing brace. Don't create an extra item for this.
266
145k
    if (ItemEnd->getNextNonComment() == Token->MatchingParen)
267
12
      break;
268
145k
    ItemBegin = ItemEnd->Next;
269
145k
  }
270
271
  // Don't use column layout for lists with few elements and in presence of
272
  // separating comments.
273
6.12k
  if (Commas.size() < 5 || HasSeparatingComment)
274
5.52k
    return;
275
276
599
  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
277
17
    return;
278
279
  // We can never place more than ColumnLimit / 3 items in a row (because of the
280
  // spaces and the comma).
281
582
  unsigned MaxItems = Style.ColumnLimit / 3;
282
582
  SmallVector<unsigned> MinSizeInColumn;
283
582
  MinSizeInColumn.reserve(MaxItems);
284
11.6k
  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
285
11.1k
    ColumnFormat Format;
286
11.1k
    Format.Columns = Columns;
287
11.1k
    Format.ColumnSizes.resize(Columns);
288
11.1k
    MinSizeInColumn.assign(Columns, UINT_MAX);
289
11.1k
    Format.LineCount = 1;
290
11.1k
    bool HasRowWithSufficientColumns = false;
291
11.1k
    unsigned Column = 0;
292
2.69M
    for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
293
2.68M
      assert(i < MustBreakBeforeItem.size());
294
2.68M
      if (MustBreakBeforeItem[i] || Column == Columns) {
295
477k
        ++Format.LineCount;
296
477k
        Column = 0;
297
477k
      }
298
2.68M
      if (Column == Columns - 1)
299
478k
        HasRowWithSufficientColumns = true;
300
2.68M
      unsigned Length =
301
2.68M
          (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
302
2.68M
      Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
303
2.68M
      MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
304
2.68M
      ++Column;
305
2.68M
    }
306
    // If all rows are terminated early (e.g. by trailing comments), we don't
307
    // need to look further.
308
11.1k
    if (!HasRowWithSufficientColumns)
309
45
      break;
310
11.1k
    Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
311
312
125k
    for (unsigned i = 0; i < Columns; ++i)
313
114k
      Format.TotalWidth += Format.ColumnSizes[i];
314
315
    // Don't use this Format, if the difference between the longest and shortest
316
    // element in a column exceeds a threshold to avoid excessive spaces.
317
11.1k
    if ([&] {
318
83.4k
          for (unsigned i = 0; i < Columns - 1; ++i)
319
77.3k
            if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
320
5.03k
              return true;
321
6.07k
          return false;
322
11.1k
        }()) {
323
5.03k
      continue;
324
5.03k
    }
325
326
    // Ignore layouts that are bound to violate the column limit.
327
6.07k
    if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
328
392
      continue;
329
330
5.68k
    Formats.push_back(Format);
331
5.68k
  }
332
582
}
333
334
const CommaSeparatedList::ColumnFormat *
335
42
CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
336
42
  const ColumnFormat *BestFormat = nullptr;
337
309
  for (const ColumnFormat &Format : llvm::reverse(Formats)) {
338
309
    if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {
339
95
      if (BestFormat && Format.LineCount > BestFormat->LineCount)
340
41
        break;
341
54
      BestFormat = &Format;
342
54
    }
343
309
  }
344
42
  return BestFormat;
345
42
}
346
347
} // namespace format
348
} // namespace clang