Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/Format/ContinuationIndenter.h
Line
Count
Source (jump to first uncovered line)
1
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements an indenter that manages the indentation of
11
/// continuations.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16
#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18
#include "Encoding.h"
19
#include "FormatToken.h"
20
#include "clang/Format/Format.h"
21
#include "llvm/Support/Regex.h"
22
#include <map>
23
#include <optional>
24
#include <tuple>
25
26
namespace clang {
27
class SourceManager;
28
29
namespace format {
30
31
class AnnotatedLine;
32
class BreakableToken;
33
struct FormatToken;
34
struct LineState;
35
struct ParenState;
36
struct RawStringFormatStyleManager;
37
class WhitespaceManager;
38
39
/// Two string-keyed tables of \c FormatStyle plus lookup accessors for them.
/// NOTE(review): judging by the member and accessor names these are the styles
/// applied to raw string literals, selected by delimiter or by enclosing
/// function name; how the tables are filled is defined out of line.
struct RawStringFormatStyleManager {
40
  /// String-keyed styles; presumably keyed by raw string delimiter (see
  /// \c getDelimiterStyle) -- confirm against the constructor.
  llvm::StringMap<FormatStyle> DelimiterStyle;
41
  /// String-keyed styles; presumably keyed by the name of the enclosing
  /// function (see \c getEnclosingFunctionStyle) -- confirm against the
  /// constructor.
  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42
43
  /// Builds the manager from \p CodeStyle. Defined out of line.
  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44
45
  /// Looks up a style for \p Delimiter. The result is optional; presumably it
  /// is empty when no style is registered for \p Delimiter -- TODO confirm.
  std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46
47
  /// Looks up a style for \p EnclosingFunction. The result is optional;
  /// presumably it is empty when no style is registered -- TODO confirm.
  std::optional<FormatStyle>
48
  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
49
};
50
51
/// Declares the operations that advance a \c LineState token by token:
/// deciding whether a break is allowed or required, appending the next token
/// with or without a line break, and breaking tokens that protrude over the
/// column limit. All member functions are defined out of line.
class ContinuationIndenter {
52
public:
53
  /// Constructs a \c ContinuationIndenter. The line to format and its first
54
  /// indent are not passed here; they are supplied to \c getInitialState.
55
  ContinuationIndenter(const FormatStyle &Style,
56
                       const AdditionalKeywords &Keywords,
57
                       const SourceManager &SourceMgr,
58
                       WhitespaceManager &Whitespaces,
59
                       encoding::Encoding Encoding,
60
                       bool BinPackInconclusiveFunctions);
61
62
  /// Get the initial state, i.e. the state after placing \p Line's
63
  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64
  /// the case of formatting inside raw string literals, \p FirstStartColumn is
65
  /// the column at which the state of the parent formatter is.
66
  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67
                            const AnnotatedLine *Line, bool DryRun);
68
69
  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70
  // better home.
71
  /// Returns \c true, if a line break after \p State is allowed.
72
  bool canBreak(const LineState &State);
73
74
  /// Returns \c true, if a line break after \p State is mandatory.
75
  bool mustBreak(const LineState &State);
76
77
  /// Appends the next token to \p State and updates information
78
  /// necessary for indentation.
79
  ///
80
  /// Puts the token on the current line if \p Newline is \c false and adds a
81
  /// line break and necessary indentation otherwise.
82
  ///
83
  /// If \p DryRun is \c false, also creates and stores the required
84
  /// \c Replacement.
85
  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86
                           unsigned ExtraSpaces = 0);
87
88
  /// Get the column limit for this line. This is the style's column
89
  /// limit, potentially reduced for preprocessor definitions.
90
  unsigned getColumnLimit(const LineState &State) const;
91
92
private:
93
  /// Mark the next token as consumed in \p State and modify its stacks
94
  /// accordingly.
95
  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96
97
  /// Update 'State' according to the next token's fake left parentheses.
98
  void moveStatePastFakeLParens(LineState &State, bool Newline);
99
  /// Update 'State' according to the next token's fake r_parens.
100
  void moveStatePastFakeRParens(LineState &State);
101
102
  /// Update 'State' according to the next token being one of "(<{[".
103
  void moveStatePastScopeOpener(LineState &State, bool Newline);
104
  /// Update 'State' according to the next token being one of ")>}]".
105
  void moveStatePastScopeCloser(LineState &State);
106
  /// Update 'State' with the next token opening a nested block.
107
  void moveStateToNewBlock(LineState &State);
108
109
  /// Reformats a raw string literal.
110
  ///
111
  /// \returns An extra penalty induced by reformatting the token.
112
  unsigned reformatRawStringLiteral(const FormatToken &Current,
113
                                    LineState &State,
114
                                    const FormatStyle &RawStringStyle,
115
                                    bool DryRun, bool Newline);
116
117
  /// If the current token is at the end of the current line, handle
118
  /// the transition to the next line.
119
  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120
                           bool DryRun, bool AllowBreak, bool Newline);
121
122
  /// If \p Current is a raw string that is configured to be reformatted,
123
  /// return the style to be used.
124
  std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125
                                               const LineState &State);
126
127
  /// If the current token sticks out over the end of the line, break
128
  /// it if possible.
129
  ///
130
  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131
  /// when tokens are broken or lines exceed the column limit, and exceeded
132
  /// indicates whether the algorithm purposefully left lines exceeding the
133
  /// column limit.
134
  ///
135
  /// The returned penalty will cover the cost of the additional line breaks
136
  /// and column limit violation in all lines except for the last one. The
137
  /// penalty for the column limit violation in the last line (and in single
138
  /// line tokens) is handled in \c addNextStateToQueue.
139
  ///
140
  /// \p Strict indicates whether reflowing is allowed to leave characters
141
  /// protruding the column limit; if true, lines will be split strictly within
142
  /// the column limit where possible; if false, words are allowed to protrude
143
  /// over the column limit as long as the penalty is less than the penalty
144
  /// of a break.
145
  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146
                                                 LineState &State,
147
                                                 bool AllowBreak, bool DryRun,
148
                                                 bool Strict);
149
150
  /// Returns the \c BreakableToken starting at \p Current, or nullptr
151
  /// if the current token cannot be broken.
152
  std::unique_ptr<BreakableToken>
153
  createBreakableToken(const FormatToken &Current, LineState &State,
154
                       bool AllowBreak);
155
156
  /// Appends the next token to \p State and updates information
157
  /// necessary for indentation.
158
  ///
159
  /// Puts the token on the current line.
160
  ///
161
  /// If \p DryRun is \c false, also creates and stores the required
162
  /// \c Replacement.
163
  void addTokenOnCurrentLine(LineState &State, bool DryRun,
164
                             unsigned ExtraSpaces);
165
166
  /// Appends the next token to \p State and updates information
167
  /// necessary for indentation.
168
  ///
169
  /// Adds a line break and necessary indentation.
170
  ///
171
  /// If \p DryRun is \c false, also creates and stores the required
172
  /// \c Replacement.
173
  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174
175
  /// Calculate the new column for a line wrap before the next token.
176
  unsigned getNewLineColumn(const LineState &State);
177
178
  /// Adds a multiline token to the \p State.
179
  ///
180
  /// \returns Extra penalty for the first line of the literal: last line is
181
  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182
  /// matter, as we don't change them.
183
  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184
185
  /// Returns \c true if the next token starts a multiline string
186
  /// literal.
187
  ///
188
  /// This includes implicitly concatenated strings, strings that will be broken
189
  /// by clang-format and string literals with escaped newlines.
190
  bool nextIsMultilineString(const LineState &State);
191
192
  // Held by value, unlike the reference members below.
  FormatStyle Style;
193
  // Keywords, SourceMgr and Whitespaces are references: the referenced objects
  // must outlive this indenter.
  const AdditionalKeywords &Keywords;
194
  const SourceManager &SourceMgr;
195
  WhitespaceManager &Whitespaces;
196
  encoding::Encoding Encoding;
197
  bool BinPackInconclusiveFunctions;
198
  // NOTE(review): presumably compiled from a comment-pragma pattern in the
  // style -- the initialization is out of line; confirm in the constructor.
  llvm::Regex CommentPragmasRegex;
199
  // Immutable after construction (declared const).
  const RawStringFormatStyleManager RawStringFormats;
200
};
201
202
/// Formatting state tracked for one parenthesis level; \c LineState keeps a
/// stack of these.
struct ParenState {
203
  /// Initializes the level from the given arguments. \c NestedBlockIndent
  /// starts out equal to \p Indent; \c LastOperatorWrapped and \c AlignColons
  /// start as \c true; every other flag not taken from an argument starts as
  /// \c false. The remaining column members use their in-class initializer (0).
  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204
             bool AvoidBinPacking, bool NoLineBreak)
205
      : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
206
        NestedBlockIndent(Indent), IsAligned(false),
207
        BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
208
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
209
        NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
210
        LastOperatorWrapped(true), ContainsLineBreak(false),
211
        ContainsUnwrappedBuilder(false), AlignColons(true),
212
        ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
213
        NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
214
        IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
215
928k
        IsWrappedConditional(false), UnindentOperator(false) {}
216
217
  /// \brief The token opening this parenthesis level, or nullptr if this level
218
  /// is opened by fake parenthesis.
219
  ///
220
  /// Not considered for memoization as it will always have the same value at
221
  /// the same token.
222
  const FormatToken *Tok;
223
224
  /// The position to which a specific parenthesis level needs to be
225
  /// indented.
226
  unsigned Indent;
227
228
  /// The position of the last space on each level.
229
  ///
230
  /// Used e.g. to break like:
231
  /// functionCall(Parameter, otherCall(
232
  ///                             OtherParameter));
233
  unsigned LastSpace;
234
235
  /// If a block relative to this parenthesis level gets wrapped, indent
236
  /// it this much.
237
  unsigned NestedBlockIndent;
238
239
  /// The position of the first "<<" operator encountered on each level.
240
  ///
241
  /// Used to align "<<" operators. 0 if no such operator has been encountered
242
  /// on a level.
243
  unsigned FirstLessLess = 0;
244
245
  /// The column of a \c ? in a conditional expression.
246
  unsigned QuestionColumn = 0;
247
248
  /// The position of the colon in an ObjC method declaration/call.
249
  unsigned ColonPos = 0;
250
251
  /// The start of the most recent function in a builder-type call.
252
  unsigned StartOfFunctionCall = 0;
253
254
  /// Contains the start of array subscript expressions, so that they
255
  /// can be aligned.
256
  unsigned StartOfArraySubscripts = 0;
257
258
  /// If a nested name specifier was broken over multiple lines, this
259
  /// contains the start column of the second line. Otherwise 0.
260
  unsigned NestedNameSpecifierContinuation = 0;
261
262
  /// If a call expression was broken over multiple lines, this
263
  /// contains the start column of the second line. Otherwise 0.
264
  unsigned CallContinuation = 0;
265
266
  /// The column of the first variable name in a variable declaration.
267
  ///
268
  /// Used to align further variables if necessary.
269
  unsigned VariablePos = 0;
270
271
  /// Whether this block's indentation is used for alignment.
272
  bool IsAligned : 1;
273
274
  /// Whether a newline needs to be inserted before the block's closing
275
  /// brace.
276
  ///
277
  /// We only want to insert a newline before the closing brace if there also
278
  /// was a newline after the beginning left brace.
279
  bool BreakBeforeClosingBrace : 1;
280
281
  /// Whether a newline needs to be inserted before the block's closing
282
  /// paren.
283
  ///
284
  /// We only want to insert a newline before the closing paren if there also
285
  /// was a newline after the beginning left paren.
286
  bool BreakBeforeClosingParen : 1;
287
288
  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
289
  /// lines, in this context.
290
  bool AvoidBinPacking : 1;
291
292
  /// Break after the next comma (or all the commas in this context if
293
  /// \c AvoidBinPacking is \c true).
294
  bool BreakBeforeParameter : 1;
295
296
  /// Line breaking in this context would break a formatting rule.
297
  bool NoLineBreak : 1;
298
299
  /// Same as \c NoLineBreak, but is restricted until the end of the
300
  /// operand (including the next ",").
301
  bool NoLineBreakInOperand : 1;
302
303
  /// True if the last binary operator on this level was wrapped to the
304
  /// next line.
305
  bool LastOperatorWrapped : 1;
306
307
  /// \c true if this \c ParenState already contains a line-break.
308
  ///
309
  /// The first line break in a certain \c ParenState causes extra penalty so
310
  /// that clang-format prefers similar breaks, i.e. breaks in the same
311
  /// parenthesis.
312
  bool ContainsLineBreak : 1;
313
314
  /// \c true if this \c ParenState contains multiple segments of a
315
  /// builder-type call on one line.
316
  bool ContainsUnwrappedBuilder : 1;
317
318
  /// \c true if the colons of the current ObjC method expression should
319
  /// be aligned.
320
  ///
321
  /// Not considered for memoization as it will always have the same value at
322
  /// the same token.
323
  bool AlignColons : 1;
324
325
  /// \c true if at least one selector name was found in the current
326
  /// ObjC method expression.
327
  ///
328
  /// Not considered for memoization as it will always have the same value at
329
  /// the same token.
330
  bool ObjCSelectorNameFound : 1;
331
332
  /// \c true if there are multiple nested blocks inside these parens.
333
  ///
334
  /// Not considered for memoization as it will always have the same value at
335
  /// the same token.
336
  bool HasMultipleNestedBlocks : 1;
337
338
  /// The start of a nested block (e.g. lambda introducer in C++ or
339
  /// "function" in JavaScript) is not wrapped to a new line.
340
  bool NestedBlockInlined : 1;
341
342
  /// \c true if the current \c ParenState represents an Objective-C
343
  /// array literal.
344
  bool IsInsideObjCArrayLiteral : 1;
345
  /// NOTE(review): undocumented in the original; presumably \c true while
  /// this level belongs to a C# generic type constraint -- confirm at the
  /// places that set it.
346
  bool IsCSharpGenericTypeConstraint : 1;
347
348
  /// \brief true if the current \c ParenState represents the false branch of
349
  /// a chained conditional expression (e.g. else-if)
350
  bool IsChainedConditional : 1;
351
352
  /// \brief true if the conditional was wrapped on the first operator (the
353
  /// question mark)
354
  bool IsWrappedConditional : 1;
355
356
  /// \brief Indicates the indent should be reduced by the length of the
357
  /// operator.
358
  bool UnindentOperator : 1;
359
  /// Ordering that compares members one after another in the fixed order
  /// below and decides on the first one that differs. Unsigned members order
  /// by value; for a flag that differs, the object with the flag set orders
  /// first.
  ///
  /// Members that do not take part: \c Tok, \c AlignColons,
  /// \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks (documented above
  /// as not considered for memoization), and also
  /// \c NestedNameSpecifierContinuation, \c NoLineBreakInOperand and
  /// \c IsInsideObjCArrayLiteral.
  /// NOTE(review): the last three carry no "not considered" remark -- confirm
  /// that leaving them out is intentional.
360
33.4M
  bool operator<(const ParenState &Other) const {
361
33.4M
    if (Indent != Other.Indent)
362
958k
      return Indent < Other.Indent;
363
32.4M
    if (LastSpace != Other.LastSpace)
364
3.08M
      return LastSpace < Other.LastSpace;
365
29.3M
    if (NestedBlockIndent != Other.NestedBlockIndent)
366
704k
      return NestedBlockIndent < Other.NestedBlockIndent;
367
28.6M
    if (FirstLessLess != Other.FirstLessLess)
368
3.71k
      return FirstLessLess < Other.FirstLessLess;
369
28.6M
    if (IsAligned != Other.IsAligned)
370
2.12k
      return IsAligned;
371
28.6M
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
372
0
      return BreakBeforeClosingBrace;
373
28.6M
    if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
374
0
      return BreakBeforeClosingParen;
375
28.6M
    if (QuestionColumn != Other.QuestionColumn)
376
1.47k
      return QuestionColumn < Other.QuestionColumn;
377
28.6M
    if (AvoidBinPacking != Other.AvoidBinPacking)
378
0
      return AvoidBinPacking;
379
28.6M
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
380
110k
      return BreakBeforeParameter;
381
28.5M
    if (NoLineBreak != Other.NoLineBreak)
382
128
      return NoLineBreak;
383
28.5M
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
384
61.3k
      return LastOperatorWrapped;
385
28.4M
    if (ColonPos != Other.ColonPos)
386
0
      return ColonPos < Other.ColonPos;
387
28.4M
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
388
9.08k
      return StartOfFunctionCall < Other.StartOfFunctionCall;
389
28.4M
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
390
105
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
391
28.4M
    if (CallContinuation != Other.CallContinuation)
392
27.1k
      return CallContinuation < Other.CallContinuation;
393
28.4M
    if (VariablePos != Other.VariablePos)
394
889k
      return VariablePos < Other.VariablePos;
395
27.5M
    if (ContainsLineBreak != Other.ContainsLineBreak)
396
55.6k
      return ContainsLineBreak;
397
27.5M
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
398
0
      return ContainsUnwrappedBuilder;
399
27.5M
    if (NestedBlockInlined != Other.NestedBlockInlined)
400
460k
      return NestedBlockInlined;
401
27.0M
    if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
402
0
      return IsCSharpGenericTypeConstraint;
403
27.0M
    if (IsChainedConditional != Other.IsChainedConditional)
404
0
      return IsChainedConditional;
405
27.0M
    if (IsWrappedConditional != Other.IsWrappedConditional)
406
0
      return IsWrappedConditional;
407
27.0M
    if (UnindentOperator != Other.UnindentOperator)
408
0
      return UnindentOperator;
409
27.0M
    // Every compared member is equal: neither object orders before the other.
    return false;
410
27.0M
  }
411
};
412
413
/// The current state when indenting an unwrapped line.
414
///
415
/// As the indenting tries different combinations this is copied by value.
416
struct LineState {
417
  /// The number of used columns in the current line.
418
  unsigned Column;
419
420
  /// The token that needs to be next formatted.
421
  FormatToken *NextToken;
422
423
  /// \c true if \p NextToken should not continue this line.
424
  bool NoContinuation;
425
426
  /// The \c NestingLevel at the start of this line.
427
  unsigned StartOfLineLevel;
428
429
  /// The lowest \c NestingLevel on the current line.
430
  unsigned LowestLevelOnLine;
431
432
  /// The start column of the string literal, if we're in a string
433
  /// literal sequence, 0 otherwise.
434
  unsigned StartOfStringLiteral;
435
436
  /// Disallow line breaks for this line.
437
  bool NoLineBreak;
438
439
  /// A stack keeping track of properties applying to parenthesis
440
  /// levels.
441
  SmallVector<ParenState> Stack;
442
443
  /// Ignore the stack of \c ParenStates for state comparison.
444
  ///
445
  /// In long and deeply nested unwrapped lines, the current algorithm can
446
  /// be insufficient for finding the best formatting with a reasonable amount
447
  /// of time and memory. Setting this flag will effectively lead to the
448
  /// algorithm not analyzing some combinations. However, these combinations
449
  /// rarely contain the optimal solution: In short, accepting a higher
450
  /// penalty early would need to lead to different values in the \c
451
  /// ParenState stack (in an otherwise identical state) and these different
452
  /// values would need to lead to a significant amount of avoided penalty
453
  /// later.
454
  ///
455
  /// FIXME: Come up with a better algorithm instead.
456
  bool IgnoreStackForComparison;
457
458
  /// The indent of the first token.
459
  unsigned FirstIndent;
460
461
  /// The line that is being formatted.
462
  ///
463
  /// Does not need to be considered for memoization because it doesn't change.
464
  const AnnotatedLine *Line;
465
466
  /// Comparison operator to be able to use \c LineState in \c map.
  ///
  /// Compares \c NextToken (by pointer value), \c Column, \c NoContinuation,
  /// \c StartOfLineLevel, \c LowestLevelOnLine and \c StartOfStringLiteral in
  /// that order and decides on the first one that differs. If all are equal
  /// and either side has \c IgnoreStackForComparison set, neither state orders
  /// before the other; otherwise the \c Stack members are compared.
  /// \c NoLineBreak, \c FirstIndent and \c Line do not take part.
  /// NOTE(review): if flagged and unflagged states are ever mixed in one
  /// container, "neither orders first" is not transitive (a flagged state is
  /// equivalent to two unflagged states that differ in \c Stack) -- confirm
  /// that callers never mix them.
467
99.6M
  bool operator<(const LineState &Other) const {
468
99.6M
    if (NextToken != Other.NextToken)
469
75.0M
      return NextToken < Other.NextToken;
470
24.6M
    if (Column != Other.Column)
471
18.5M
      return Column < Other.Column;
472
6.11M
    if (NoContinuation != Other.NoContinuation)
473
0
      return NoContinuation;
474
6.11M
    if (StartOfLineLevel != Other.StartOfLineLevel)
475
638k
      return StartOfLineLevel < Other.StartOfLineLevel;
476
5.47M
    if (LowestLevelOnLine != Other.LowestLevelOnLine)
477
7.04k
      return LowestLevelOnLine < Other.LowestLevelOnLine;
478
5.47M
    if (StartOfStringLiteral != Other.StartOfStringLiteral)
479
13.4k
      return StartOfStringLiteral < Other.StartOfStringLiteral;
480
5.45M
    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
481
346k
      return false;
482
5.11M
    return Stack < Other.Stack;
483
5.45M
  }
484
};
485
486
} // end namespace format
487
} // end namespace clang
488
489
#endif