/src/llvm-project/clang/lib/Format/ContinuationIndenter.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements an indenter that manages the indentation of |
11 | | /// continuations. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H |
17 | | |
18 | | #include "Encoding.h" |
19 | | #include "FormatToken.h" |
20 | | #include "clang/Format/Format.h" |
21 | | #include "llvm/Support/Regex.h" |
22 | | #include <map> |
23 | | #include <optional> |
24 | | #include <tuple> |
25 | | |
26 | | namespace clang { |
27 | | class SourceManager; |
28 | | |
29 | | namespace format { |
30 | | |
31 | | class AnnotatedLine; |
32 | | class BreakableToken; |
33 | | struct FormatToken; |
34 | | struct LineState; |
35 | | struct ParenState; |
36 | | struct RawStringFormatStyleManager; |
37 | | class WhitespaceManager; |
38 | | |
39 | | /** Holds two string-keyed tables of \c FormatStyle plus the lookups over them. */ struct RawStringFormatStyleManager { |
40 | | llvm::StringMap<FormatStyle> DelimiterStyle; /**< Styles keyed by string; presumably the raw string delimiter (confirm in the .cpp). */ |
41 | | llvm::StringMap<FormatStyle> EnclosingFunctionStyle; /**< Styles keyed by string; presumably the enclosing function name (confirm in the .cpp). */ |
42 | | |
43 | | /** Built from \p CodeStyle; how the two tables are filled is defined out of line. */ RawStringFormatStyleManager(const FormatStyle &CodeStyle); |
44 | | |
45 | | /** Returns the style for \p Delimiter, or an empty optional. */ std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; |
46 | | |
47 | | /** Returns the style for \p EnclosingFunction, or an empty optional. */ std::optional<FormatStyle> |
48 | | getEnclosingFunctionStyle(StringRef EnclosingFunction) const; |
49 | | }; |
50 | | |
51 | | class ContinuationIndenter { |
52 | | public: |
53 | | /// Constructs a \c ContinuationIndenter from the given style, keywords, |
54 | | /// source manager, whitespace manager, encoding and bin-packing flag. |
55 | | ContinuationIndenter(const FormatStyle &Style, |
56 | | const AdditionalKeywords &Keywords, |
57 | | const SourceManager &SourceMgr, |
58 | | WhitespaceManager &Whitespaces, |
59 | | encoding::Encoding Encoding, |
60 | | bool BinPackInconclusiveFunctions); |
61 | | |
62 | | /// Get the initial state, i.e. the state after placing \p Line's |
63 | | /// first token at \p FirstIndent. When reformatting a fragment of code, as in |
64 | | /// the case of formatting inside raw string literals, \p FirstStartColumn is |
65 | | /// the column at which the state of the parent formatter is. |
66 | | LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, |
67 | | const AnnotatedLine *Line, bool DryRun); |
68 | | |
69 | | // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a |
70 | | // better home. |
71 | | /// Returns \c true, if a line break after \p State is allowed. |
72 | | bool canBreak(const LineState &State); |
73 | | |
74 | | /// Returns \c true, if a line break after \p State is mandatory. |
75 | | bool mustBreak(const LineState &State); |
76 | | |
77 | | /// Appends the next token to \p State and updates information |
78 | | /// necessary for indentation. |
79 | | /// |
80 | | /// Puts the token on the current line if \p Newline is \c false and adds a |
81 | | /// line break and necessary indentation otherwise. |
82 | | /// |
83 | | /// If \p DryRun is \c false, also creates and stores the required |
84 | | /// \c Replacement. |
85 | | unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, |
86 | | unsigned ExtraSpaces = 0); |
87 | | |
88 | | /// Get the column limit for this line. This is the style's column |
89 | | /// limit, potentially reduced for preprocessor definitions. |
90 | | unsigned getColumnLimit(const LineState &State) const; |
91 | | |
92 | | private: |
93 | | /// Mark the next token as consumed in \p State and modify its stacks |
94 | | /// accordingly. |
95 | | unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); |
96 | | |
97 | | /// Update 'State' according to the next token's fake left parentheses. |
98 | | void moveStatePastFakeLParens(LineState &State, bool Newline); |
99 | | /// Update 'State' according to the next token's fake right parentheses. |
100 | | void moveStatePastFakeRParens(LineState &State); |
101 | | |
102 | | /// Update 'State' according to the next token being one of "(<{[". |
103 | | void moveStatePastScopeOpener(LineState &State, bool Newline); |
104 | | /// Update 'State' according to the next token being one of ")>}]". |
105 | | void moveStatePastScopeCloser(LineState &State); |
106 | | /// Update 'State' with the next token opening a nested block. |
107 | | void moveStateToNewBlock(LineState &State); |
108 | | |
109 | | /// Reformats a raw string literal. |
110 | | /// |
111 | | /// \returns An extra penalty induced by reformatting the token. |
112 | | unsigned reformatRawStringLiteral(const FormatToken &Current, |
113 | | LineState &State, |
114 | | const FormatStyle &RawStringStyle, |
115 | | bool DryRun, bool Newline); |
116 | | |
117 | | /// If the current token is at the end of the current line, handle |
118 | | /// the transition to the next line. |
119 | | unsigned handleEndOfLine(const FormatToken &Current, LineState &State, |
120 | | bool DryRun, bool AllowBreak, bool Newline); |
121 | | |
122 | | /// If \p Current is a raw string that is configured to be reformatted, |
123 | | /// return the style to be used. |
124 | | std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current, |
125 | | const LineState &State); |
126 | | |
127 | | /// If the current token sticks out over the end of the line, break |
128 | | /// it if possible. |
129 | | /// |
130 | | /// \returns A pair (penalty, exceeded), where penalty is the extra penalty |
131 | | /// when tokens are broken or lines exceed the column limit, and exceeded |
132 | | /// indicates whether the algorithm purposefully left lines exceeding the |
133 | | /// column limit. |
134 | | /// |
135 | | /// The returned penalty will cover the cost of the additional line breaks |
136 | | /// and column limit violation in all lines except for the last one. The |
137 | | /// penalty for the column limit violation in the last line (and in single |
138 | | /// line tokens) is handled in \c addNextStateToQueue. |
139 | | /// |
140 | | /// \p Strict indicates whether reflowing is allowed to leave characters |
141 | | /// protruding the column limit; if true, lines will be split strictly within |
142 | | /// the column limit where possible; if false, words are allowed to protrude |
143 | | /// over the column limit as long as the penalty is less than the penalty |
144 | | /// of a break. |
145 | | std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, |
146 | | LineState &State, |
147 | | bool AllowBreak, bool DryRun, |
148 | | bool Strict); |
149 | | |
150 | | /// Returns the \c BreakableToken starting at \p Current, or nullptr |
151 | | /// if the current token cannot be broken. |
152 | | std::unique_ptr<BreakableToken> |
153 | | createBreakableToken(const FormatToken &Current, LineState &State, |
154 | | bool AllowBreak); |
155 | | |
156 | | /// Appends the next token to \p State and updates information |
157 | | /// necessary for indentation. |
158 | | /// |
159 | | /// Puts the token on the current line. |
160 | | /// |
161 | | /// If \p DryRun is \c false, also creates and stores the required |
162 | | /// \c Replacement. |
163 | | void addTokenOnCurrentLine(LineState &State, bool DryRun, |
164 | | unsigned ExtraSpaces); |
165 | | |
166 | | /// Appends the next token to \p State and updates information |
167 | | /// necessary for indentation. |
168 | | /// |
169 | | /// Adds a line break and necessary indentation. |
170 | | /// |
171 | | /// If \p DryRun is \c false, also creates and stores the required |
172 | | /// \c Replacement. |
173 | | unsigned addTokenOnNewLine(LineState &State, bool DryRun); |
174 | | |
175 | | /// Calculate the new column for a line wrap before the next token. |
176 | | unsigned getNewLineColumn(const LineState &State); |
177 | | |
178 | | /// Adds a multiline token to the \p State. |
179 | | /// |
180 | | /// \returns Extra penalty for the first line of the literal: last line is |
181 | | /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't |
182 | | /// matter, as we don't change them. |
183 | | unsigned addMultilineToken(const FormatToken &Current, LineState &State); |
184 | | |
185 | | /// Returns \c true if the next token starts a multiline string |
186 | | /// literal. |
187 | | /// |
188 | | /// This includes implicitly concatenated strings, strings that will be broken |
189 | | /// by clang-format and string literals with escaped newlines. |
190 | | bool nextIsMultilineString(const LineState &State); |
191 | | |
192 | | FormatStyle Style; |
193 | | const AdditionalKeywords &Keywords; |
194 | | const SourceManager &SourceMgr; |
195 | | WhitespaceManager &Whitespaces; |
196 | | encoding::Encoding Encoding; |
197 | | bool BinPackInconclusiveFunctions; |
198 | | llvm::Regex CommentPragmasRegex; |
199 | | const RawStringFormatStyleManager RawStringFormats; |
200 | | }; |
201 | | |
202 | | struct ParenState { |
203 | | ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, |
204 | | bool AvoidBinPacking, bool NoLineBreak) |
205 | | : Tok(Tok), Indent(Indent), LastSpace(LastSpace), |
206 | | NestedBlockIndent(Indent), IsAligned(false), |
207 | | BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), |
208 | | AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), |
209 | | NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), |
210 | | LastOperatorWrapped(true), ContainsLineBreak(false), |
211 | | ContainsUnwrappedBuilder(false), AlignColons(true), |
212 | | ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), |
213 | | NestedBlockInlined(false), IsInsideObjCArrayLiteral(false), |
214 | | IsCSharpGenericTypeConstraint(false), IsChainedConditional(false), |
215 | 928k | IsWrappedConditional(false), UnindentOperator(false) {} |
216 | | |
217 | | /// \brief The token opening this parenthesis level, or nullptr if this level |
218 | | /// is opened by fake parenthesis. |
219 | | /// |
220 | | /// Not considered for memoization as it will always have the same value at |
221 | | /// the same token. |
222 | | const FormatToken *Tok; |
223 | | |
224 | | /// The position to which a specific parenthesis level needs to be |
225 | | /// indented. |
226 | | unsigned Indent; |
227 | | |
228 | | /// The position of the last space on each level. |
229 | | /// |
230 | | /// Used e.g. to break like: |
231 | | /// functionCall(Parameter, otherCall( |
232 | | /// OtherParameter)); |
233 | | unsigned LastSpace; |
234 | | |
235 | | /// If a block relative to this parenthesis level gets wrapped, indent |
236 | | /// it this much. |
237 | | unsigned NestedBlockIndent; |
238 | | |
239 | | /// The position of the first "<<" operator encountered on each level. |
240 | | /// |
241 | | /// Used to align "<<" operators. 0 if no such operator has been encountered |
242 | | /// on a level. |
243 | | unsigned FirstLessLess = 0; |
244 | | |
245 | | /// The column of a \c ? in a conditional expression. |
246 | | unsigned QuestionColumn = 0; |
247 | | |
248 | | /// The position of the colon in an ObjC method declaration/call. |
249 | | unsigned ColonPos = 0; |
250 | | |
251 | | /// The start of the most recent function in a builder-type call. |
252 | | unsigned StartOfFunctionCall = 0; |
253 | | |
254 | | /// Contains the start of array subscript expressions, so that they |
255 | | /// can be aligned. |
256 | | unsigned StartOfArraySubscripts = 0; |
257 | | |
258 | | /// If a nested name specifier was broken over multiple lines, this |
259 | | /// contains the start column of the second line. Otherwise 0. |
260 | | unsigned NestedNameSpecifierContinuation = 0; |
261 | | |
262 | | /// If a call expression was broken over multiple lines, this |
263 | | /// contains the start column of the second line. Otherwise 0. |
264 | | unsigned CallContinuation = 0; |
265 | | |
266 | | /// The column of the first variable name in a variable declaration. |
267 | | /// |
268 | | /// Used to align further variables if necessary. |
269 | | unsigned VariablePos = 0; |
270 | | |
271 | | /// Whether this block's indentation is used for alignment. |
272 | | bool IsAligned : 1; |
273 | | |
274 | | /// Whether a newline needs to be inserted before the block's closing |
275 | | /// brace. |
276 | | /// |
277 | | /// We only want to insert a newline before the closing brace if there also |
278 | | /// was a newline after the beginning left brace. |
279 | | bool BreakBeforeClosingBrace : 1; |
280 | | |
281 | | /// Whether a newline needs to be inserted before the block's closing |
282 | | /// paren. |
283 | | /// |
284 | | /// We only want to insert a newline before the closing paren if there also |
285 | | /// was a newline after the beginning left paren. |
286 | | bool BreakBeforeClosingParen : 1; |
287 | | |
288 | | /// Avoid bin packing, i.e. multiple parameters/elements on multiple |
289 | | /// lines, in this context. |
290 | | bool AvoidBinPacking : 1; |
291 | | |
292 | | /// Break after the next comma (or all the commas in this context if |
293 | | /// \c AvoidBinPacking is \c true). |
294 | | bool BreakBeforeParameter : 1; |
295 | | |
296 | | /// Line breaking in this context would break a formatting rule. |
297 | | bool NoLineBreak : 1; |
298 | | |
299 | | /// Same as \c NoLineBreak, but is restricted until the end of the |
300 | | /// operand (including the next ","). |
301 | | bool NoLineBreakInOperand : 1; |
302 | | |
303 | | /// True if the last binary operator on this level was wrapped to the |
304 | | /// next line. |
305 | | bool LastOperatorWrapped : 1; |
306 | | |
307 | | /// \c true if this \c ParenState already contains a line-break. |
308 | | /// |
309 | | /// The first line break in a certain \c ParenState causes extra penalty so |
310 | | /// that clang-format prefers similar breaks, i.e. breaks in the same |
311 | | /// parenthesis. |
312 | | bool ContainsLineBreak : 1; |
313 | | |
314 | | /// \c true if this \c ParenState contains multiple segments of a |
315 | | /// builder-type call on one line. |
316 | | bool ContainsUnwrappedBuilder : 1; |
317 | | |
318 | | /// \c true if the colons of the current ObjC method expression should |
319 | | /// be aligned. |
320 | | /// |
321 | | /// Not considered for memoization as it will always have the same value at |
322 | | /// the same token. |
323 | | bool AlignColons : 1; |
324 | | |
325 | | /// \c true if at least one selector name was found in the current |
326 | | /// ObjC method expression. |
327 | | /// |
328 | | /// Not considered for memoization as it will always have the same value at |
329 | | /// the same token. |
330 | | bool ObjCSelectorNameFound : 1; |
331 | | |
332 | | /// \c true if there are multiple nested blocks inside these parens. |
333 | | /// |
334 | | /// Not considered for memoization as it will always have the same value at |
335 | | /// the same token. |
336 | | bool HasMultipleNestedBlocks : 1; |
337 | | |
338 | | /// The start of a nested block (e.g. lambda introducer in C++ or |
339 | | /// "function" in JavaScript) is not wrapped to a new line. |
340 | | bool NestedBlockInlined : 1; |
341 | | |
342 | | /// \c true if the current \c ParenState represents an Objective-C |
343 | | /// array literal. |
344 | | bool IsInsideObjCArrayLiteral : 1; |
345 | | |
346 | | bool IsCSharpGenericTypeConstraint : 1; |
347 | | |
348 | | /// \brief true if the current \c ParenState represents the false branch of |
349 | | /// a chained conditional expression (e.g. else-if). |
350 | | bool IsChainedConditional : 1; |
351 | | |
352 | | /// \brief true if the conditional was wrapped on the first operator (the |
353 | | /// question mark). |
354 | | bool IsWrappedConditional : 1; |
355 | | |
356 | | /// \brief Indicates the indent should be reduced by the length of the |
357 | | /// operator. |
358 | | bool UnindentOperator : 1; |
359 | | |
360 | 33.4M | /** Field-by-field ordering: the first differing compared field decides, and a set flag orders before a cleared one. Tok, NestedNameSpecifierContinuation, NoLineBreakInOperand, AlignColons, ObjCSelectorNameFound, HasMultipleNestedBlocks and IsInsideObjCArrayLiteral are not compared. */ bool operator<(const ParenState &Other) const { |
361 | 33.4M | if (Indent != Other.Indent) |
362 | 958k | return Indent < Other.Indent; |
363 | 32.4M | if (LastSpace != Other.LastSpace) |
364 | 3.08M | return LastSpace < Other.LastSpace; |
365 | 29.3M | if (NestedBlockIndent != Other.NestedBlockIndent) |
366 | 704k | return NestedBlockIndent < Other.NestedBlockIndent; |
367 | 28.6M | if (FirstLessLess != Other.FirstLessLess) |
368 | 3.71k | return FirstLessLess < Other.FirstLessLess; |
369 | 28.6M | if (IsAligned != Other.IsAligned) |
370 | 2.12k | return IsAligned; |
371 | 28.6M | if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) |
372 | 0 | return BreakBeforeClosingBrace; |
373 | 28.6M | if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) |
374 | 0 | return BreakBeforeClosingParen; |
375 | 28.6M | if (QuestionColumn != Other.QuestionColumn) |
376 | 1.47k | return QuestionColumn < Other.QuestionColumn; |
377 | 28.6M | if (AvoidBinPacking != Other.AvoidBinPacking) |
378 | 0 | return AvoidBinPacking; |
379 | 28.6M | if (BreakBeforeParameter != Other.BreakBeforeParameter) |
380 | 110k | return BreakBeforeParameter; |
381 | 28.5M | if (NoLineBreak != Other.NoLineBreak) |
382 | 128 | return NoLineBreak; |
383 | 28.5M | if (LastOperatorWrapped != Other.LastOperatorWrapped) |
384 | 61.3k | return LastOperatorWrapped; |
385 | 28.4M | if (ColonPos != Other.ColonPos) |
386 | 0 | return ColonPos < Other.ColonPos; |
387 | 28.4M | if (StartOfFunctionCall != Other.StartOfFunctionCall) |
388 | 9.08k | return StartOfFunctionCall < Other.StartOfFunctionCall; |
389 | 28.4M | if (StartOfArraySubscripts != Other.StartOfArraySubscripts) |
390 | 105 | return StartOfArraySubscripts < Other.StartOfArraySubscripts; |
391 | 28.4M | if (CallContinuation != Other.CallContinuation) |
392 | 27.1k | return CallContinuation < Other.CallContinuation; |
393 | 28.4M | if (VariablePos != Other.VariablePos) |
394 | 889k | return VariablePos < Other.VariablePos; |
395 | 27.5M | if (ContainsLineBreak != Other.ContainsLineBreak) |
396 | 55.6k | return ContainsLineBreak; |
397 | 27.5M | if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) |
398 | 0 | return ContainsUnwrappedBuilder; |
399 | 27.5M | if (NestedBlockInlined != Other.NestedBlockInlined) |
400 | 460k | return NestedBlockInlined; |
401 | 27.0M | if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) |
402 | 0 | return IsCSharpGenericTypeConstraint; |
403 | 27.0M | if (IsChainedConditional != Other.IsChainedConditional) |
404 | 0 | return IsChainedConditional; |
405 | 27.0M | if (IsWrappedConditional != Other.IsWrappedConditional) |
406 | 0 | return IsWrappedConditional; |
407 | 27.0M | if (UnindentOperator != Other.UnindentOperator) |
408 | 0 | return UnindentOperator; |
409 | 27.0M | return false; |
410 | 27.0M | } |
411 | | }; |
412 | | |
413 | | /// The current state when indenting an unwrapped line. |
414 | | /// |
415 | | /// As the indenting tries different combinations this is copied by value. |
416 | | struct LineState { |
417 | | /// The number of used columns in the current line. |
418 | | unsigned Column; |
419 | | |
420 | | /// The next token that needs to be formatted. |
421 | | FormatToken *NextToken; |
422 | | |
423 | | /// \c true if \p NextToken should not continue this line. |
424 | | bool NoContinuation; |
425 | | |
426 | | /// The \c NestingLevel at the start of this line. |
427 | | unsigned StartOfLineLevel; |
428 | | |
429 | | /// The lowest \c NestingLevel on the current line. |
430 | | unsigned LowestLevelOnLine; |
431 | | |
432 | | /// The start column of the string literal, if we're in a string |
433 | | /// literal sequence, 0 otherwise. |
434 | | unsigned StartOfStringLiteral; |
435 | | |
436 | | /// Disallow line breaks for this line. |
437 | | bool NoLineBreak; |
438 | | |
439 | | /// A stack keeping track of properties applying to parenthesis |
440 | | /// levels. |
441 | | SmallVector<ParenState> Stack; |
442 | | |
443 | | /// Ignore the stack of \c ParenStates for state comparison. |
444 | | /// |
445 | | /// In long and deeply nested unwrapped lines, the current algorithm can |
446 | | /// be insufficient for finding the best formatting with a reasonable amount |
447 | | /// of time and memory. Setting this flag will effectively lead to the |
448 | | /// algorithm not analyzing some combinations. However, these combinations |
449 | | /// rarely contain the optimal solution: In short, accepting a higher |
450 | | /// penalty early would need to lead to different values in the \c |
451 | | /// ParenState stack (in an otherwise identical state) and these different |
452 | | /// values would need to lead to a significant amount of avoided penalty |
453 | | /// later. |
454 | | /// |
455 | | /// FIXME: Come up with a better algorithm instead. |
456 | | bool IgnoreStackForComparison; |
457 | | |
458 | | /// The indent of the first token. |
459 | | unsigned FirstIndent; |
460 | | |
461 | | /// The line that is being formatted. |
462 | | /// |
463 | | /// Does not need to be considered for memoization because it doesn't change. |
464 | | const AnnotatedLine *Line; |
465 | | |
466 | | /// Comparison operator to be able to use \c LineState in \c map; NoLineBreak, FirstIndent and Line are not compared. |
467 | 99.6M | bool operator<(const LineState &Other) const { |
468 | 99.6M | if (NextToken != Other.NextToken) |
469 | 75.0M | return NextToken < Other.NextToken; |
470 | 24.6M | if (Column != Other.Column) |
471 | 18.5M | return Column < Other.Column; |
472 | 6.11M | if (NoContinuation != Other.NoContinuation) |
473 | 0 | return NoContinuation; |
474 | 6.11M | if (StartOfLineLevel != Other.StartOfLineLevel) |
475 | 638k | return StartOfLineLevel < Other.StartOfLineLevel; |
476 | 5.47M | if (LowestLevelOnLine != Other.LowestLevelOnLine) |
477 | 7.04k | return LowestLevelOnLine < Other.LowestLevelOnLine; |
478 | 5.47M | if (StartOfStringLiteral != Other.StartOfStringLiteral) |
479 | 13.4k | return StartOfStringLiteral < Other.StartOfStringLiteral; |
480 | 5.45M | if (IgnoreStackForComparison || Other.IgnoreStackForComparison) |
481 | 346k | return false; |
482 | 5.11M | return Stack < Other.Stack; |
483 | 5.45M | } |
484 | | }; |
485 | | |
486 | | } // end namespace format |
487 | | } // end namespace clang |
488 | | |
489 | | #endif |