Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/Format/TokenAnnotator.h
Line
Count
Source (jump to first uncovered line)
1
//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements a token annotator, i.e. creates
11
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16
#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18
#include "UnwrappedLineParser.h"
19
#include "clang/Format/Format.h"
20
21
namespace clang {
22
namespace format {
23
24
/// Classification of a line. \c AnnotatedLine stores one of these values in
/// its \c Type member.
enum LineType {
25
  LT_Invalid,
26
  LT_ImportStatement,
27
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
28
  LT_ObjCMethodDecl,
29
  LT_ObjCProperty, // An @property line.
30
  LT_Other,
31
  LT_PreprocessorDirective,
32
  LT_VirtualFunctionDecl,
33
  LT_ArrayOfStructInitializer,
34
  LT_CommentAbovePPDirective,
35
};
36
37
/// Kind of enclosing scope. \c TokenAnnotator keeps a \c SmallVector of these
/// values in its \c Scopes member.
enum ScopeType {
38
  // Contained in class declaration/definition.
39
  ST_Class,
40
  // Contained within function definition.
41
  ST_Function,
42
  // Contained within other scope block (loop, if/else, etc).
43
  ST_Other,
44
};
45
46
class AnnotatedLine {
47
public:
48
  /// Builds an annotated line from \p Line: copies the per-line properties,
  /// links the tokens of \p Line into a doubly linked list through their
  /// \c Next / \c Previous pointers, and creates a child \c AnnotatedLine for
  /// every child line attached to a token (see \c addChildren).
  ///
  /// \p Line must contain at least one token. NOTE(review): the initializer
  /// list already calls \c Line.Tokens.front() before the assert in the body
  /// runs, so the assert cannot catch an empty line before that access.
  AnnotatedLine(const UnwrappedLine &Line)
49
      : First(Line.Tokens.front().Tok), Level(Line.Level),
50
        PPLevel(Line.PPLevel),
51
        MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
52
        MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
53
        InPPDirective(Line.InPPDirective),
54
        InPragmaDirective(Line.InPragmaDirective),
55
        InMacroBody(Line.InMacroBody),
56
        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
57
        IsMultiVariableDeclStmt(false), Affected(false),
58
        LeadingEmptyLinesAffected(false), ChildrenAffected(false),
59
        ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
60
1.59M
        FirstStartColumn(Line.FirstStartColumn) {
61
1.59M
    assert(!Line.Tokens.empty());
62
63
    // Calculate Next and Previous for all tokens. Note that we must overwrite
64
    // Next and Previous for every token, as previous formatting runs might have
65
    // left them in a different state.
66
0
    First->Previous = nullptr;
67
1.59M
    FormatToken *Current = First;
68
1.59M
    addChildren(Line.Tokens.front(), Current);
69
50.6M
    for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
70
50.6M
      if (Node.Tok->MacroParent)
71
0
        ContainsMacroCall = true;
72
50.6M
      Current->Next = Node.Tok;
73
50.6M
      Node.Tok->Previous = Current;
74
50.6M
      Current = Current->Next;
75
50.6M
      addChildren(Node, Current);
76
      // FIXME: if we add children, previous will point to the token before
77
      // the children; changing this requires significant changes across
78
      // clang-format.
79
50.6M
    }
80
1.59M
    Last = Current;
81
1.59M
    Last->Next = nullptr;
82
1.59M
  }
83
84
52.2M
  /// Replaces the children of \p Current with newly allocated
  /// \c AnnotatedLine objects, one per entry of \c Node.Children. Each new
  /// line is also appended to this line's \c Children, whose entries the
  /// destructor deletes. \c ContainsMacroCall is set on this line whenever a
  /// created child has it set.
  void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
85
52.2M
    Current->Children.clear();
86
52.2M
    for (const auto &Child : Node.Children) {
87
270k
      Children.push_back(new AnnotatedLine(Child));
88
270k
      if (Children.back()->ContainsMacroCall)
89
0
        ContainsMacroCall = true;
90
270k
      Current->Children.push_back(Children.back());
91
270k
    }
92
52.2M
  }
93
94
0
  /// Returns 1 (for this line) plus the recursive \c size() of every entry in
  /// \c Children.
  size_t size() const {
95
0
    size_t Size = 1;
96
0
    for (const auto *Child : Children)
97
0
      Size += Child->size();
98
0
    return Size;
99
0
  }
100
101
1.59M
  /// Deletes the child lines held in \c Children, then walks the token chain
  /// starting at \c First, clearing each token's \c Children and resetting its
  /// \c Role. The tokens themselves are not deleted here.
  ~AnnotatedLine() {
102
1.59M
    for (AnnotatedLine *Child : Children)
103
270k
      delete Child;
104
1.59M
    FormatToken *Current = First;
105
53.8M
    while (Current) {
106
52.2M
      Current->Children.clear();
107
52.2M
      Current->Role.reset();
108
52.2M
      Current = Current->Next;
109
52.2M
    }
110
1.59M
  }
111
112
381k
  /// \c true if \c First is non-null, is a comment token, and
  /// \c First->getNextNonComment() returns null.
  bool isComment() const {
113
381k
    return First && First->is(tok::comment) && !First->getNextNonComment();
114
381k
  }
115
116
  /// \c true if this line starts with the given tokens in order, ignoring
117
  /// comments. Always \c false when \c First is null.
118
42.2M
  template <typename... Ts> bool startsWith(Ts... Tokens) const {
119
42.2M
    return First && First->startsSequence(Tokens...);
120
42.2M
  }
bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind>(clang::tok::TokenKind) const
Line
Count
Source
118
34.9M
  template <typename... Ts> bool startsWith(Ts... Tokens) const {
119
34.9M
    return First && First->startsSequence(Tokens...);
120
34.9M
  }
bool clang::format::AnnotatedLine::startsWith<clang::format::TokenType>(clang::format::TokenType) const
Line
Count
Source
118
6.89M
  template <typename... Ts> bool startsWith(Ts... Tokens) const {
119
6.89M
    return First && First->startsSequence(Tokens...);
120
6.89M
  }
bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind) const
Line
Count
Source
118
368k
  template <typename... Ts> bool startsWith(Ts... Tokens) const {
119
368k
    return First && First->startsSequence(Tokens...);
120
368k
  }
Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::IdentifierInfo*, clang::tok::TokenKind>(clang::IdentifierInfo*, clang::tok::TokenKind) const
Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::IdentifierInfo*, clang::tok::TokenKind>(clang::tok::TokenKind, clang::IdentifierInfo*, clang::tok::TokenKind) const
Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::IdentifierInfo*>(clang::IdentifierInfo*) const
Unexecuted instantiation: bool clang::format::AnnotatedLine::startsWith<clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind) const
121
122
  /// \c true if this line ends with the given tokens in reversed order,
123
  /// ignoring comments.
124
  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
125
  /// this line is like "... T3 T2 T1". Always \c false when \c Last is null.
126
7.42k
  template <typename... Ts> bool endsWith(Ts... Tokens) const {
127
7.42k
    return Last && Last->endsSequence(Tokens...);
128
7.42k
  }
bool clang::format::AnnotatedLine::endsWith<clang::tok::TokenKind>(clang::tok::TokenKind) const
Line
Count
Source
126
2.58k
  template <typename... Ts> bool endsWith(Ts... Tokens) const {
127
2.58k
    return Last && Last->endsSequence(Tokens...);
128
2.58k
  }
bool clang::format::AnnotatedLine::endsWith<clang::format::TokenType>(clang::format::TokenType) const
Line
Count
Source
126
4.13k
  template <typename... Ts> bool endsWith(Ts... Tokens) const {
127
4.13k
    return Last && Last->endsSequence(Tokens...);
128
4.13k
  }
bool clang::format::AnnotatedLine::endsWith<clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind) const
Line
Count
Source
126
712
  template <typename... Ts> bool endsWith(Ts... Tokens) const {
127
712
    return Last && Last->endsSequence(Tokens...);
128
712
  }
129
130
  /// \c true if this line looks like a function definition instead of a
131
  /// function declaration. Asserts MightBeFunctionDecl.
132
406
  bool mightBeFunctionDefinition() const {
133
406
    assert(MightBeFunctionDecl);
134
    // Try to determine if the end of a stream of tokens is either the
135
    // Definition or the Declaration for a function. It does this by looking for
136
    // the ';' in foo(); a line that ends with a ';' is treated as the
137
    // Declaration and anything else as the Definition. The line could end with
138
    //    foo(); /* comment */
139
    // or
140
    //    foo(); // comment
141
    // or
142
    //    foo() // comment
143
    // endsWith() ignores the comment.
144
0
    return !endsWith(tok::semi);
145
406
  }
146
147
  /// \c true if this line starts a namespace definition.
  /// Recognized starts are \c namespace, a \c TT_NamespaceMacro token,
  /// \c inline \c namespace and \c export \c namespace.
148
184k
  bool startsWithNamespace() const {
149
184k
    return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
150
184k
           startsWith(tok::kw_inline, tok::kw_namespace) ||
151
184k
           startsWith(tok::kw_export, tok::kw_namespace);
152
184k
  }
153
154
1.92M
  /// Returns \c First, or the result of \c First->getNextNonComment() when
  /// \c First is a comment token. Asserts that \c First is non-null.
  FormatToken *getFirstNonComment() const {
155
1.92M
    assert(First);
156
1.92M
    return First->is(tok::comment) ? First->getNextNonComment() : First;
157
1.92M
  }
158
159
321k
  /// Returns \c Last, or the result of \c Last->getPreviousNonComment() when
  /// \c Last is a comment token. Asserts that \c Last is non-null.
  FormatToken *getLastNonComment() const {
160
321k
    assert(Last);
161
321k
    return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
162
321k
  }
163
164
  /// First and last token of this line; both are set by the constructor.
  FormatToken *First;
165
  FormatToken *Last;
166
  /// Child lines created by \c addChildren and deleted by the destructor.
167
  SmallVector<AnnotatedLine *, 0> Children;
168
  /// Not set by the constructor.
  /// NOTE(review): presumably assigned later during annotation -- confirm.
169
  LineType Type;
  // The members from Level through MustBeDeclaration are copied from the
  // UnwrappedLine passed to the constructor.
170
  unsigned Level;
171
  unsigned PPLevel;
172
  size_t MatchingOpeningBlockLineIndex;
173
  size_t MatchingClosingBlockLineIndex;
174
  bool InPPDirective;
175
  bool InPragmaDirective;
176
  bool InMacroBody;
177
  bool MustBeDeclaration;
  // The next two flags are initialized to false by the constructor.
178
  bool MightBeFunctionDecl;
179
  bool IsMultiVariableDeclStmt;
180
181
  /// \c True if this line contains a macro call for which an expansion exists.
182
  bool ContainsMacroCall = false;
183
184
  /// \c True if this line should be formatted, i.e. intersects directly or
185
  /// indirectly with one of the input ranges.
186
  bool Affected;
187
188
  /// \c True if the leading empty lines of this line intersect with one of the
189
  /// input ranges.
190
  bool LeadingEmptyLinesAffected;
191
192
  /// \c True if one of this line's children intersects with an input range.
193
  bool ChildrenAffected;
194
195
  /// \c True if breaking after last attribute group in function return type.
196
  bool ReturnTypeWrapped;
197
198
  /// \c True if this line should be indented by ContinuationIndent in addition
199
  /// to the normal indentation level.
200
  bool IsContinuation;
201
  /// Copied from \c FirstStartColumn of the constructor's \c UnwrappedLine.
202
  unsigned FirstStartColumn;
203
204
private:
205
  // Disallow copying.
206
  AnnotatedLine(const AnnotatedLine &) = delete;
207
  void operator=(const AnnotatedLine &) = delete;
208
};
209
210
/// Determines extra information about the tokens comprising an
211
/// \c UnwrappedLine.
212
class TokenAnnotator {
213
public:
214
  /// Keeps references to \p Style and \p Keywords (both are stored as
  /// reference members), so they must outlive this annotator.
  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
215
1.05k
      : Style(Style), Keywords(Keywords) {}
216
217
  /// Adapts the indent levels of comment lines to the indent of the
218
  /// subsequent line.
219
  // FIXME: Can/should this be done in the UnwrappedLineParser?
220
  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
221
  // Note: annotate() is the only non-const member function declared here.
222
  void annotate(AnnotatedLine &Line);
223
  void calculateFormattingInformation(AnnotatedLine &Line) const;
224
225
private:
226
  /// Calculate the penalty for splitting before \c Tok.
227
  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
228
                        bool InFunctionDecl) const;
229
230
  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
231
232
  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
233
                            const FormatToken &Right) const;
234
235
  bool spaceRequiredBefore(const AnnotatedLine &Line,
236
                           const FormatToken &Right) const;
237
238
  bool mustBreakBefore(const AnnotatedLine &Line,
239
                       const FormatToken &Right) const;
240
241
  bool canBreakBefore(const AnnotatedLine &Line,
242
                      const FormatToken &Right) const;
243
244
  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
245
246
  void printDebugInfo(const AnnotatedLine &Line) const;
247
248
  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
249
250
  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
251
252
  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
253
                                              FormatToken *CurrentToken,
254
                                              unsigned Depth) const;
255
  FormatStyle::PointerAlignmentStyle
256
  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
257
258
  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
259
      const FormatToken &PointerOrReference) const;
260
  /// The style this annotator was constructed with (held by reference).
261
  const FormatStyle &Style;
262
  /// The keyword set this annotator was constructed with (held by reference).
263
  const AdditionalKeywords &Keywords;
264
  /// Scope kinds; NOTE(review): presumably updated by annotate() -- confirm.
265
  SmallVector<ScopeType> Scopes;
266
};
267
268
} // end namespace format
269
} // end namespace clang
270
271
#endif