/src/llvm-project/clang/lib/Format/MacroExpander.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains the implementation of MacroExpander, which handles macro |
11 | | /// configuration and expansion while formatting. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "Macros.h" |
16 | | |
17 | | #include "Encoding.h" |
18 | | #include "FormatToken.h" |
19 | | #include "FormatTokenLexer.h" |
20 | | #include "clang/Basic/TokenKinds.h" |
21 | | #include "clang/Format/Format.h" |
22 | | #include "clang/Lex/HeaderSearch.h" |
23 | | #include "clang/Lex/HeaderSearchOptions.h" |
24 | | #include "clang/Lex/Lexer.h" |
25 | | #include "clang/Lex/ModuleLoader.h" |
26 | | #include "clang/Lex/Preprocessor.h" |
27 | | #include "clang/Lex/PreprocessorOptions.h" |
28 | | #include "llvm/ADT/StringSet.h" |
29 | | #include "llvm/Support/ErrorHandling.h" |
30 | | |
31 | | namespace clang { |
32 | | namespace format { |
33 | | |
34 | | struct MacroExpander::Definition { |
35 | | StringRef Name; |
36 | | SmallVector<FormatToken *, 8> Params; |
37 | | SmallVector<FormatToken *, 8> Body; |
38 | | |
39 | | // Map from each argument's name to its position in the argument list. |
40 | | // With "M(x, y) x + y": |
41 | | // x -> 0 |
42 | | // y -> 1 |
43 | | llvm::StringMap<size_t> ArgMap; |
44 | | |
45 | | bool ObjectLike = true; |
46 | | }; |
47 | | |
48 | | class MacroExpander::DefinitionParser { |
49 | | public: |
50 | 3.03k | DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { |
51 | 3.03k | assert(!Tokens.empty()); |
52 | 0 | Current = Tokens[0]; |
53 | 3.03k | } |
54 | | |
55 | | // Parse the token stream and return the corresponding Definition object. |
56 | | // Returns an empty definition object with a null-Name on error. |
57 | 3.03k | MacroExpander::Definition parse() { |
58 | 3.03k | if (Current->isNot(tok::identifier)) |
59 | 0 | return {}; |
60 | 3.03k | Def.Name = Current->TokenText; |
61 | 3.03k | nextToken(); |
62 | 3.03k | if (Current->is(tok::l_paren)) { |
63 | 3.03k | Def.ObjectLike = false; |
64 | 3.03k | if (!parseParams()) |
65 | 0 | return {}; |
66 | 3.03k | } |
67 | 3.03k | if (!parseExpansion()) |
68 | 0 | return {}; |
69 | | |
70 | 3.03k | return Def; |
71 | 3.03k | } |
72 | | |
73 | | private: |
74 | 3.03k | bool parseParams() { |
75 | 3.03k | assert(Current->is(tok::l_paren)); |
76 | 0 | nextToken(); |
77 | 9.09k | while (Current->is(tok::identifier)) { |
78 | 9.09k | Def.Params.push_back(Current); |
79 | 9.09k | Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; |
80 | 9.09k | nextToken(); |
81 | 9.09k | if (Current->isNot(tok::comma)) |
82 | 3.03k | break; |
83 | 6.06k | nextToken(); |
84 | 6.06k | } |
85 | 3.03k | if (Current->isNot(tok::r_paren)) |
86 | 0 | return false; |
87 | 3.03k | nextToken(); |
88 | 3.03k | return true; |
89 | 3.03k | } |
90 | | |
91 | 3.03k | bool parseExpansion() { |
92 | 3.03k | if (!Current->isOneOf(tok::equal, tok::eof)) |
93 | 0 | return false; |
94 | 3.03k | if (Current->is(tok::equal)) |
95 | 3.03k | nextToken(); |
96 | 3.03k | parseTail(); |
97 | 3.03k | return true; |
98 | 3.03k | } |
99 | | |
100 | 3.03k | void parseTail() { |
101 | 24.2k | while (Current->isNot(tok::eof)) { |
102 | 21.2k | Def.Body.push_back(Current); |
103 | 21.2k | nextToken(); |
104 | 21.2k | } |
105 | 3.03k | Def.Body.push_back(Current); |
106 | 3.03k | } |
107 | | |
108 | 48.5k | void nextToken() { |
109 | 48.5k | if (Pos + 1 < Tokens.size()) |
110 | 48.5k | ++Pos; |
111 | 48.5k | Current = Tokens[Pos]; |
112 | 48.5k | Current->Finalized = true; |
113 | 48.5k | } |
114 | | |
115 | | size_t Pos = 0; |
116 | | FormatToken *Current = nullptr; |
117 | | Definition Def; |
118 | | ArrayRef<FormatToken *> Tokens; |
119 | | }; |
120 | | |
121 | | MacroExpander::MacroExpander( |
122 | | const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr, |
123 | | const FormatStyle &Style, |
124 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
125 | | IdentifierTable &IdentTable) |
126 | | : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), |
127 | 1.01k | IdentTable(IdentTable) { |
128 | 1.01k | for (const std::string &Macro : Macros) |
129 | 3.03k | parseDefinition(Macro); |
130 | 1.01k | } |
131 | | |
132 | 1.01k | MacroExpander::~MacroExpander() = default; |
133 | | |
134 | 3.03k | void MacroExpander::parseDefinition(const std::string &Macro) { |
135 | 3.03k | Buffers.push_back( |
136 | 3.03k | llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>")); |
137 | 3.03k | clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef()); |
138 | 3.03k | FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, |
139 | 3.03k | Allocator, IdentTable); |
140 | 3.03k | const auto Tokens = Lex.lex(); |
141 | 3.03k | if (!Tokens.empty()) { |
142 | 3.03k | DefinitionParser Parser(Tokens); |
143 | 3.03k | auto Definition = Parser.parse(); |
144 | 3.03k | if (Definition.ObjectLike) { |
145 | 0 | ObjectLike[Definition.Name] = std::move(Definition); |
146 | 3.03k | } else { |
147 | 3.03k | FunctionLike[Definition.Name][Definition.Params.size()] = |
148 | 3.03k | std::move(Definition); |
149 | 3.03k | } |
150 | 3.03k | } |
151 | 3.03k | } |
152 | | |
153 | 6.93M | bool MacroExpander::defined(llvm::StringRef Name) const { |
154 | 6.93M | return FunctionLike.contains(Name) || ObjectLike.contains(Name); |
155 | 6.93M | } |
156 | | |
157 | 0 | bool MacroExpander::objectLike(llvm::StringRef Name) const { |
158 | 0 | return ObjectLike.contains(Name); |
159 | 0 | } |
160 | | |
161 | 0 | bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const { |
162 | 0 | auto it = FunctionLike.find(Name); |
163 | 0 | return it != FunctionLike.end() && it->second.contains(Arity); |
164 | 0 | } |
165 | | |
166 | | llvm::SmallVector<FormatToken *, 8> |
167 | | MacroExpander::expand(FormatToken *ID, |
168 | 0 | std::optional<ArgsList> OptionalArgs) const { |
169 | 0 | if (OptionalArgs) |
170 | 0 | assert(hasArity(ID->TokenText, OptionalArgs->size())); |
171 | 0 | else |
172 | 0 | assert(objectLike(ID->TokenText)); |
173 | 0 | const Definition &Def = OptionalArgs |
174 | 0 | ? FunctionLike.find(ID->TokenText) |
175 | 0 | ->second.find(OptionalArgs.value().size()) |
176 | 0 | ->second |
177 | 0 | : ObjectLike.find(ID->TokenText)->second; |
178 | 0 | ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); |
179 | 0 | SmallVector<FormatToken *, 8> Result; |
180 | | // Expand each argument at most once. |
181 | 0 | llvm::StringSet<> ExpandedArgs; |
182 | | |
183 | | // Adds the given token to Result. |
184 | 0 | auto pushToken = [&](FormatToken *Tok) { |
185 | 0 | Tok->MacroCtx->ExpandedFrom.push_back(ID); |
186 | 0 | Result.push_back(Tok); |
187 | 0 | }; |
188 | | |
189 | | // If Tok references a parameter, adds the corresponding argument to Result. |
190 | | // Returns false if Tok does not reference a parameter. |
191 | 0 | auto expandArgument = [&](FormatToken *Tok) -> bool { |
192 | | // If the current token references a parameter, expand the corresponding |
193 | | // argument. |
194 | 0 | if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText)) |
195 | 0 | return false; |
196 | 0 | ExpandedArgs.insert(Tok->TokenText); |
197 | 0 | auto I = Def.ArgMap.find(Tok->TokenText); |
198 | 0 | if (I == Def.ArgMap.end()) |
199 | 0 | return false; |
200 | | // If there are fewer arguments than referenced parameters, treat the |
201 | | // parameter as empty. |
202 | | // FIXME: Potentially fully abort the expansion instead. |
203 | 0 | if (I->getValue() >= Args.size()) |
204 | 0 | return true; |
205 | 0 | for (FormatToken *Arg : Args[I->getValue()]) { |
206 | | // A token can be part of a macro argument at multiple levels. |
207 | | // For example, with "ID(x) x": |
208 | | // in ID(ID(x)), 'x' is expanded first as argument to the inner |
209 | | // ID, then again as argument to the outer ID. We keep the macro |
210 | | // role the token had from the inner expansion. |
211 | 0 | if (!Arg->MacroCtx) |
212 | 0 | Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); |
213 | 0 | pushToken(Arg); |
214 | 0 | } |
215 | 0 | return true; |
216 | 0 | }; |
217 | | |
218 | | // Expand the definition into Result. |
219 | 0 | for (FormatToken *Tok : Def.Body) { |
220 | 0 | if (expandArgument(Tok)) |
221 | 0 | continue; |
222 | | // Create a copy of the tokens from the macro body, i.e. were not provided |
223 | | // by user code. |
224 | 0 | FormatToken *New = new (Allocator.Allocate()) FormatToken; |
225 | 0 | New->copyFrom(*Tok); |
226 | 0 | assert(!New->MacroCtx); |
227 | | // Tokens that are not part of the user code are not formatted. |
228 | 0 | New->MacroCtx = MacroExpansion(MR_Hidden); |
229 | 0 | pushToken(New); |
230 | 0 | } |
231 | 0 | assert(Result.size() >= 1 && Result.back()->is(tok::eof)); |
232 | 0 | if (Result.size() > 1) { |
233 | 0 | ++Result[0]->MacroCtx->StartOfExpansion; |
234 | 0 | ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; |
235 | 0 | } |
236 | 0 | return Result; |
237 | 0 | } |
238 | | |
239 | | } // namespace format |
240 | | } // namespace clang |