/src/solidity/libevmasm/Assembly.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | This file is part of solidity. |
3 | | |
4 | | solidity is free software: you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License as published by |
6 | | the Free Software Foundation, either version 3 of the License, or |
7 | | (at your option) any later version. |
8 | | |
9 | | solidity is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with solidity. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | // SPDX-License-Identifier: GPL-3.0 |
18 | | /** @file Assembly.cpp |
19 | | * @author Gav Wood <i@gavwood.com> |
20 | | * @date 2014 |
21 | | */ |
22 | | |
23 | | #include <libevmasm/Assembly.h> |
24 | | |
25 | | #include <libevmasm/CommonSubexpressionEliminator.h> |
26 | | #include <libevmasm/ControlFlowGraph.h> |
27 | | #include <libevmasm/PeepholeOptimiser.h> |
28 | | #include <libevmasm/Inliner.h> |
29 | | #include <libevmasm/JumpdestRemover.h> |
30 | | #include <libevmasm/BlockDeduplicator.h> |
31 | | #include <libevmasm/ConstantOptimiser.h> |
32 | | |
33 | | #include <liblangutil/CharStream.h> |
34 | | #include <liblangutil/Exceptions.h> |
35 | | |
36 | | #include <libsolutil/JSON.h> |
37 | | #include <libsolutil/StringUtils.h> |
38 | | |
39 | | #include <fmt/format.h> |
40 | | |
41 | | #include <range/v3/algorithm/any_of.hpp> |
42 | | #include <range/v3/view/drop_exactly.hpp> |
43 | | #include <range/v3/view/enumerate.hpp> |
44 | | #include <range/v3/view/map.hpp> |
45 | | |
46 | | #include <fstream> |
47 | | #include <limits> |
48 | | #include <iterator> |
49 | | #include <stack> |
50 | | |
51 | | using namespace solidity; |
52 | | using namespace solidity::evmasm; |
53 | | using namespace solidity::langutil; |
54 | | using namespace solidity::util; |
55 | | |
56 | | namespace |
57 | | { |
58 | | |
59 | | /// Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode, |
60 | | /// this class can be instantiated in that scope. It will record the current bytecode size (before addition) |
61 | | /// and, at destruction time, record the new bytecode size. This information is then added to an external |
62 | | /// instruction locations vector. |
63 | | /// If the instruction decomposes into multiple individual evm instructions, `emit` can be |
64 | | /// called for all but the last one (which will be emitted by the destructor). |
65 | | class InstructionLocationEmitter |
66 | | { |
67 | | public: |
68 | | InstructionLocationEmitter( |
69 | | std::vector<LinkerObject::InstructionLocation>& _instructionLocations, |
70 | | bytes const& _bytecode, |
71 | | size_t const _assemblyItemIndex |
72 | | ): |
73 | | m_instructionLocations(_instructionLocations), |
74 | | m_bytecode(_bytecode), |
75 | | m_assemblyItemIndex(_assemblyItemIndex), |
76 | | m_instructionLocationStart(_bytecode.size()) |
77 | 19.1M | {} |
78 | | |
79 | | ~InstructionLocationEmitter() |
80 | 19.1M | { |
81 | 19.1M | emit(); |
82 | 19.1M | } |
83 | | |
84 | | void emit() |
85 | 19.1M | { |
86 | 19.1M | auto const end = m_bytecode.size(); |
87 | 19.1M | m_instructionLocations.push_back(LinkerObject::InstructionLocation{ |
88 | 19.1M | .start = m_instructionLocationStart, |
89 | 19.1M | .end = end, |
90 | 19.1M | .assemblyItemIndex = m_assemblyItemIndex |
91 | 19.1M | }); |
92 | 19.1M | m_instructionLocationStart = end; |
93 | 19.1M | } |
94 | | |
95 | | private: |
96 | | std::vector<LinkerObject::InstructionLocation>& m_instructionLocations; |
97 | | bytes const& m_bytecode; |
98 | | size_t const m_assemblyItemIndex{}; |
99 | | size_t m_instructionLocationStart{}; |
100 | | }; |
101 | | |
102 | | } |
103 | | |
104 | | std::map<std::string, std::shared_ptr<std::string const>> Assembly::s_sharedSourceNames; |
105 | | |
106 | | AssemblyItem const& Assembly::append(AssemblyItem _i) |
107 | 20.3M | { |
108 | 20.3M | assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); |
109 | 20.3M | m_deposit += static_cast<int>(_i.deposit()); |
110 | 20.3M | solAssert(m_currentCodeSection < m_codeSections.size()); |
111 | 20.3M | auto& currentItems = m_codeSections.at(m_currentCodeSection).items; |
112 | 20.3M | currentItems.emplace_back(std::move(_i)); |
113 | 20.3M | if (!currentItems.back().location().isValid() && m_currentSourceLocation.isValid()) |
114 | 20.1M | currentItems.back().setLocation(m_currentSourceLocation); |
115 | 20.3M | currentItems.back().m_modifierDepth = m_currentModifierDepth; |
116 | 20.3M | return currentItems.back(); |
117 | 20.3M | } |
118 | | |
119 | | unsigned Assembly::codeSize(unsigned subTagSize) const |
120 | 86.6k | { |
121 | 104k | for (unsigned tagSize = subTagSize; true; ++tagSize) |
122 | 104k | { |
123 | 104k | size_t ret = 1; |
124 | 104k | for (auto const& i: m_data) |
125 | 18.9k | ret += i.second.size(); |
126 | | |
127 | 104k | for (auto const& codeSection: m_codeSections) |
128 | 104k | for (AssemblyItem const& i: codeSection.items) |
129 | 38.9M | ret += i.bytesRequired(tagSize, m_evmVersion, Precision::Precise); |
130 | 104k | if (numberEncodingSize(ret) <= tagSize) |
131 | 86.6k | return static_cast<unsigned>(ret); |
132 | 104k | } |
133 | 86.6k | } |
134 | | |
135 | | void Assembly::importAssemblyItemsFromJSON(Json const& _code, std::vector<std::string> const& _sourceList) |
136 | 0 | { |
137 | | // Assembly constructor creates first code section with proper type and empty `items` |
138 | 0 | solAssert(m_codeSections.size() == 1); |
139 | 0 | solAssert(m_codeSections[0].items.empty()); |
140 | | // TODO: Add support for EOF and more than one code sections. |
141 | 0 | solUnimplementedAssert(!m_eofVersion.has_value(), "Assembly import for EOF is not yet implemented."); |
142 | 0 | solRequire(_code.is_array(), AssemblyImportException, "Supplied JSON is not an array."); |
143 | 0 | for (auto jsonItemIter = std::begin(_code); jsonItemIter != std::end(_code); ++jsonItemIter) |
144 | 0 | { |
145 | 0 | AssemblyItem const& newItem = m_codeSections[0].items.emplace_back(createAssemblyItemFromJSON(*jsonItemIter, _sourceList)); |
146 | 0 | if (newItem == Instruction::JUMPDEST) |
147 | 0 | solThrow(AssemblyImportException, "JUMPDEST instruction without a tag"); |
148 | 0 | else if (newItem.type() == AssemblyItemType::Tag) |
149 | 0 | { |
150 | 0 | ++jsonItemIter; |
151 | 0 | if (jsonItemIter != std::end(_code) && createAssemblyItemFromJSON(*jsonItemIter, _sourceList) != Instruction::JUMPDEST) |
152 | 0 | solThrow(AssemblyImportException, "JUMPDEST expected after tag."); |
153 | 0 | } |
154 | 0 | } |
155 | 0 | } |
156 | | |
157 | | AssemblyItem Assembly::createAssemblyItemFromJSON(Json const& _json, std::vector<std::string> const& _sourceList) |
158 | 0 | { |
159 | 0 | solRequire(_json.is_object(), AssemblyImportException, "Supplied JSON is not an object."); |
160 | 0 | static std::set<std::string> const validMembers{"name", "begin", "end", "source", "value", "modifierDepth", "jumpType"}; |
161 | 0 | for (auto const& [member, _]: _json.items()) |
162 | 0 | solRequire( |
163 | 0 | validMembers.count(member), |
164 | 0 | AssemblyImportException, |
165 | 0 | fmt::format( |
166 | 0 | "Unknown member '{}'. Valid members are: {}.", |
167 | 0 | member, |
168 | 0 | solidity::util::joinHumanReadable(validMembers, ", ") |
169 | 0 | ) |
170 | 0 | ); |
171 | 0 | solRequire(isOfType<std::string>(_json["name"]), AssemblyImportException, "Member 'name' missing or not of type string."); |
172 | 0 | solRequire(isOfTypeIfExists<int>(_json, "begin"), AssemblyImportException, "Optional member 'begin' not of type int."); |
173 | 0 | solRequire(isOfTypeIfExists<int>(_json, "end"), AssemblyImportException, "Optional member 'end' not of type int."); |
174 | 0 | solRequire(isOfTypeIfExists<int>(_json, "source"), AssemblyImportException, "Optional member 'source' not of type int."); |
175 | 0 | solRequire(isOfTypeIfExists<std::string>(_json, "value"), AssemblyImportException, "Optional member 'value' not of type string."); |
176 | 0 | solRequire(isOfTypeIfExists<int>(_json, "modifierDepth"), AssemblyImportException, "Optional member 'modifierDepth' not of type int."); |
177 | 0 | solRequire(isOfTypeIfExists<std::string>(_json, "jumpType"), AssemblyImportException, "Optional member 'jumpType' not of type string."); |
178 | | |
179 | 0 | std::string name = get<std::string>(_json["name"]); |
180 | 0 | solRequire(!name.empty(), AssemblyImportException, "Member 'name' is empty."); |
181 | | |
182 | 0 | SourceLocation location; |
183 | 0 | if (_json.contains("begin")) |
184 | 0 | location.start = get<int>(_json["begin"]); |
185 | 0 | if (_json.contains("end")) |
186 | 0 | location.end = get<int>(_json["end"]); |
187 | 0 | int srcIndex = getOrDefault<int>(_json, "source", -1); |
188 | 0 | size_t modifierDepth = static_cast<size_t>(getOrDefault<int>(_json, "modifierDepth", 0)); |
189 | 0 | std::string value = getOrDefault<std::string>(_json, "value", ""); |
190 | 0 | std::string jumpType = getOrDefault<std::string>(_json, "jumpType", ""); |
191 | |
|
192 | 0 | auto updateUsedTags = [&](u256 const& data) |
193 | 0 | { |
194 | 0 | m_usedTags = std::max(m_usedTags, static_cast<unsigned>(data) + 1); |
195 | 0 | return data; |
196 | 0 | }; |
197 | |
|
198 | 0 | auto storeImmutableHash = [&](std::string const& _immutableName) -> h256 |
199 | 0 | { |
200 | 0 | h256 hash(util::keccak256(_immutableName)); |
201 | 0 | solAssert(m_immutables.count(hash) == 0 || m_immutables[hash] == _immutableName); |
202 | 0 | m_immutables[hash] = _immutableName; |
203 | 0 | return hash; |
204 | 0 | }; |
205 | |
|
206 | 0 | auto storeLibraryHash = [&](std::string const& _libraryName) -> h256 |
207 | 0 | { |
208 | 0 | h256 hash(util::keccak256(_libraryName)); |
209 | 0 | solAssert(m_libraries.count(hash) == 0 || m_libraries[hash] == _libraryName); |
210 | 0 | m_libraries[hash] = _libraryName; |
211 | 0 | return hash; |
212 | 0 | }; |
213 | |
|
214 | 0 | auto requireValueDefinedForInstruction = [&](std::string const& _name, std::string const& _value) |
215 | 0 | { |
216 | 0 | solRequire( |
217 | 0 | !_value.empty(), |
218 | 0 | AssemblyImportException, |
219 | 0 | "Member 'value' is missing for instruction '" + _name + "', but the instruction needs a value." |
220 | 0 | ); |
221 | 0 | }; |
222 | |
|
223 | 0 | auto requireValueUndefinedForInstruction = [&](std::string const& _name, std::string const& _value) |
224 | 0 | { |
225 | 0 | solRequire( |
226 | 0 | _value.empty(), |
227 | 0 | AssemblyImportException, |
228 | 0 | "Member 'value' defined for instruction '" + _name + "', but the instruction does not need a value." |
229 | 0 | ); |
230 | 0 | }; |
231 | |
|
232 | 0 | solRequire(srcIndex >= -1 && srcIndex < static_cast<int>(_sourceList.size()), AssemblyImportException, "Source index out of bounds."); |
233 | 0 | if (srcIndex != -1) |
234 | 0 | location.sourceName = sharedSourceName(_sourceList[static_cast<size_t>(srcIndex)]); |
235 | |
|
236 | 0 | AssemblyItem result(0); |
237 | |
|
238 | 0 | if (c_instructions.count(name)) |
239 | 0 | { |
240 | 0 | AssemblyItem item{c_instructions.at(name), langutil::DebugData::create(location)}; |
241 | 0 | if (!jumpType.empty()) |
242 | 0 | { |
243 | 0 | if (item.instruction() == Instruction::JUMP || item.instruction() == Instruction::JUMPI) |
244 | 0 | { |
245 | 0 | std::optional<AssemblyItem::JumpType> parsedJumpType = AssemblyItem::parseJumpType(jumpType); |
246 | 0 | if (!parsedJumpType.has_value()) |
247 | 0 | solThrow(AssemblyImportException, "Invalid jump type."); |
248 | 0 | item.setJumpType(parsedJumpType.value()); |
249 | 0 | } |
250 | 0 | else |
251 | 0 | solThrow( |
252 | 0 | AssemblyImportException, |
253 | 0 | "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')" |
254 | 0 | ); |
255 | 0 | } |
256 | 0 | requireValueUndefinedForInstruction(name, value); |
257 | 0 | result = item; |
258 | 0 | } |
259 | 0 | else |
260 | 0 | { |
261 | 0 | solRequire( |
262 | 0 | jumpType.empty(), |
263 | 0 | AssemblyImportException, |
264 | 0 | "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')" |
265 | 0 | ); |
266 | 0 | if (name == "PUSH") |
267 | 0 | { |
268 | 0 | requireValueDefinedForInstruction(name, value); |
269 | 0 | result = {AssemblyItemType::Push, u256("0x" + value)}; |
270 | 0 | } |
271 | 0 | else if (name == "PUSH [ErrorTag]") |
272 | 0 | { |
273 | 0 | requireValueUndefinedForInstruction(name, value); |
274 | 0 | result = {AssemblyItemType::PushTag, 0}; |
275 | 0 | } |
276 | 0 | else if (name == "PUSH [tag]") |
277 | 0 | { |
278 | 0 | requireValueDefinedForInstruction(name, value); |
279 | 0 | result = {AssemblyItemType::PushTag, updateUsedTags(u256(value))}; |
280 | 0 | } |
281 | 0 | else if (name == "PUSH [$]") |
282 | 0 | { |
283 | 0 | requireValueDefinedForInstruction(name, value); |
284 | 0 | result = {AssemblyItemType::PushSub, u256("0x" + value)}; |
285 | 0 | } |
286 | 0 | else if (name == "PUSH #[$]") |
287 | 0 | { |
288 | 0 | requireValueDefinedForInstruction(name, value); |
289 | 0 | result = {AssemblyItemType::PushSubSize, u256("0x" + value)}; |
290 | 0 | } |
291 | 0 | else if (name == "PUSHSIZE") |
292 | 0 | { |
293 | 0 | requireValueUndefinedForInstruction(name, value); |
294 | 0 | result = {AssemblyItemType::PushProgramSize, 0}; |
295 | 0 | } |
296 | 0 | else if (name == "PUSHLIB") |
297 | 0 | { |
298 | 0 | requireValueDefinedForInstruction(name, value); |
299 | 0 | result = {AssemblyItemType::PushLibraryAddress, storeLibraryHash(value)}; |
300 | 0 | } |
301 | 0 | else if (name == "PUSHDEPLOYADDRESS") |
302 | 0 | { |
303 | 0 | requireValueUndefinedForInstruction(name, value); |
304 | 0 | result = {AssemblyItemType::PushDeployTimeAddress, 0}; |
305 | 0 | } |
306 | 0 | else if (name == "PUSHIMMUTABLE") |
307 | 0 | { |
308 | 0 | requireValueDefinedForInstruction(name, value); |
309 | 0 | result = {AssemblyItemType::PushImmutable, storeImmutableHash(value)}; |
310 | 0 | } |
311 | 0 | else if (name == "ASSIGNIMMUTABLE") |
312 | 0 | { |
313 | 0 | requireValueDefinedForInstruction(name, value); |
314 | 0 | result = {AssemblyItemType::AssignImmutable, storeImmutableHash(value)}; |
315 | 0 | } |
316 | 0 | else if (name == "tag") |
317 | 0 | { |
318 | 0 | requireValueDefinedForInstruction(name, value); |
319 | 0 | result = {AssemblyItemType::Tag, updateUsedTags(u256(value))}; |
320 | 0 | } |
321 | 0 | else if (name == "PUSH data") |
322 | 0 | { |
323 | 0 | requireValueDefinedForInstruction(name, value); |
324 | 0 | result = {AssemblyItemType::PushData, u256("0x" + value)}; |
325 | 0 | } |
326 | 0 | else if (name == "VERBATIM") |
327 | 0 | { |
328 | 0 | requireValueDefinedForInstruction(name, value); |
329 | 0 | AssemblyItem item(fromHex(value), 0, 0); |
330 | 0 | result = item; |
331 | 0 | } |
332 | 0 | else |
333 | 0 | solThrow(AssemblyImportException, "Invalid opcode (" + name + ")"); |
334 | 0 | } |
335 | 0 | result.setLocation(location); |
336 | 0 | result.m_modifierDepth = modifierDepth; |
337 | 0 | return result; |
338 | 0 | } |
339 | | |
340 | | namespace |
341 | | { |
342 | | |
343 | | std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) |
344 | 0 | { |
345 | 0 | if (!_location.hasText() || _sourceCodes.empty()) |
346 | 0 | return {}; |
347 | | |
348 | 0 | auto it = _sourceCodes.find(*_location.sourceName); |
349 | 0 | if (it == _sourceCodes.end()) |
350 | 0 | return {}; |
351 | | |
352 | 0 | return CharStream::singleLineSnippet(it->second, _location); |
353 | 0 | } |
354 | | |
355 | | class Functionalizer |
356 | | { |
357 | | public: |
358 | | Functionalizer (std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes, Assembly const& _assembly): |
359 | | m_out(_out), m_prefix(_prefix), m_sourceCodes(_sourceCodes), m_assembly(_assembly) |
360 | 0 | {} |
361 | | |
362 | | void feed(AssemblyItem const& _item, DebugInfoSelection const& _debugInfoSelection) |
363 | 0 | { |
364 | 0 | if (_item.location().isValid() && _item.location() != m_location) |
365 | 0 | { |
366 | 0 | flush(); |
367 | 0 | m_location = _item.location(); |
368 | 0 | printLocation(_debugInfoSelection); |
369 | 0 | } |
370 | |
|
371 | 0 | std::string expression = _item.toAssemblyText(m_assembly); |
372 | |
|
373 | 0 | if (!( |
374 | 0 | _item.canBeFunctional() && |
375 | 0 | _item.returnValues() <= 1 && |
376 | 0 | _item.arguments() <= m_pending.size() |
377 | 0 | )) |
378 | 0 | { |
379 | 0 | flush(); |
380 | 0 | m_out << m_prefix << (_item.type() == Tag ? "" : " ") << expression << std::endl; |
381 | 0 | return; |
382 | 0 | } |
383 | 0 | if (_item.arguments() > 0) |
384 | 0 | { |
385 | 0 | expression += "("; |
386 | 0 | for (size_t i = 0; i < _item.arguments(); ++i) |
387 | 0 | { |
388 | 0 | expression += m_pending.back(); |
389 | 0 | m_pending.pop_back(); |
390 | 0 | if (i + 1 < _item.arguments()) |
391 | 0 | expression += ", "; |
392 | 0 | } |
393 | 0 | expression += ")"; |
394 | 0 | } |
395 | |
|
396 | 0 | m_pending.push_back(expression); |
397 | 0 | if (_item.returnValues() != 1) |
398 | 0 | flush(); |
399 | 0 | } |
400 | | |
401 | | void flush() |
402 | 0 | { |
403 | 0 | for (std::string const& expression: m_pending) |
404 | 0 | m_out << m_prefix << " " << expression << std::endl; |
405 | 0 | m_pending.clear(); |
406 | 0 | } |
407 | | |
408 | | void printLocation(DebugInfoSelection const& _debugInfoSelection) |
409 | 0 | { |
410 | 0 | if (!m_location.isValid() || (!_debugInfoSelection.location && !_debugInfoSelection.snippet)) |
411 | 0 | return; |
412 | | |
413 | 0 | m_out << m_prefix << " /*"; |
414 | |
|
415 | 0 | if (_debugInfoSelection.location) |
416 | 0 | { |
417 | 0 | if (m_location.sourceName) |
418 | 0 | m_out << " " + escapeAndQuoteString(*m_location.sourceName); |
419 | 0 | if (m_location.hasText()) |
420 | 0 | m_out << ":" << std::to_string(m_location.start) + ":" + std::to_string(m_location.end); |
421 | 0 | } |
422 | |
|
423 | 0 | if (_debugInfoSelection.snippet) |
424 | 0 | { |
425 | 0 | if (_debugInfoSelection.location) |
426 | 0 | m_out << " "; |
427 | |
|
428 | 0 | m_out << locationFromSources(m_sourceCodes, m_location); |
429 | 0 | } |
430 | |
|
431 | 0 | m_out << " */" << std::endl; |
432 | 0 | } |
433 | | |
434 | | private: |
435 | | strings m_pending; |
436 | | SourceLocation m_location; |
437 | | |
438 | | std::ostream& m_out; |
439 | | std::string const& m_prefix; |
440 | | StringMap const& m_sourceCodes; |
441 | | Assembly const& m_assembly; |
442 | | }; |
443 | | |
444 | | } |
445 | | |
446 | | void Assembly::assemblyStream( |
447 | | std::ostream& _out, |
448 | | DebugInfoSelection const& _debugInfoSelection, |
449 | | std::string const& _prefix, |
450 | | StringMap const& _sourceCodes |
451 | | ) const |
452 | 0 | { |
453 | 0 | Functionalizer f(_out, _prefix, _sourceCodes, *this); |
454 | |
|
455 | 0 | for (auto const& i: m_codeSections.front().items) |
456 | 0 | f.feed(i, _debugInfoSelection); |
457 | 0 | f.flush(); |
458 | |
|
459 | 0 | for (size_t i = 1; i < m_codeSections.size(); ++i) |
460 | 0 | { |
461 | 0 | _out << std::endl << _prefix << "code_section_" << i << ": assembly {\n"; |
462 | 0 | Functionalizer codeSectionF(_out, _prefix + " ", _sourceCodes, *this); |
463 | 0 | for (auto const& item: m_codeSections[i].items) |
464 | 0 | codeSectionF.feed(item, _debugInfoSelection); |
465 | 0 | codeSectionF.flush(); |
466 | 0 | _out << _prefix << "}" << std::endl; |
467 | 0 | } |
468 | |
|
469 | 0 | if (!m_data.empty() || !m_subs.empty()) |
470 | 0 | { |
471 | 0 | _out << _prefix << "stop" << std::endl; |
472 | 0 | for (auto const& i: m_data) |
473 | 0 | if (u256(i.first) >= m_subs.size()) |
474 | 0 | _out << _prefix << "data_" << toHex(u256(i.first)) << " " << util::toHex(i.second) << std::endl; |
475 | |
|
476 | 0 | for (size_t i = 0; i < m_subs.size(); ++i) |
477 | 0 | { |
478 | 0 | _out << std::endl << _prefix << "sub_" << i << ": assembly {\n"; |
479 | 0 | m_subs[i]->assemblyStream(_out, _debugInfoSelection, _prefix + " ", _sourceCodes); |
480 | 0 | _out << _prefix << "}" << std::endl; |
481 | 0 | } |
482 | 0 | } |
483 | |
|
484 | 0 | if (m_auxiliaryData.size() > 0) |
485 | 0 | _out << std::endl << _prefix << "auxdata: 0x" << util::toHex(m_auxiliaryData) << std::endl; |
486 | 0 | } |
487 | | |
488 | | std::string Assembly::assemblyString( |
489 | | DebugInfoSelection const& _debugInfoSelection, |
490 | | StringMap const& _sourceCodes |
491 | | ) const |
492 | 0 | { |
493 | 0 | std::ostringstream tmp; |
494 | 0 | assemblyStream(tmp, _debugInfoSelection, "", _sourceCodes); |
495 | 0 | return (_debugInfoSelection.ethdebug ? "/// ethdebug: enabled\n" : "") + tmp.str(); |
496 | 0 | } |
497 | | |
498 | | Json Assembly::assemblyJSON(std::map<std::string, unsigned> const& _sourceIndices, bool _includeSourceList) const |
499 | 0 | { |
500 | 0 | Json root; |
501 | 0 | root[".code"] = Json::array(); |
502 | 0 | Json& code = root[".code"]; |
503 | | // TODO: support EOF |
504 | 0 | solUnimplementedAssert(!m_eofVersion.has_value(), "Assembly output for EOF is not yet implemented."); |
505 | 0 | solAssert(m_codeSections.size() == 1); |
506 | 0 | for (AssemblyItem const& item: m_codeSections.front().items) |
507 | 0 | { |
508 | 0 | int sourceIndex = -1; |
509 | 0 | if (item.location().sourceName) |
510 | 0 | { |
511 | 0 | auto iter = _sourceIndices.find(*item.location().sourceName); |
512 | 0 | if (iter != _sourceIndices.end()) |
513 | 0 | sourceIndex = static_cast<int>(iter->second); |
514 | 0 | } |
515 | |
|
516 | 0 | auto [name, data] = item.nameAndData(m_evmVersion); |
517 | 0 | Json jsonItem; |
518 | 0 | jsonItem["name"] = name; |
519 | 0 | jsonItem["begin"] = item.location().start; |
520 | 0 | jsonItem["end"] = item.location().end; |
521 | 0 | if (item.m_modifierDepth != 0) |
522 | 0 | jsonItem["modifierDepth"] = static_cast<int>(item.m_modifierDepth); |
523 | 0 | std::string jumpType = item.getJumpTypeAsString(); |
524 | 0 | if (!jumpType.empty()) |
525 | 0 | jsonItem["jumpType"] = jumpType; |
526 | 0 | if (name == "PUSHLIB") |
527 | 0 | data = m_libraries.at(h256(data)); |
528 | 0 | else if (name == "PUSHIMMUTABLE" || name == "ASSIGNIMMUTABLE") |
529 | 0 | data = m_immutables.at(h256(data)); |
530 | 0 | if (!data.empty()) |
531 | 0 | jsonItem["value"] = data; |
532 | 0 | jsonItem["source"] = sourceIndex; |
533 | 0 | code.emplace_back(std::move(jsonItem)); |
534 | |
|
535 | 0 | if (item.type() == AssemblyItemType::Tag) |
536 | 0 | { |
537 | 0 | Json jumpdest; |
538 | 0 | jumpdest["name"] = "JUMPDEST"; |
539 | 0 | jumpdest["begin"] = item.location().start; |
540 | 0 | jumpdest["end"] = item.location().end; |
541 | 0 | jumpdest["source"] = sourceIndex; |
542 | 0 | if (item.m_modifierDepth != 0) |
543 | 0 | jumpdest["modifierDepth"] = static_cast<int>(item.m_modifierDepth); |
544 | 0 | code.emplace_back(std::move(jumpdest)); |
545 | 0 | } |
546 | 0 | } |
547 | 0 | if (_includeSourceList) |
548 | 0 | { |
549 | 0 | root["sourceList"] = Json::array(); |
550 | 0 | Json& jsonSourceList = root["sourceList"]; |
551 | 0 | unsigned maxSourceIndex = 0; |
552 | 0 | for (auto const& [sourceName, sourceIndex]: _sourceIndices) |
553 | 0 | { |
554 | 0 | maxSourceIndex = std::max(sourceIndex, maxSourceIndex); |
555 | 0 | jsonSourceList[sourceIndex] = sourceName; |
556 | 0 | } |
557 | 0 | solAssert(maxSourceIndex + 1 >= _sourceIndices.size()); |
558 | 0 | solRequire( |
559 | 0 | _sourceIndices.size() == 0 || _sourceIndices.size() == maxSourceIndex + 1, |
560 | 0 | AssemblyImportException, |
561 | 0 | "The 'sourceList' array contains invalid 'null' item." |
562 | 0 | ); |
563 | 0 | } |
564 | | |
565 | 0 | if (!m_data.empty() || !m_subs.empty()) |
566 | 0 | { |
567 | 0 | root[".data"] = Json::object(); |
568 | 0 | Json& data = root[".data"]; |
569 | 0 | for (auto const& i: m_data) |
570 | 0 | if (u256(i.first) >= m_subs.size()) |
571 | 0 | data[util::toHex(toBigEndian((u256)i.first), util::HexPrefix::DontAdd, util::HexCase::Upper)] = util::toHex(i.second); |
572 | |
|
573 | 0 | for (size_t i = 0; i < m_subs.size(); ++i) |
574 | 0 | { |
575 | 0 | std::stringstream hexStr; |
576 | 0 | hexStr << std::hex << i; |
577 | 0 | data[hexStr.str()] = m_subs[i]->assemblyJSON(_sourceIndices, /*_includeSourceList = */false); |
578 | 0 | } |
579 | 0 | } |
580 | |
|
581 | 0 | if (!m_auxiliaryData.empty()) |
582 | 0 | root[".auxdata"] = util::toHex(m_auxiliaryData); |
583 | |
|
584 | 0 | return root; |
585 | 0 | } |
586 | | |
587 | | std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSON( |
588 | | Json const& _json, |
589 | | std::vector<std::string> const& _sourceList, |
590 | | size_t _level, |
591 | | std::optional<uint8_t> _eofVersion |
592 | | ) |
593 | 0 | { |
594 | 0 | solRequire(_json.is_object(), AssemblyImportException, "Supplied JSON is not an object."); |
595 | 0 | static std::set<std::string> const validMembers{".code", ".data", ".auxdata", "sourceList"}; |
596 | 0 | for (auto const& [attribute, _]: _json.items()) |
597 | 0 | solRequire(validMembers.count(attribute), AssemblyImportException, "Unknown attribute '" + attribute + "'."); |
598 | | |
599 | 0 | if (_level == 0) |
600 | 0 | { |
601 | 0 | if (_json.contains("sourceList")) |
602 | 0 | { |
603 | 0 | solRequire(_json["sourceList"].is_array(), AssemblyImportException, "Optional member 'sourceList' is not an array."); |
604 | 0 | for (Json const& sourceName: _json["sourceList"]) |
605 | 0 | { |
606 | 0 | solRequire(!sourceName.is_null(), AssemblyImportException, "The 'sourceList' array contains invalid 'null' item."); |
607 | 0 | solRequire( |
608 | 0 | sourceName.is_string(), |
609 | 0 | AssemblyImportException, |
610 | 0 | "The 'sourceList' array contains an item that is not a string." |
611 | 0 | ); |
612 | 0 | } |
613 | 0 | } |
614 | 0 | } |
615 | 0 | else |
616 | 0 | solRequire( |
617 | 0 | !_json.contains("sourceList"), |
618 | 0 | AssemblyImportException, |
619 | 0 | "Member 'sourceList' may only be present in the root JSON object." |
620 | 0 | ); |
621 | | |
622 | 0 | auto result = std::make_shared<Assembly>(EVMVersion{}, _level == 0 /* _creation */, _eofVersion, "" /* _name */); |
623 | 0 | std::vector<std::string> parsedSourceList; |
624 | 0 | if (_json.contains("sourceList")) |
625 | 0 | { |
626 | 0 | solAssert(_level == 0); |
627 | 0 | solAssert(_sourceList.empty()); |
628 | 0 | for (Json const& sourceName: _json["sourceList"]) |
629 | 0 | { |
630 | 0 | solRequire( |
631 | 0 | std::find(parsedSourceList.begin(), parsedSourceList.end(), sourceName.get<std::string>()) == parsedSourceList.end(), |
632 | 0 | AssemblyImportException, |
633 | 0 | "Items in 'sourceList' array are not unique." |
634 | 0 | ); |
635 | 0 | parsedSourceList.emplace_back(sourceName.get<std::string>()); |
636 | 0 | } |
637 | 0 | } |
638 | | |
639 | 0 | solRequire(_json.contains(".code"), AssemblyImportException, "Member '.code' is missing."); |
640 | 0 | solRequire(_json[".code"].is_array(), AssemblyImportException, "Member '.code' is not an array."); |
641 | 0 | for (Json const& codeItem: _json[".code"]) |
642 | 0 | solRequire(codeItem.is_object(), AssemblyImportException, "The '.code' array contains an item that is not an object."); |
643 | | |
644 | 0 | result->importAssemblyItemsFromJSON(_json[".code"], _level == 0 ? parsedSourceList : _sourceList); |
645 | |
|
646 | 0 | if (_json.contains(".auxdata")) |
647 | 0 | { |
648 | 0 | solRequire(_json[".auxdata"].is_string(), AssemblyImportException, "Optional member '.auxdata' is not a string."); |
649 | 0 | result->m_auxiliaryData = fromHex(_json[".auxdata"].get<std::string>()); |
650 | 0 | solRequire(!result->m_auxiliaryData.empty(), AssemblyImportException, "Optional member '.auxdata' is not a valid hexadecimal string."); |
651 | 0 | } |
652 | | |
653 | 0 | if (_json.contains(".data")) |
654 | 0 | { |
655 | 0 | solRequire(_json[".data"].is_object(), AssemblyImportException, "Optional member '.data' is not an object."); |
656 | 0 | Json const& data = _json[".data"]; |
657 | 0 | std::map<size_t, std::shared_ptr<Assembly>> subAssemblies; |
658 | 0 | for (auto const& [key, value] : data.items()) |
659 | 0 | { |
660 | 0 | if (value.is_string()) |
661 | 0 | { |
662 | 0 | solRequire( |
663 | 0 | value.get<std::string>().empty() || !fromHex(value.get<std::string>()).empty(), |
664 | 0 | AssemblyImportException, |
665 | 0 | "The value for key '" + key + "' inside '.data' is not a valid hexadecimal string." |
666 | 0 | ); |
667 | 0 | result->m_data[h256(fromHex(key))] = fromHex(value.get<std::string>()); |
668 | 0 | } |
669 | 0 | else if (value.is_object()) |
670 | 0 | { |
671 | 0 | size_t index{}; |
672 | 0 | try |
673 | 0 | { |
674 | | // Using signed variant because stoul() still accepts negative numbers and |
675 | | // just lets them wrap around. |
676 | 0 | int parsedDataItemID = std::stoi(key, nullptr, 16); |
677 | 0 | solRequire(parsedDataItemID >= 0, AssemblyImportException, "The key '" + key + "' inside '.data' is out of the supported integer range."); |
678 | 0 | index = static_cast<size_t>(parsedDataItemID); |
679 | 0 | } |
680 | 0 | catch (std::invalid_argument const&) |
681 | 0 | { |
682 | 0 | solThrow(AssemblyImportException, "The key '" + key + "' inside '.data' is not an integer."); |
683 | 0 | } |
684 | 0 | catch (std::out_of_range const&) |
685 | 0 | { |
686 | 0 | solThrow(AssemblyImportException, "The key '" + key + "' inside '.data' is out of the supported integer range."); |
687 | 0 | } |
688 | | |
689 | 0 | auto [subAssembly, emptySourceList] = Assembly::fromJSON(value, _level == 0 ? parsedSourceList : _sourceList, _level + 1, _eofVersion); |
690 | 0 | solAssert(subAssembly); |
691 | 0 | solAssert(emptySourceList.empty()); |
692 | 0 | solAssert(subAssemblies.count(index) == 0); |
693 | 0 | subAssemblies[index] = subAssembly; |
694 | 0 | } |
695 | 0 | else |
696 | 0 | solThrow(AssemblyImportException, "The value of key '" + key + "' inside '.data' is neither a hex string nor an object."); |
697 | 0 | } |
698 | | |
699 | 0 | if (!subAssemblies.empty()) |
700 | 0 | solRequire( |
701 | 0 | ranges::max(subAssemblies | ranges::views::keys) == subAssemblies.size() - 1, |
702 | 0 | AssemblyImportException, |
703 | 0 | fmt::format( |
704 | 0 | "Invalid subassembly indices in '.data'. Not all numbers between 0 and {} are present.", |
705 | 0 | subAssemblies.size() - 1 |
706 | 0 | ) |
707 | 0 | ); |
708 | | |
709 | 0 | result->m_subs = subAssemblies | ranges::views::values | ranges::to<std::vector>; |
710 | 0 | } |
711 | | |
712 | 0 | if (_level == 0) |
713 | 0 | result->encodeAllPossibleSubPathsInAssemblyTree(); |
714 | |
|
715 | 0 | return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{}); |
716 | 0 | } |
717 | | |
718 | | void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath) |
719 | 0 | { |
720 | 0 | _assembliesOnPath.push_back(this); |
721 | 0 | for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back()) |
722 | 0 | { |
723 | 0 | for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot) |
724 | 0 | _assembliesOnPath[distanceFromRoot]->encodeSubPath( |
725 | 0 | _pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector> |
726 | 0 | ); |
727 | |
|
728 | 0 | m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath); |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | | std::shared_ptr<std::string const> Assembly::sharedSourceName(std::string const& _name) const |
733 | 0 | { |
734 | 0 | if (s_sharedSourceNames.find(_name) == s_sharedSourceNames.end()) |
735 | 0 | s_sharedSourceNames[_name] = std::make_shared<std::string>(_name); |
736 | |
|
737 | 0 | return s_sharedSourceNames[_name]; |
738 | 0 | } |
739 | | |
740 | | AssemblyItem Assembly::namedTag(std::string const& _name, size_t _params, size_t _returns, std::optional<uint64_t> _sourceID) |
741 | 558k | { |
742 | 558k | assertThrow(!_name.empty(), AssemblyException, "Empty named tag."); |
743 | 558k | if (m_namedTags.count(_name)) |
744 | 220k | { |
745 | 220k | assertThrow(m_namedTags.at(_name).params == _params, AssemblyException, ""); |
746 | 220k | assertThrow(m_namedTags.at(_name).returns == _returns, AssemblyException, ""); |
747 | 220k | assertThrow(m_namedTags.at(_name).sourceID == _sourceID, AssemblyException, ""); |
748 | 220k | } |
749 | 337k | else |
750 | 337k | m_namedTags[_name] = {static_cast<size_t>(newTag().data()), _sourceID, _params, _returns}; |
751 | 558k | return AssemblyItem{Tag, m_namedTags.at(_name).id}; |
752 | 558k | } |
753 | | |
754 | | AssemblyItem Assembly::newFunctionCall(uint16_t _functionID) const |
755 | 0 | { |
756 | 0 | solAssert(_functionID < m_codeSections.size(), "Call to undeclared function."); |
757 | 0 | solAssert(_functionID > 0, "Cannot call section 0"); |
758 | 0 | auto const& section = m_codeSections.at(_functionID); |
759 | 0 | if (section.nonReturning) |
760 | 0 | return AssemblyItem::jumpToFunction(_functionID, section.inputs, section.outputs); |
761 | 0 | else |
762 | 0 | return AssemblyItem::functionCall(_functionID, section.inputs, section.outputs); |
763 | 0 | } |
764 | | |
765 | | AssemblyItem Assembly::newFunctionReturn() const |
766 | 0 | { |
767 | 0 | solAssert(m_currentCodeSection != 0, "Appending function return without begin function."); |
768 | 0 | return AssemblyItem::functionReturn(); |
769 | 0 | } |
770 | | |
771 | | uint16_t Assembly::createFunction(uint8_t _args, uint8_t _rets, bool _nonReturning) |
772 | 0 | { |
773 | 0 | size_t functionID = m_codeSections.size(); |
774 | 0 | solRequire(functionID < 1024, AssemblyException, "Too many functions for EOF"); |
775 | 0 | solAssert(m_currentCodeSection == 0, "Functions need to be declared from the main block."); |
776 | 0 | solRequire(_rets <= 127, AssemblyException, "Too many function returns."); |
777 | 0 | solRequire(_args <= 127, AssemblyException, "Too many function inputs."); |
778 | 0 | m_codeSections.emplace_back(CodeSection{_args, _rets, _nonReturning, {}}); |
779 | 0 | return static_cast<uint16_t>(functionID); |
780 | 0 | } |
781 | | |
782 | | void Assembly::beginFunction(uint16_t _functionID) |
783 | 0 | { |
784 | 0 | solAssert(m_currentCodeSection == 0, "Attempted to begin a function before ending the last one."); |
785 | 0 | solAssert(_functionID != 0, "Attempt to begin a function with id 0"); |
786 | 0 | solAssert(_functionID < m_codeSections.size(), "Attempt to begin an undeclared function."); |
787 | 0 | auto& section = m_codeSections.at(_functionID); |
788 | 0 | solAssert(section.items.empty(), "Function already defined."); |
789 | 0 | m_currentCodeSection = _functionID; |
790 | 0 | } |
791 | | void Assembly::endFunction() |
792 | 0 | { |
793 | 0 | solAssert(m_currentCodeSection != 0, "End function without begin function."); |
794 | 0 | m_currentCodeSection = 0; |
795 | 0 | } |
796 | | |
797 | | AssemblyItem Assembly::newPushLibraryAddress(std::string const& _identifier) |
798 | 2.01k | { |
799 | 2.01k | h256 h(util::keccak256(_identifier)); |
800 | 2.01k | m_libraries[h] = _identifier; |
801 | 2.01k | return AssemblyItem{PushLibraryAddress, h}; |
802 | 2.01k | } |
803 | | |
804 | | AssemblyItem Assembly::newPushImmutable(std::string const& _identifier) |
805 | 238 | { |
806 | 238 | h256 h(util::keccak256(_identifier)); |
807 | 238 | m_immutables[h] = _identifier; |
808 | 238 | return AssemblyItem{PushImmutable, h}; |
809 | 238 | } |
810 | | |
811 | | AssemblyItem Assembly::newImmutableAssignment(std::string const& _identifier) |
812 | 450 | { |
813 | 450 | h256 h(util::keccak256(_identifier)); |
814 | 450 | m_immutables[h] = _identifier; |
815 | 450 | return AssemblyItem{AssignImmutable, h}; |
816 | 450 | } |
817 | | |
818 | | AssemblyItem Assembly::newAuxDataLoadN(size_t _offset) const |
819 | 0 | { |
820 | 0 | return AssemblyItem{AuxDataLoadN, _offset}; |
821 | 0 | } |
822 | | |
823 | | AssemblyItem Assembly::newSwapN(size_t _depth) const |
824 | 0 | { |
825 | 0 | return AssemblyItem::swapN(_depth); |
826 | 0 | } |
827 | | |
828 | | AssemblyItem Assembly::newDupN(size_t _depth) const |
829 | 0 | { |
830 | 0 | return AssemblyItem::dupN(_depth); |
831 | 0 | } |
832 | | |
833 | | Assembly& Assembly::optimise(OptimiserSettings const& _settings) |
834 | 59.1k | { |
835 | 59.1k | optimiseInternal(_settings, {}); |
836 | 59.1k | return *this; |
837 | 59.1k | } |
838 | | |
839 | | std::map<u256, u256> const& Assembly::optimiseInternal( |
840 | | OptimiserSettings const& _settings, |
841 | | std::set<size_t> _tagsReferencedFromOutside |
842 | | ) |
843 | 80.2k | { |
844 | 80.2k | if (m_tagReplacements) |
845 | 180 | return *m_tagReplacements; |
846 | | |
847 | | // Run optimisation for sub-assemblies. |
848 | | // TODO: verify and double-check this for EOF. |
849 | 101k | for (size_t subId = 0; subId < m_subs.size(); ++subId) |
850 | 21.1k | { |
851 | 21.1k | OptimiserSettings settings = _settings; |
852 | 21.1k | Assembly& sub = *m_subs[subId]; |
853 | 21.1k | std::set<size_t> referencedTags; |
854 | 21.1k | for (auto& codeSection: m_codeSections) |
855 | 21.1k | referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId); |
856 | 21.1k | std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal( |
857 | 21.1k | settings, |
858 | 21.1k | referencedTags |
859 | 21.1k | ); |
860 | | // Apply the replacements (can be empty). |
861 | 21.1k | for (auto& codeSection: m_codeSections) |
862 | 21.1k | BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId); |
863 | 21.1k | } |
864 | | |
865 | 80.0k | std::map<u256, u256> tagReplacements; |
866 | | // Iterate until no new optimisation possibilities are found. |
867 | 285k | for (unsigned count = 1; count > 0;) |
868 | 205k | { |
869 | 205k | count = 0; |
870 | | |
871 | | // TODO: verify this for EOF. |
872 | 205k | if (_settings.runInliner && !m_eofVersion.has_value()) |
873 | 106k | { |
874 | 106k | solAssert(m_codeSections.size() == 1); |
875 | 106k | Inliner{ |
876 | 106k | m_codeSections.front().items, |
877 | 106k | _tagsReferencedFromOutside, |
878 | 106k | _settings.expectedExecutionsPerDeployment, |
879 | 106k | isCreation(), |
880 | 106k | m_evmVersion |
881 | 106k | }.optimise(); |
882 | 106k | } |
883 | | // TODO: verify this for EOF. |
884 | 205k | if (_settings.runJumpdestRemover && !m_eofVersion.has_value()) |
885 | 205k | { |
886 | 205k | for (auto& codeSection: m_codeSections) |
887 | 205k | { |
888 | 205k | JumpdestRemover jumpdestOpt{codeSection.items}; |
889 | 205k | if (jumpdestOpt.optimise(_tagsReferencedFromOutside)) |
890 | 91.1k | count++; |
891 | 205k | } |
892 | 205k | } |
893 | | |
894 | 205k | if (_settings.runPeephole) |
895 | 205k | { |
896 | 205k | for (auto& codeSection: m_codeSections) |
897 | 205k | { |
898 | 205k | PeepholeOptimiser peepOpt{codeSection.items, m_evmVersion}; |
899 | 352k | while (peepOpt.optimise()) |
900 | 146k | { |
901 | 146k | count++; |
902 | 146k | assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck."); |
903 | 146k | } |
904 | 205k | } |
905 | 205k | } |
906 | | |
907 | | // This only modifies PushTags, we have to run again to actually remove code. |
908 | | // TODO: implement for EOF. |
909 | 205k | if (_settings.runDeduplicate && !m_eofVersion.has_value()) |
910 | 106k | for (auto& section: m_codeSections) |
911 | 106k | { |
912 | 106k | BlockDeduplicator deduplicator{section.items}; |
913 | 106k | if (deduplicator.deduplicate()) |
914 | 7.62k | { |
915 | 7.62k | for (auto const& replacement: deduplicator.replacedTags()) |
916 | 90.7k | { |
917 | 90.7k | assertThrow( |
918 | 90.7k | replacement.first <= std::numeric_limits<size_t>::max() && replacement.second <= std::numeric_limits<size_t>::max(), |
919 | 90.7k | OptimizerException, |
920 | 90.7k | "Invalid tag replacement." |
921 | 90.7k | ); |
922 | 90.7k | assertThrow( |
923 | 90.7k | !tagReplacements.count(replacement.first), |
924 | 90.7k | OptimizerException, |
925 | 90.7k | "Replacement already known." |
926 | 90.7k | ); |
927 | 90.7k | tagReplacements[replacement.first] = replacement.second; |
928 | 90.7k | if (_tagsReferencedFromOutside.erase(static_cast<size_t>(replacement.first))) |
929 | 0 | _tagsReferencedFromOutside.insert(static_cast<size_t>(replacement.second)); |
930 | 90.7k | } |
931 | 7.62k | count++; |
932 | 7.62k | } |
933 | 106k | } |
934 | | |
935 | | // TODO: investigate for EOF |
936 | 205k | if (_settings.runCSE && !m_eofVersion.has_value()) |
937 | 106k | { |
938 | | // Control flow graph optimization has been here before but is disabled because it |
939 | | // assumes we only jump to tags that are pushed. This is not the case anymore with |
940 | | // function types that can be stored in storage. |
941 | 106k | AssemblyItems optimisedItems; |
942 | | |
943 | 106k | solAssert(m_codeSections.size() == 1); |
944 | 106k | auto& items = m_codeSections.front().items; |
945 | 17.9M | bool usesMSize = ranges::any_of(items, [](AssemblyItem const& _i) { |
946 | 17.9M | return _i == AssemblyItem{Instruction::MSIZE} || _i.type() == VerbatimBytecode; |
947 | 17.9M | }); |
948 | | |
949 | 106k | auto iter = items.begin(); |
950 | 3.26M | while (iter != items.end()) |
951 | 3.15M | { |
952 | 3.15M | KnownState emptyState; |
953 | 3.15M | CommonSubexpressionEliminator eliminator{emptyState}; |
954 | 3.15M | auto orig = iter; |
955 | 3.15M | iter = eliminator.feedItems(iter, items.end(), usesMSize); |
956 | 3.15M | bool shouldReplace = false; |
957 | 3.15M | AssemblyItems optimisedChunk; |
958 | 3.15M | try |
959 | 3.15M | { |
960 | 3.15M | optimisedChunk = eliminator.getOptimizedItems(); |
961 | 3.15M | shouldReplace = (optimisedChunk.size() < static_cast<size_t>(iter - orig)); |
962 | 3.15M | } |
963 | 3.15M | catch (StackTooDeepException const&) |
964 | 3.15M | { |
965 | | // This might happen if the opcode reconstruction is not as efficient |
966 | | // as the hand-crafted code. |
967 | 1.72k | } |
968 | 3.15M | catch (ItemNotAvailableException const&) |
969 | 3.15M | { |
970 | | // This might happen if e.g. associativity and commutativity rules |
971 | | // reorganise the expression tree, but not all leaves are available. |
972 | 0 | } |
973 | | |
974 | 3.15M | if (shouldReplace) |
975 | 135k | { |
976 | 135k | count++; |
977 | 135k | optimisedItems += optimisedChunk; |
978 | 135k | } |
979 | 3.01M | else |
980 | 3.01M | copy(orig, iter, back_inserter(optimisedItems)); |
981 | 3.15M | } |
982 | 106k | if (optimisedItems.size() < items.size()) |
983 | 37.3k | { |
984 | 37.3k | items = std::move(optimisedItems); |
985 | 37.3k | count++; |
986 | 37.3k | } |
987 | 106k | } |
988 | 205k | } |
989 | | |
990 | | // TODO: investigate for EOF |
991 | 80.0k | if (_settings.runConstantOptimiser && !m_eofVersion.has_value()) |
992 | 39.3k | ConstantOptimisationMethod::optimiseConstants( |
993 | 39.3k | isCreation(), |
994 | 39.3k | isCreation() ? 1 : _settings.expectedExecutionsPerDeployment, |
995 | 39.3k | m_evmVersion, |
996 | 39.3k | *this |
997 | 39.3k | ); |
998 | | |
999 | 80.0k | m_tagReplacements = std::move(tagReplacements); |
1000 | 80.0k | return *m_tagReplacements; |
1001 | 80.0k | } |
1002 | | |
1003 | | namespace |
1004 | | { |
1005 | | template<typename ValueT> |
1006 | | void setBigEndian(bytes& _dest, size_t _offset, size_t _size, ValueT _value) |
1007 | 2.94M | { |
1008 | 2.94M | assertThrow(numberEncodingSize(_value) <= _size, AssemblyException, ""); |
1009 | 2.94M | toBigEndian(_value, bytesRef(_dest.data() + _offset, _size)); |
1010 | 2.94M | } Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::setBigEndian<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long, unsigned long) Assembly.cpp:void (anonymous namespace)::setBigEndian<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>) Line | Count | Source | 1007 | 2.94M | { | 1008 | 2.94M | assertThrow(numberEncodingSize(_value) <= _size, AssemblyException, ""); | 1009 | 2.94M | toBigEndian(_value, bytesRef(_dest.data() + _offset, _size)); | 1010 | 2.94M | } |
|
1011 | | |
1012 | | template<typename ValueT> |
1013 | | void appendBigEndian(bytes& _dest, size_t _size, ValueT _value) |
1014 | 2.94M | { |
1015 | 2.94M | _dest.resize(_dest.size() + _size); |
1016 | 2.94M | setBigEndian(_dest, _dest.size() - _size, _size, _value); |
1017 | 2.94M | } Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndian<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long) Assembly.cpp:void (anonymous namespace)::appendBigEndian<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>) Line | Count | Source | 1014 | 2.94M | { | 1015 | 2.94M | _dest.resize(_dest.size() + _size); | 1016 | 2.94M | setBigEndian(_dest, _dest.size() - _size, _size, _value); | 1017 | 2.94M | } |
|
1018 | | |
1019 | | template<typename ValueT> |
1020 | | void setBigEndianUint16(bytes& _dest, size_t _offset, ValueT _value) |
1021 | 0 | { |
1022 | 0 | setBigEndian(_dest, _offset, 2, _value); |
1023 | 0 | } |
1024 | | |
1025 | | template<typename ValueT> |
1026 | | void appendBigEndianUint16(bytes& _dest, ValueT _value) |
1027 | 0 | { |
1028 | 0 | static_assert(!std::numeric_limits<ValueT>::is_signed, "only unsigned types or bigint supported"); |
1029 | 0 | assertThrow(_value <= 0xFFFF, AssemblyException, ""); |
1030 | 0 | appendBigEndian(_dest, 2, static_cast<size_t>(_value)); |
1031 | 0 | } Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long) Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned int>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned int) Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned short>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned short) Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>) |
1032 | | |
1033 | | // Calculates maximum stack height for given code section. According to EIP5450 https://eips.ethereum.org/EIPS/eip-5450 |
1034 | | uint16_t calculateMaxStackHeight(Assembly::CodeSection const& _section) |
1035 | 0 | { |
1036 | 0 | static auto constexpr UNVISITED = std::numeric_limits<size_t>::max(); |
1037 | |
|
1038 | 0 | AssemblyItems const& items = _section.items; |
1039 | 0 | solAssert(!items.empty()); |
1040 | 0 | uint16_t overallMaxHeight = _section.inputs; |
1041 | 0 | std::stack<size_t> worklist; |
1042 | 0 | std::vector<size_t> maxStackHeights(items.size(), UNVISITED); |
1043 | | |
1044 | | // Init first item stack height to number of inputs to the code section |
1045 | | // maxStackHeights stores stack height for an item before the item execution |
1046 | 0 | maxStackHeights[0] = _section.inputs; |
1047 | | // Push first item index to the worklist |
1048 | 0 | worklist.push(0u); |
1049 | 0 | while (!worklist.empty()) |
1050 | 0 | { |
1051 | 0 | size_t idx = worklist.top(); |
1052 | 0 | worklist.pop(); |
1053 | 0 | AssemblyItem const& item = items[idx]; |
1054 | 0 | size_t stackHeightChange = item.deposit(); |
1055 | 0 | size_t currentMaxHeight = maxStackHeights[idx]; |
1056 | 0 | solAssert(currentMaxHeight != UNVISITED); |
1057 | | |
1058 | 0 | std::vector<size_t> successors; |
1059 | | |
1060 | | // Add next instruction to successors for non-control-flow-changing instructions |
1061 | 0 | if ( |
1062 | 0 | !(item.hasInstruction() && SemanticInformation::terminatesControlFlow(item.instruction())) && |
1063 | 0 | item.type() != RelativeJump && |
1064 | 0 | item.type() != RetF && |
1065 | 0 | item.type() != JumpF |
1066 | 0 | ) |
1067 | 0 | { |
1068 | 0 | solAssert(idx < items.size() - 1, "No terminating instruction."); |
1069 | 0 | successors.emplace_back(idx + 1); |
1070 | 0 | } |
1071 | | |
1072 | | // Add jumps destinations to successors |
1073 | | // TODO: Remember to add RJUMPV when it is supported. |
1074 | 0 | if (item.type() == RelativeJump || item.type() == ConditionalRelativeJump) |
1075 | 0 | { |
1076 | 0 | auto const tagIt = std::find(items.begin(), items.end(), item.tag()); |
1077 | 0 | solAssert(tagIt != items.end(), "Tag not found."); |
1078 | 0 | successors.emplace_back(static_cast<size_t>(std::distance(items.begin(), tagIt))); |
1079 | | // TODO: This assert fails until the code is not topologically sorted. Uncomment when sorting introduced. |
1080 | | // If backward jump the successor must be already visited. |
1081 | | // solAssert(idx <= successors.back() || maxStackHeights[successors.back()] != UNVISITED); |
1082 | 0 | } |
1083 | | |
1084 | 0 | solRequire( |
1085 | 0 | currentMaxHeight + stackHeightChange <= std::numeric_limits<uint16_t>::max(), |
1086 | 0 | AssemblyException, |
1087 | 0 | "Stack overflow in EOF function." |
1088 | 0 | ); |
1089 | 0 | overallMaxHeight = std::max(overallMaxHeight, static_cast<uint16_t>(currentMaxHeight + stackHeightChange)); |
1090 | 0 | currentMaxHeight += stackHeightChange; |
1091 | | |
1092 | | // Set stack height for all instruction successors |
1093 | 0 | for (size_t successor: successors) |
1094 | 0 | { |
1095 | 0 | solAssert(successor < maxStackHeights.size()); |
1096 | | // Set stack height for newly visited |
1097 | 0 | if (maxStackHeights[successor] == UNVISITED) |
1098 | 0 | { |
1099 | 0 | maxStackHeights[successor] = currentMaxHeight; |
1100 | 0 | worklist.push(successor); |
1101 | 0 | } |
1102 | 0 | else |
1103 | 0 | { |
1104 | 0 | solAssert(successor < maxStackHeights.size()); |
1105 | | // For backward jump successor stack height must be equal |
1106 | 0 | if (successor < idx) |
1107 | 0 | solAssert(maxStackHeights[successor] == currentMaxHeight, "Stack height mismatch."); |
1108 | | |
1109 | | // If successor stack height is smaller update it and recalculate |
1110 | 0 | if (currentMaxHeight > maxStackHeights[successor]) |
1111 | 0 | { |
1112 | 0 | maxStackHeights[successor] = currentMaxHeight; |
1113 | 0 | worklist.push(successor); |
1114 | 0 | } |
1115 | 0 | } |
1116 | 0 | } |
1117 | 0 | } |
1118 | 0 | return overallMaxHeight; |
1119 | 0 | } |
1120 | | } |
1121 | | |
1122 | | std::tuple<bytes, std::vector<size_t>, size_t> Assembly::createEOFHeader(std::set<ContainerID> const& _referencedSubIds) const |
1123 | 0 | { |
1124 | 0 | bytes retBytecode; |
1125 | 0 | std::vector<size_t> codeSectionSizePositions; |
1126 | 0 | size_t dataSectionSizePosition; |
1127 | |
|
1128 | 0 | retBytecode.push_back(0xef); |
1129 | 0 | retBytecode.push_back(0x00); |
1130 | 0 | retBytecode.push_back(0x01); // version 1 |
1131 | |
|
1132 | 0 | retBytecode.push_back(0x01); // kind=type |
1133 | 0 | appendBigEndianUint16(retBytecode, m_codeSections.size() * 4u); // length of type section |
1134 | |
|
1135 | 0 | retBytecode.push_back(0x02); // kind=code |
1136 | 0 | appendBigEndianUint16(retBytecode, m_codeSections.size()); // placeholder for number of code sections |
1137 | |
|
1138 | 0 | for (auto const& codeSection: m_codeSections) |
1139 | 0 | { |
1140 | 0 | (void) codeSection; |
1141 | 0 | codeSectionSizePositions.emplace_back(retBytecode.size()); |
1142 | 0 | appendBigEndianUint16(retBytecode, 0u); // placeholder for length of code |
1143 | 0 | } |
1144 | |
|
1145 | 0 | if (!_referencedSubIds.empty()) |
1146 | 0 | { |
1147 | 0 | retBytecode.push_back(0x03); |
1148 | 0 | appendBigEndianUint16(retBytecode, _referencedSubIds.size()); |
1149 | |
|
1150 | 0 | for (auto subId: _referencedSubIds) |
1151 | 0 | appendBigEndianUint16(retBytecode, m_subs[subId]->assemble().bytecode.size()); |
1152 | 0 | } |
1153 | |
|
1154 | 0 | retBytecode.push_back(0x04); // kind=data |
1155 | 0 | dataSectionSizePosition = retBytecode.size(); |
1156 | 0 | appendBigEndianUint16(retBytecode, 0u); // length of data |
1157 | |
|
1158 | 0 | retBytecode.push_back(0x00); // terminator |
1159 | |
|
1160 | 0 | for (auto const& codeSection: m_codeSections) |
1161 | 0 | { |
1162 | 0 | retBytecode.push_back(codeSection.inputs); |
1163 | | // According to EOF spec function output num equals 0x80 means non-returning function |
1164 | 0 | retBytecode.push_back(codeSection.nonReturning ? 0x80 : codeSection.outputs); |
1165 | 0 | appendBigEndianUint16(retBytecode, calculateMaxStackHeight(codeSection)); |
1166 | 0 | } |
1167 | |
|
1168 | 0 | return {retBytecode, codeSectionSizePositions, dataSectionSizePosition}; |
1169 | 0 | } |
1170 | | |
1171 | | LinkerObject const& Assembly::assemble() const |
1172 | 159k | { |
1173 | 159k | solRequire(!m_invalid, AssemblyException, "Attempted to assemble invalid Assembly object."); |
1174 | | // Return the already assembled object, if present. |
1175 | 159k | if (!m_assembledObject.bytecode.empty()) |
1176 | 72.4k | return m_assembledObject; |
1177 | | |
1178 | | // Otherwise ensure the object is actually clear. |
1179 | 86.6k | solRequire(m_assembledObject.linkReferences.empty(), AssemblyException, "Unexpected link references."); |
1180 | | |
1181 | 86.6k | bool const eof = m_eofVersion.has_value(); |
1182 | 86.6k | solRequire(!eof || m_eofVersion == 1, AssemblyException, "Invalid EOF version."); |
1183 | | |
1184 | 86.6k | if (!eof) |
1185 | 86.6k | return assembleLegacy(); |
1186 | 0 | else |
1187 | 0 | return assembleEOF(); |
1188 | 86.6k | } |
1189 | | |
1190 | | [[nodiscard]] bytes Assembly::assembleOperation(AssemblyItem const& _item) const |
1191 | 11.2M | { |
1192 | | // solidity::evmasm::Instructions underlying type is uint8_t |
1193 | | // TODO: Change to std::to_underlying since C++23 |
1194 | 11.2M | return {static_cast<uint8_t>(_item.instruction())}; |
1195 | 11.2M | } |
1196 | | |
1197 | | [[nodiscard]] bytes Assembly::assemblePush(AssemblyItem const& _item) const |
1198 | 3.71M | { |
1199 | 3.71M | bytes ret; |
1200 | 3.71M | unsigned pushValueSize = numberEncodingSize(_item.data()); |
1201 | 3.71M | if (pushValueSize == 0 && !m_evmVersion.hasPush0()) |
1202 | 234k | pushValueSize = 1; |
1203 | | |
1204 | | // solidity::evmasm::Instructions underlying type is uint8_t |
1205 | | // TODO: Change to std::to_underlying since C++23 |
1206 | 3.71M | ret.push_back(static_cast<uint8_t>(pushInstruction(pushValueSize))); |
1207 | 3.71M | if (pushValueSize > 0) |
1208 | 2.94M | appendBigEndian(ret, pushValueSize, _item.data()); |
1209 | | |
1210 | 3.71M | return ret; |
1211 | 3.71M | } |
1212 | | |
1213 | | [[nodiscard]] std::pair<bytes, Assembly::LinkRef> Assembly::assemblePushLibraryAddress(AssemblyItem const& _item, size_t _pos) const |
1214 | 1.76k | { |
1215 | 1.76k | return { |
1216 | | // solidity::evmasm::Instructions underlying type is uint8_t |
1217 | | // TODO: Change to std::to_underlying since C++23 |
1218 | 1.76k | bytes(1, static_cast<uint8_t>(Instruction::PUSH20)) + bytes(20), |
1219 | 1.76k | {_pos + 1, m_libraries.at(_item.data())} |
1220 | 1.76k | }; |
1221 | 1.76k | } |
1222 | | |
1223 | | [[nodiscard]] bytes Assembly::assembleVerbatimBytecode(AssemblyItem const& item) const |
1224 | 1.17k | { |
1225 | 1.17k | return item.verbatimData(); |
1226 | 1.17k | } |
1227 | | |
1228 | | [[nodiscard]] bytes Assembly::assemblePushDeployTimeAddress() const |
1229 | 420 | { |
1230 | | // solidity::evmasm::Instructions underlying type is uint8_t |
1231 | | // TODO: Change to std::to_underlying since C++23 |
1232 | 420 | return bytes(1, static_cast<uint8_t>(Instruction::PUSH20)) + bytes(20); |
1233 | 420 | } |
1234 | | |
1235 | | [[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const |
1236 | 1.76M | { |
1237 | 1.76M | solRequire(_item.data() != 0, AssemblyException, "Invalid tag position."); |
1238 | 1.76M | solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag."); |
1239 | 1.76M | solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large."); |
1240 | 1.76M | size_t tagId = static_cast<size_t>(_item.data()); |
1241 | 1.76M | solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position."); |
1242 | 1.76M | m_tagPositionsInBytecode[tagId] = _pos; |
1243 | | |
1244 | | // solidity::evmasm::Instructions underlying type is uint8_t |
1245 | | // TODO: Change to std::to_underlying since C++23 |
1246 | 1.76M | return _addJumpDest ? bytes(1, static_cast<uint8_t>(Instruction::JUMPDEST)) : bytes(); |
1247 | 1.76M | } |
1248 | | |
1249 | | LinkerObject const& Assembly::assembleLegacy() const |
1250 | 86.6k | { |
1251 | 86.6k | solAssert(!m_eofVersion.has_value()); |
1252 | 86.6k | solAssert(!m_invalid); |
1253 | | // Return the already assembled object, if present. |
1254 | 86.6k | if (!m_assembledObject.bytecode.empty()) |
1255 | 0 | return m_assembledObject; |
1256 | | // Otherwise ensure the object is actually clear. |
1257 | 86.6k | solAssert(m_assembledObject.linkReferences.empty()); |
1258 | | |
1259 | 86.6k | LinkerObject& ret = m_assembledObject; |
1260 | | |
1261 | 86.6k | size_t subTagSize = 1; |
1262 | 86.6k | std::map<u256, LinkerObject::ImmutableRefs> immutableReferencesBySub; |
1263 | 86.6k | for (auto const& sub: m_subs) |
1264 | 22.0k | { |
1265 | 22.0k | auto const& linkerObject = sub->assemble(); |
1266 | 22.0k | if (!linkerObject.immutableReferences.empty()) |
1267 | 176 | { |
1268 | 176 | assertThrow( |
1269 | 176 | immutableReferencesBySub.empty(), |
1270 | 176 | AssemblyException, |
1271 | 176 | "More than one sub-assembly references immutables." |
1272 | 176 | ); |
1273 | 176 | immutableReferencesBySub = linkerObject.immutableReferences; |
1274 | 176 | } |
1275 | 22.0k | for (size_t tagPos: sub->m_tagPositionsInBytecode) |
1276 | 1.54M | if (tagPos != std::numeric_limits<size_t>::max() && numberEncodingSize(tagPos) > subTagSize) |
1277 | 7.60k | subTagSize = numberEncodingSize(tagPos); |
1278 | 22.0k | } |
1279 | | |
1280 | 86.6k | bool setsImmutables = false; |
1281 | 86.6k | bool pushesImmutables = false; |
1282 | | |
1283 | 86.6k | assertThrow(m_codeSections.size() == 1, AssemblyException, "Expected exactly one code section in non-EOF code."); |
1284 | 86.6k | AssemblyItems const& items = m_codeSections.front().items; |
1285 | | |
1286 | 86.6k | for (auto const& item: items) |
1287 | 19.1M | if (item.type() == AssignImmutable) |
1288 | 450 | { |
1289 | 450 | item.setImmutableOccurrences(immutableReferencesBySub[item.data()].second.size()); |
1290 | 450 | setsImmutables = true; |
1291 | 450 | } |
1292 | 19.1M | else if (item.type() == PushImmutable) |
1293 | 311 | pushesImmutables = true; |
1294 | 86.6k | if (setsImmutables || pushesImmutables) |
1295 | 86.6k | assertThrow( |
1296 | 86.6k | setsImmutables != pushesImmutables, |
1297 | 86.6k | AssemblyException, |
1298 | 86.6k | "Cannot push and assign immutables in the same assembly subroutine." |
1299 | 86.6k | ); |
1300 | | |
1301 | 86.6k | unsigned bytesRequiredForCode = codeSize(static_cast<unsigned>(subTagSize)); |
1302 | 86.6k | m_tagPositionsInBytecode = std::vector<size_t>(m_usedTags, std::numeric_limits<size_t>::max()); |
1303 | 86.6k | unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); |
1304 | | // Adjust bytesPerTag for references to sub assemblies. |
1305 | 86.6k | for (AssemblyItem const& item: items) |
1306 | 19.1M | if (item.type() == PushTag) |
1307 | 2.34M | { |
1308 | 2.34M | auto [subId, tagId] = item.splitForeignPushTag(); |
1309 | 2.34M | if (subId == std::numeric_limits<size_t>::max()) |
1310 | 2.34M | continue; |
1311 | 89 | assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id"); |
1312 | 89 | auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId); |
1313 | 89 | assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position."); |
1314 | 89 | bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition)); |
1315 | 89 | } |
1316 | | |
1317 | 86.6k | unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast<unsigned>(m_auxiliaryData.size()); |
1318 | 86.6k | for (auto const& sub: m_subs) |
1319 | 22.0k | bytesRequiredIncludingData += static_cast<unsigned>(sub->assemble().bytecode.size()); |
1320 | | |
1321 | 86.6k | unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); |
1322 | 86.6k | ret.bytecode.reserve(bytesRequiredIncludingData); |
1323 | | |
1324 | 86.6k | TagRefs tagRefs; |
1325 | 86.6k | DataRefs dataRefs; |
1326 | 86.6k | SubAssemblyRefs subRefs; |
1327 | 86.6k | ProgramSizeRefs sizeRefs; |
1328 | 86.6k | uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag)); |
1329 | 86.6k | uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef)); |
1330 | | |
1331 | 86.6k | LinkerObject::CodeSectionLocation codeSectionLocation; |
1332 | 86.6k | codeSectionLocation.instructionLocations.reserve(items.size()); |
1333 | 86.6k | codeSectionLocation.start = 0; |
1334 | 86.6k | for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate) |
1335 | 19.1M | { |
1336 | | // collect instruction locations via side effects |
1337 | 19.1M | InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex); |
1338 | | // store position of the invalid jump destination |
1339 | 19.1M | if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max()) |
1340 | 84.3k | m_tagPositionsInBytecode[0] = ret.bytecode.size(); |
1341 | | |
1342 | 19.1M | switch (item.type()) |
1343 | 19.1M | { |
1344 | 11.2M | case Operation: |
1345 | 11.2M | ret.bytecode += assembleOperation(item); |
1346 | 11.2M | break; |
1347 | 3.71M | case Push: |
1348 | 3.71M | ret.bytecode += assemblePush(item); |
1349 | 3.71M | break; |
1350 | 2.34M | case PushTag: |
1351 | 2.34M | ret.bytecode.push_back(tagPush); |
1352 | 2.34M | tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); |
1353 | 2.34M | ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); |
1354 | 2.34M | break; |
1355 | 15.6k | case PushData: |
1356 | 15.6k | ret.bytecode.push_back(dataRefPush); |
1357 | 15.6k | dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); |
1358 | 15.6k | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
1359 | 15.6k | break; |
1360 | 18.4k | case PushSub: |
1361 | 18.4k | assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); |
1362 | 18.4k | ret.bytecode.push_back(dataRefPush); |
1363 | 18.4k | subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size())); |
1364 | 18.4k | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
1365 | 18.4k | break; |
1366 | 19.7k | case PushSubSize: |
1367 | 19.7k | { |
1368 | 19.7k | assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); |
1369 | 19.7k | auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size(); |
1370 | 19.7k | item.setPushedValue(u256(s)); |
1371 | 19.7k | unsigned b = std::max<unsigned>(1, numberEncodingSize(s)); |
1372 | 19.7k | ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b))); |
1373 | 19.7k | ret.bytecode.resize(ret.bytecode.size() + b); |
1374 | 19.7k | bytesRef byr(&ret.bytecode.back() + 1 - b, b); |
1375 | 19.7k | toBigEndian(s, byr); |
1376 | 19.7k | break; |
1377 | 19.7k | } |
1378 | 903 | case PushProgramSize: |
1379 | 903 | ret.bytecode.push_back(dataRefPush); |
1380 | 903 | sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size())); |
1381 | 903 | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
1382 | 903 | break; |
1383 | 1.76k | case PushLibraryAddress: |
1384 | 1.76k | { |
1385 | 1.76k | auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); |
1386 | 1.76k | ret.bytecode += bytecode; |
1387 | 1.76k | ret.linkReferences.insert(linkRef); |
1388 | 1.76k | break; |
1389 | 19.7k | } |
1390 | 311 | case PushImmutable: |
1391 | 311 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32)); |
1392 | | // Maps keccak back to the "identifier" std::string of that immutable. |
1393 | 311 | ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); |
1394 | | // Record the bytecode offset of the PUSH32 argument. |
1395 | 311 | ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); |
1396 | | // Advance bytecode by 32 bytes (default initialized). |
1397 | 311 | ret.bytecode.resize(ret.bytecode.size() + 32); |
1398 | 311 | break; |
1399 | 1.17k | case VerbatimBytecode: |
1400 | 1.17k | ret.bytecode += assembleVerbatimBytecode(item); |
1401 | 1.17k | break; |
1402 | 450 | case AssignImmutable: |
1403 | 450 | { |
1404 | | // Expect 2 elements on stack (source, dest_base) |
1405 | 450 | auto const& offsets = immutableReferencesBySub[item.data()].second; |
1406 | 682 | for (size_t i = 0; i < offsets.size(); ++i) |
1407 | 232 | { |
1408 | 232 | if (i != offsets.size() - 1) |
1409 | 13 | { |
1410 | 13 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2)); |
1411 | | // This item type decomposes into multiple evm instructions, so we manually call emit() |
1412 | 13 | instructionLocationEmitter.emit(); |
1413 | 13 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2)); |
1414 | 13 | instructionLocationEmitter.emit(); |
1415 | 13 | } |
1416 | | // TODO: should we make use of the constant optimizer methods for pushing the offsets? |
1417 | 232 | bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); |
1418 | 232 | ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size())))); |
1419 | 232 | ret.bytecode += offsetBytes; |
1420 | 232 | instructionLocationEmitter.emit(); |
1421 | 232 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD)); |
1422 | 232 | instructionLocationEmitter.emit(); |
1423 | 232 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE)); |
1424 | | // No emit needed here, it's taken care of by the destructor of instructionLocationEmitter. |
1425 | 232 | } |
1426 | 450 | if (offsets.empty()) |
1427 | 231 | { |
1428 | 231 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP)); |
1429 | 231 | instructionLocationEmitter.emit(); |
1430 | 231 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP)); |
1431 | | // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter |
1432 | 231 | } |
1433 | 450 | immutableReferencesBySub.erase(item.data()); |
1434 | 450 | break; |
1435 | 19.7k | } |
1436 | 420 | case PushDeployTimeAddress: |
1437 | 420 | ret.bytecode += assemblePushDeployTimeAddress(); |
1438 | 420 | break; |
1439 | 1.76M | case Tag: |
1440 | 1.76M | ret.bytecode += assembleTag(item, ret.bytecode.size(), true); |
1441 | 1.76M | break; |
1442 | 0 | default: |
1443 | 0 | solAssert(false, "Unexpected opcode while assembling."); |
1444 | 19.1M | } |
1445 | 19.1M | } |
1446 | | |
1447 | 86.6k | codeSectionLocation.end = ret.bytecode.size(); |
1448 | | |
1449 | 86.6k | ret.codeSectionLocations.emplace_back(std::move(codeSectionLocation)); |
1450 | | |
1451 | 86.6k | if (!immutableReferencesBySub.empty()) |
1452 | 0 | throw |
1453 | 0 | langutil::Error( |
1454 | 0 | 1284_error, |
1455 | 0 | langutil::Error::Type::CodeGenerationError, |
1456 | 0 | "Some immutables were read from but never assigned, possibly because of optimization." |
1457 | 0 | ); |
1458 | | |
1459 | 86.6k | if (!m_subs.empty() || !m_data.empty() || !m_auxiliaryData.empty()) |
1460 | | // Append an INVALID here to help tests find miscompilation. |
1461 | 42.8k | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::INVALID)); |
1462 | | |
1463 | 86.6k | std::map<LinkerObject, size_t> subAssemblyOffsets; |
1464 | 86.6k | for (auto const& [subIdPath, bytecodeOffset]: subRefs) |
1465 | 18.4k | { |
1466 | 18.4k | LinkerObject subObject = subAssemblyById(subIdPath)->assemble(); |
1467 | 18.4k | bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef); |
1468 | | |
1469 | | // In order for de-duplication to kick in, not only must the bytecode be identical, but |
1470 | | // link and immutables references as well. |
1471 | 18.4k | if (size_t* subAssemblyOffset = util::valueOrNullptr(subAssemblyOffsets, subObject)) |
1472 | 1.34k | toBigEndian(*subAssemblyOffset, r); |
1473 | 17.1k | else |
1474 | 17.1k | { |
1475 | 17.1k | toBigEndian(ret.bytecode.size(), r); |
1476 | 17.1k | subAssemblyOffsets[subObject] = ret.bytecode.size(); |
1477 | 17.1k | ret.bytecode += subObject.bytecode; |
1478 | 17.1k | } |
1479 | 18.4k | for (auto const& ref: subObject.linkReferences) |
1480 | 2.80k | ret.linkReferences[ref.first + subAssemblyOffsets[subObject]] = ref.second; |
1481 | 18.4k | } |
1482 | 86.6k | for (auto const& i: tagRefs) |
1483 | 2.34M | { |
1484 | 2.34M | size_t subId; |
1485 | 2.34M | size_t tagId; |
1486 | 2.34M | std::tie(subId, tagId) = i.second; |
1487 | 2.34M | assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id"); |
1488 | 2.34M | std::vector<size_t> const& tagPositions = |
1489 | 2.34M | subId == std::numeric_limits<size_t>::max() ? |
1490 | 2.34M | m_tagPositionsInBytecode : |
1491 | 2.34M | m_subs[subId]->m_tagPositionsInBytecode; |
1492 | 2.34M | assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag."); |
1493 | 2.34M | size_t pos = tagPositions[tagId]; |
1494 | 2.34M | assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position."); |
1495 | 2.34M | assertThrow(numberEncodingSize(pos) <= bytesPerTag, AssemblyException, "Tag too large for reserved space."); |
1496 | 2.34M | bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); |
1497 | 2.34M | toBigEndian(pos, r); |
1498 | 2.34M | } |
1499 | 86.6k | for (auto const& [name, tagInfo]: m_namedTags) |
1500 | 412k | { |
1501 | 412k | size_t position = m_tagPositionsInBytecode.at(tagInfo.id); |
1502 | 412k | std::optional<size_t> tagIndex; |
1503 | 412k | for (auto&& [index, item]: items | ranges::views::enumerate) |
1504 | 1.99G | if (item.type() == Tag && static_cast<size_t>(item.data()) == tagInfo.id) |
1505 | 362k | { |
1506 | 362k | tagIndex = index; |
1507 | 362k | break; |
1508 | 362k | } |
1509 | 412k | ret.functionDebugData[name] = { |
1510 | 412k | position == std::numeric_limits<size_t>::max() ? std::nullopt : std::optional<size_t>{position}, |
1511 | 412k | tagIndex, |
1512 | 412k | tagInfo.sourceID, |
1513 | 412k | tagInfo.params, |
1514 | 412k | tagInfo.returns |
1515 | 412k | }; |
1516 | 412k | } |
1517 | | |
1518 | 86.6k | for (auto const& dataItem: m_data) |
1519 | 11.0k | { |
1520 | 11.0k | auto references = dataRefs.equal_range(dataItem.first); |
1521 | 11.0k | if (references.first == references.second) |
1522 | 5.09k | continue; |
1523 | 21.6k | for (auto ref = references.first; ref != references.second; ++ref) |
1524 | 15.6k | { |
1525 | 15.6k | bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); |
1526 | 15.6k | toBigEndian(ret.bytecode.size(), r); |
1527 | 15.6k | } |
1528 | 5.95k | ret.bytecode += dataItem.second; |
1529 | 5.95k | } |
1530 | | |
1531 | 86.6k | ret.bytecode += m_auxiliaryData; |
1532 | | |
1533 | 86.6k | for (unsigned pos: sizeRefs) |
1534 | 903 | { |
1535 | 903 | bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); |
1536 | 903 | toBigEndian(ret.bytecode.size(), r); |
1537 | 903 | } |
1538 | 86.6k | return ret; |
1539 | 86.6k | } |
1540 | | |
1541 | | std::map<ContainerID, ContainerID> Assembly::findReferencedContainers() const |
1542 | 0 | { |
1543 | 0 | std::set<ContainerID> referencedSubcontainersIds; |
1544 | 0 | solAssert(m_subs.size() <= 0x100); // According to EOF spec |
1545 | | |
1546 | 0 | for (auto&& codeSection: m_codeSections) |
1547 | 0 | for (AssemblyItem const& item: codeSection.items) |
1548 | 0 | if (item.type() == EOFCreate || item.type() == ReturnContract) |
1549 | 0 | { |
1550 | 0 | solAssert(item.data() <= m_subs.size(), "Invalid subcontainer index."); |
1551 | 0 | auto const containerId = static_cast<ContainerID>(item.data()); |
1552 | 0 | referencedSubcontainersIds.insert(containerId); |
1553 | 0 | } |
1554 | | |
1555 | 0 | std::map<ContainerID, ContainerID> replacements; |
1556 | 0 | uint8_t nUnreferenced = 0; |
1557 | 0 | for (size_t i = 0; i < m_subs.size(); ++i) |
1558 | 0 | { |
1559 | 0 | solAssert(i <= std::numeric_limits<ContainerID>::max()); |
1560 | 0 | if (referencedSubcontainersIds.count(static_cast<ContainerID>(i)) > 0) |
1561 | 0 | replacements[static_cast<ContainerID>(i)] = static_cast<ContainerID>(i - nUnreferenced); |
1562 | 0 | else |
1563 | 0 | nUnreferenced++; |
1564 | 0 | } |
1565 | | |
1566 | 0 | return replacements; |
1567 | 0 | } |
1568 | | |
1569 | | std::optional<uint16_t> Assembly::findMaxAuxDataLoadNOffset() const |
1570 | 0 | { |
1571 | 0 | std::optional<unsigned> maxOffset = std::nullopt; |
1572 | 0 | for (auto&& codeSection: m_codeSections) |
1573 | 0 | for (AssemblyItem const& item: codeSection.items) |
1574 | 0 | if (item.type() == AuxDataLoadN) |
1575 | 0 | { |
1576 | 0 | solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid auxdataloadn index value."); |
1577 | 0 | auto const offset = static_cast<unsigned>(item.data()); |
1578 | 0 | if (!maxOffset.has_value() || offset > maxOffset.value()) |
1579 | 0 | maxOffset = offset; |
1580 | |
|
1581 | 0 | } |
1582 | | |
1583 | 0 | return maxOffset; |
1584 | 0 | } |
1585 | | |
/// Assembles this object as an EOF version 1 container, writing into m_assembledObject.
/// Bytes are emitted in this order: the header returned by createEOFHeader(), every code
/// section, the bytecode of every referenced subassembly, the entries of m_data and finally
/// m_auxiliaryData. Values that are only known later (code section sizes, relative jump
/// offsets, DATALOADN offsets, data section size) are first written as placeholders and
/// patched in place afterwards.
/// @returns a reference to m_assembledObject.
/// @throws Error of type CodeGenerationError if a code section is larger than 0xFFFF bytes
/// (2202), a relative jump does not fit a signed 16-bit offset (2703), or the data section
/// size including the statically accessed auxdata exceeds 0xFFFF (3965). Violated internal
/// invariants are reported through solAssert / solRequire.
1586 | | LinkerObject const& Assembly::assembleEOF() const |
1587 | 0 | { |
1588 | 0 | solAssert(m_eofVersion.has_value() && m_eofVersion == 1); |
1589 | 0 | LinkerObject& ret = m_assembledObject; |
1590 |

// Maps each referenced subassembly index to its index after unreferenced ones are dropped.
1591 | 0 | auto const subIdsReplacements = findReferencedContainers(); |
1592 | 0 | auto const referencedSubIds = keys(subIdsReplacements); |
1593 |

1594 | 0 | solAssert(!m_codeSections.empty(), "Expected at least one code section."); |
1595 | 0 | solAssert( |
1596 | 0 | m_codeSections.front().inputs == 0 && m_codeSections.front().outputs == 0 && m_codeSections.front().nonReturning, |
1597 | 0 | "Expected the first code section to have zero inputs and be non-returning." |
1598 | 0 | ); |
1599 | | |
1600 | 0 | auto const maxAuxDataLoadNOffset = findMaxAuxDataLoadNOffset(); |
1601 | | |
1602 | | // Insert EOF1 header. |
1603 | 0 | auto [headerBytecode, codeSectionSizePositions, dataSectionSizePosition] = createEOFHeader(referencedSubIds); |
1604 | 0 | ret.bytecode = headerBytecode; |
1605 |

// Reset all tag positions; numeric_limits<size_t>::max() marks a tag without a position yet.
1606 | 0 | m_tagPositionsInBytecode = std::vector<size_t>(m_usedTags, std::numeric_limits<size_t>::max()); |
// Position of a DATALOADN immediate in the bytecode -> offset requested by the item.
1607 | 0 | std::map<size_t, uint16_t> dataSectionRef; |
// Position of a relative jump immediate in the bytecode -> id of the tag it targets.
1608 | 0 | std::map<size_t, size_t> tagRef; |
1609 |

1610 | 0 | for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) |
1611 | 0 | { |
1612 | 0 | auto const sectionStart = ret.bytecode.size(); |
1613 |

1614 | 0 | std::vector<LinkerObject::InstructionLocation> instructionLocations; |
1615 | 0 | instructionLocations.reserve(codeSection.items.size()); |
1616 |

1617 | 0 | solAssert(!codeSection.items.empty(), "Empty code section."); |
1618 | | |
1619 | 0 | for (auto const& [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate) |
1620 | 0 | { |
1621 | | // collect instruction locations via side effects |
1622 | 0 | InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode, assemblyItemIndex}; |
1623 | | |
1624 | | // store position of the invalid jump destination |
1625 | 0 | if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max()) |
1626 | 0 | m_tagPositionsInBytecode[0] = ret.bytecode.size(); |
1627 |

1628 | 0 | switch (item.type()) |
1629 | 0 | { |
1630 | 0 | case Operation: |
// Instructions listed here must not arrive as plain Operation items; most of them have
// a dedicated item type handled in one of the cases below.
1631 | 0 | solAssert( |
1632 | 0 | item.instruction() != Instruction::DATALOADN && |
1633 | 0 | item.instruction() != Instruction::RETURNCONTRACT && |
1634 | 0 | item.instruction() != Instruction::EOFCREATE && |
1635 | 0 | item.instruction() != Instruction::RJUMP && |
1636 | 0 | item.instruction() != Instruction::RJUMPI && |
1637 | 0 | item.instruction() != Instruction::CALLF && |
1638 | 0 | item.instruction() != Instruction::JUMPF && |
1639 | 0 | item.instruction() != Instruction::RETF && |
1640 | 0 | item.instruction() != Instruction::DUPN && |
1641 | 0 | item.instruction() != Instruction::SWAPN |
1642 | 0 | ); |
1643 | 0 | solAssert(!(item.instruction() >= Instruction::PUSH0 && item.instruction() <= Instruction::PUSH32)); |
1644 | 0 | ret.bytecode += assembleOperation(item); |
1645 | 0 | break; |
1646 | 0 | case Push: |
1647 | 0 | ret.bytecode += assemblePush(item); |
1648 | 0 | break; |
1649 | 0 | case PushLibraryAddress: |
1650 | 0 | { |
1651 | 0 | auto const [pushLibraryAddressBytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); |
1652 | 0 | ret.bytecode += pushLibraryAddressBytecode; |
1653 | 0 | ret.linkReferences.insert(linkRef); |
1654 | 0 | break; |
1655 | 0 | } |
1656 | 0 | case RelativeJump: |
1657 | 0 | case ConditionalRelativeJump: |
1658 | 0 | { |
// Emit the opcode followed by a zero 16-bit placeholder. The real offset is patched
// after all code sections have been emitted and the tag positions are known.
1659 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(item.instruction())); |
1660 | 0 | tagRef[ret.bytecode.size()] = item.relativeJumpTagID(); |
1661 | 0 | appendBigEndianUint16(ret.bytecode, 0u); |
1662 | 0 | break; |
1663 | 0 | } |
1664 | 0 | case EOFCreate: |
1665 | 0 | { |
// The one-byte immediate is the renumbered container index from subIdsReplacements.
1666 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::EOFCREATE)); |
1667 | 0 | solAssert(item.data() <= std::numeric_limits<ContainerID>::max()); |
1668 | 0 | auto const containerID = static_cast<ContainerID>(item.data()); |
1669 | 0 | solAssert(subIdsReplacements.count(containerID) == 1); |
1670 | 0 | ret.bytecode.push_back(subIdsReplacements.at(containerID)); |
1671 | 0 | break; |
1672 | 0 | } |
1673 | 0 | case ReturnContract: |
1674 | 0 | { |
// Same immediate encoding as for EOFCreate above.
1675 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::RETURNCONTRACT)); |
1676 | 0 | solAssert(item.data() <= std::numeric_limits<ContainerID>::max()); |
1677 | 0 | auto const containerID = static_cast<ContainerID>(item.data()); |
1678 | 0 | solAssert(subIdsReplacements.count(containerID) == 1); |
1679 | 0 | ret.bytecode.push_back(subIdsReplacements.at(containerID)); |
1680 | 0 | break; |
1681 | 0 | } |
1682 | 0 | case VerbatimBytecode: |
1683 | 0 | ret.bytecode += assembleVerbatimBytecode(item); |
1684 | 0 | break; |
1685 | 0 | case PushDeployTimeAddress: |
1686 | 0 | ret.bytecode += assemblePushDeployTimeAddress(); |
1687 | 0 | break; |
1688 | 0 | case Tag: |
1689 | 0 | ret.bytecode += assembleTag(item, ret.bytecode.size(), false); |
1690 | 0 | break; |
1691 | 0 | case AuxDataLoadN: |
1692 | 0 | { |
1693 | | // In findMaxAuxDataLoadNOffset we already verified that unsigned data value fits 2 bytes |
1694 | 0 | solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid auxdataloadn position."); |
1695 | 0 | ret.bytecode.push_back(uint8_t(Instruction::DATALOADN)); |
// Remember where the immediate lives; it is rewritten below if m_data / m_auxiliaryData
// contribute bytes to the data section.
1696 | 0 | dataSectionRef[ret.bytecode.size()] = static_cast<uint16_t>(item.data()); |
1697 | 0 | appendBigEndianUint16(ret.bytecode, item.data()); |
1698 | 0 | break; |
1699 | 0 | } |
1700 | 0 | case CallF: |
1701 | 0 | case JumpF: |
1702 | 0 | { |
// The 16-bit immediate is the index of the target code section; the item's declared
// signature must match that section's inputs / outputs.
1703 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(item.instruction())); |
1704 | 0 | solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid callf/jumpf index value."); |
1705 | 0 | size_t const index = static_cast<uint16_t>(item.data()); |
1706 | 0 | solAssert(index < m_codeSections.size()); |
1707 | 0 | solAssert(item.functionSignature().argsNum <= 127); |
1708 | 0 | solAssert(item.functionSignature().retsNum <= 127); |
1709 | 0 | solAssert(m_codeSections[index].inputs == item.functionSignature().argsNum); |
1710 | 0 | solAssert(m_codeSections[index].outputs == item.functionSignature().retsNum); |
1711 | | // If CallF the function cannot be non-returning. |
1712 | 0 | solAssert(item.type() == JumpF || !m_codeSections[index].nonReturning); |
1713 | 0 | appendBigEndianUint16(ret.bytecode, item.data()); |
1714 | 0 | break; |
1715 | 0 | } |
1716 | 0 | case RetF: |
1717 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::RETF)); |
1718 | 0 | break; |
1719 | 0 | case SwapN: |
1720 | 0 | case DupN: |
// The one-byte immediate stores the item's value minus one, so 1..256 maps to 0..255.
1721 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(item.instruction())); |
1722 | 0 | solAssert(item.data() >= 1 && item.data() <= 256); |
1723 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(item.data() - 1)); |
1724 | 0 | break; |
1725 | 0 | default: |
1726 | 0 | solAssert(false, "Unexpected opcode while assembling."); |
1727 | 0 | } |
1728 | 0 | } |
1729 | | |
1730 | 0 | if (ret.bytecode.size() - sectionStart > std::numeric_limits<uint16_t>::max()) |
1731 | | // TODO: Include source location. Note that origin locations we have in debug data are |
1732 | | // not usable for error reporting when compiling pure Yul because they point at the optimized source. |
1733 | 0 | throw Error( |
1734 | 0 | 2202_error, |
1735 | 0 | Error::Type::CodeGenerationError, |
1736 | 0 | "Code section too large for EOF." |
1737 | 0 | ); |
// Patch this section's size into the position reported by createEOFHeader().
1738 | 0 | setBigEndianUint16(ret.bytecode, codeSectionSizePositions[codeSectionIndex], ret.bytecode.size() - sectionStart); |
1739 |

1740 | 0 | ret.codeSectionLocations.push_back(LinkerObject::CodeSectionLocation{ |
1741 | 0 | .start = sectionStart, |
1742 | 0 | .end = ret.bytecode.size(), |
1743 | 0 | .instructionLocations = std::move(instructionLocations) |
1744 | 0 | }); |
1745 | 0 | } |
1746 | | |
// Resolve the relative jump placeholders now that every tag has its final position.
1747 | 0 | for (auto const& [refPos, tagId]: tagRef) |
1748 | 0 | { |
1749 | 0 | solAssert(tagId < m_tagPositionsInBytecode.size(), "Reference to non-existing tag."); |
1750 | 0 | size_t tagPos = m_tagPositionsInBytecode[tagId]; |
1751 | 0 | solAssert(tagPos != std::numeric_limits<size_t>::max(), "Reference to tag without position."); |
1752 | | |
// The offset is measured from the byte that follows the 2-byte immediate at refPos.
1753 | 0 | ptrdiff_t const relativeJumpOffset = static_cast<ptrdiff_t>(tagPos) - (static_cast<ptrdiff_t>(refPos) + 2); |
1754 | | // This cannot happen in practice because we'll run into section size limit first. |
1755 | 0 | if (!(-0x8000 <= relativeJumpOffset && relativeJumpOffset <= 0x7FFF)) |
1756 | | // TODO: Include source location. Note that origin locations we have in debug data are |
1757 | | // not usable for error reporting when compiling pure Yul because they point at the optimized source. |
1758 | 0 | throw Error( |
1759 | 0 | 2703_error, |
1760 | 0 | Error::Type::CodeGenerationError, |
1761 | 0 | "Relative jump too far" |
1762 | 0 | ); |
1763 | 0 | solAssert(relativeJumpOffset < -2 || 0 <= relativeJumpOffset, "Relative jump offset into immediate argument."); |
// The cast to uint16_t keeps the low 16 bits, so negative offsets are written in
// their 16-bit two's complement form.
1764 | 0 | setBigEndianUint16(ret.bytecode, refPos, static_cast<size_t>(static_cast<uint16_t>(relativeJumpOffset))); |
1765 | 0 | } |
1766 | | |
1767 | 0 | for (auto i: referencedSubIds) |
1768 | 0 | { |
1769 | 0 | size_t const subAssemblyPositionInParentObject = ret.bytecode.size(); |
1770 | 0 | auto const& subAssemblyLinkerObject = m_subs[i]->assemble(); |
1771 | | // Append subassembly bytecode to the parent assembly result bytecode |
1772 | 0 | ret.bytecode += subAssemblyLinkerObject.bytecode; |
1773 | | // Add subassembly link references to parent linker object. |
1774 | | // Offset accordingly to subassembly position in parent object bytecode |
1775 | 0 | for (auto const& [subAssemblyLinkRefPosition, linkRef]: subAssemblyLinkerObject.linkReferences) |
1776 | 0 | ret.linkReferences[subAssemblyPositionInParentObject + subAssemblyLinkRefPosition] = linkRef; |
1777 | 0 | } |
1778 | | |
1779 | | // TODO: Fill functionDebugData for EOF. It probably should be handled for new code section in the loop above. |
1780 | 0 | solRequire(m_namedTags.empty(), AssemblyException, "Named tags must be empty in EOF context."); |
1781 | | |
1782 | 0 | auto const dataStart = ret.bytecode.size(); |
1783 |

1784 | 0 | for (auto const& dataItem: m_data) |
1785 | 0 | ret.bytecode += dataItem.second; |
1786 |

1787 | 0 | ret.bytecode += m_auxiliaryData; |
1788 |

1789 | 0 | auto const preDeployDataSectionSize = ret.bytecode.size() - dataStart; |
1790 | | // DATALOADN loads 32 bytes from EOF data section zero padded if reading out of data bounds. |
1791 | | // In our case we do not allow DATALOADN with offsets which reads out of data bounds. |
1792 | 0 | auto const staticAuxDataSize = maxAuxDataLoadNOffset.has_value() ? (*maxAuxDataLoadNOffset + 32u) : 0u; |
1793 | 0 | auto const preDeployAndStaticAuxDataSize = preDeployDataSectionSize + staticAuxDataSize; |
1794 |

1795 | 0 | if (preDeployAndStaticAuxDataSize > std::numeric_limits<uint16_t>::max()) |
1796 | 0 | throw Error( |
1797 | 0 | 3965_error, |
1798 | 0 | Error::Type::CodeGenerationError, |
1799 | 0 | "The highest accessed data offset exceeds the maximum possible size of the static auxdata section." |
1800 | 0 | ); |
1801 | | |
1802 | | // If some data was already added to data section we need to update data section refs accordingly |
1803 | 0 | if (preDeployDataSectionSize > 0) |
1804 | 0 | for (auto [refPosition, staticAuxDataOffset] : dataSectionRef) |
1805 | 0 | { |
1806 | | // staticAuxDataOffset + preDeployDataSectionSize value is already verified to fit 2 bytes because |
1807 | | // staticAuxDataOffset < staticAuxDataSize |
1808 | 0 | setBigEndianUint16(ret.bytecode, refPosition, staticAuxDataOffset + preDeployDataSectionSize); |
1809 | 0 | } |
1810 |

// The size written to the header includes staticAuxDataSize, although this function only
// appends m_data and m_auxiliaryData; no bytes for the static auxdata are emitted here.
1811 | 0 | setBigEndianUint16(ret.bytecode, dataSectionSizePosition, preDeployAndStaticAuxDataSize); |
1812 |

1813 | 0 | return ret; |
1814 | 0 | } |
1815 | | |
1816 | | std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const |
1817 | 38.2k | { |
1818 | 38.2k | if (_subObjectId < m_subs.size()) |
1819 | 38.2k | return {_subObjectId}; |
1820 | | |
1821 | 0 | auto subIdPathIt = find_if( |
1822 | 0 | m_subPaths.begin(), |
1823 | 0 | m_subPaths.end(), |
1824 | 0 | [_subObjectId](auto const& subId) { return subId.second == _subObjectId; } |
1825 | 0 | ); |
1826 | |
|
1827 | 0 | assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, ""); |
1828 | 0 | return subIdPathIt->first; |
1829 | 0 | } |
1830 | | |
1831 | | size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath) |
1832 | 12.0k | { |
1833 | 12.0k | assertThrow(!_subPath.empty(), AssemblyException, ""); |
1834 | 12.0k | if (_subPath.size() == 1) |
1835 | 12.0k | { |
1836 | 12.0k | assertThrow(_subPath[0] < m_subs.size(), AssemblyException, ""); |
1837 | 12.0k | return _subPath[0]; |
1838 | 12.0k | } |
1839 | | |
1840 | 0 | if (m_subPaths.find(_subPath) == m_subPaths.end()) |
1841 | 0 | { |
1842 | 0 | size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size(); |
1843 | 0 | assertThrow(objectId >= m_subs.size(), AssemblyException, ""); |
1844 | 0 | m_subPaths[_subPath] = objectId; |
1845 | 0 | } |
1846 | | |
1847 | 0 | return m_subPaths[_subPath]; |
1848 | 0 | } |
1849 | | |
1850 | | Assembly const* Assembly::subAssemblyById(size_t _subId) const |
1851 | 38.2k | { |
1852 | 38.2k | std::vector<size_t> subIds = decodeSubPath(_subId); |
1853 | 38.2k | Assembly const* currentAssembly = this; |
1854 | 38.2k | for (size_t currentSubId: subIds) |
1855 | 38.2k | { |
1856 | 38.2k | currentAssembly = currentAssembly->m_subs.at(currentSubId).get(); |
1857 | 38.2k | assertThrow(currentAssembly, AssemblyException, ""); |
1858 | 38.2k | } |
1859 | | |
1860 | 38.2k | assertThrow(currentAssembly != this, AssemblyException, ""); |
1861 | 38.2k | return currentAssembly; |
1862 | 38.2k | } |
1863 | | |
1864 | | Assembly::OptimiserSettings Assembly::OptimiserSettings::translateSettings(frontend::OptimiserSettings const& _settings) |
1865 | 59.1k | { |
1866 | | // Constructing it this way so that we notice changes in the fields. |
1867 | 59.1k | OptimiserSettings asmSettings{false, false, false, false, false, false, 0}; |
1868 | 59.1k | asmSettings.runInliner = _settings.runInliner; |
1869 | 59.1k | asmSettings.runJumpdestRemover = _settings.runJumpdestRemover; |
1870 | 59.1k | asmSettings.runPeephole = _settings.runPeephole; |
1871 | 59.1k | asmSettings.runDeduplicate = _settings.runDeduplicate; |
1872 | 59.1k | asmSettings.runCSE = _settings.runCSE; |
1873 | 59.1k | asmSettings.runConstantOptimiser = _settings.runConstantOptimiser; |
1874 | 59.1k | asmSettings.expectedExecutionsPerDeployment = _settings.expectedExecutionsPerDeployment; |
1875 | 59.1k | return asmSettings; |
1876 | 59.1k | } |