/src/solidity/libevmasm/Assembly.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | This file is part of solidity. |
3 | | |
4 | | solidity is free software: you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License as published by |
6 | | the Free Software Foundation, either version 3 of the License, or |
7 | | (at your option) any later version. |
8 | | |
9 | | solidity is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with solidity. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | // SPDX-License-Identifier: GPL-3.0 |
18 | | /** @file Assembly.cpp |
19 | | * @author Gav Wood <i@gavwood.com> |
20 | | * @date 2014 |
21 | | */ |
22 | | |
23 | | #include <libevmasm/Assembly.h> |
24 | | |
25 | | #include <libevmasm/CommonSubexpressionEliminator.h> |
26 | | #include <libevmasm/ControlFlowGraph.h> |
27 | | #include <libevmasm/PeepholeOptimiser.h> |
28 | | #include <libevmasm/Inliner.h> |
29 | | #include <libevmasm/JumpdestRemover.h> |
30 | | #include <libevmasm/BlockDeduplicator.h> |
31 | | #include <libevmasm/ConstantOptimiser.h> |
32 | | #include <libevmasm/GasMeter.h> |
33 | | |
34 | | #include <liblangutil/CharStream.h> |
35 | | #include <liblangutil/Exceptions.h> |
36 | | |
37 | | #include <json/json.h> |
38 | | |
39 | | #include <range/v3/algorithm/any_of.hpp> |
40 | | #include <range/v3/view/enumerate.hpp> |
41 | | |
42 | | #include <fstream> |
43 | | #include <limits> |
44 | | |
45 | | using namespace std; |
46 | | using namespace solidity; |
47 | | using namespace solidity::evmasm; |
48 | | using namespace solidity::langutil; |
49 | | using namespace solidity::util; |
50 | | |
51 | | AssemblyItem const& Assembly::append(AssemblyItem _i) |
52 | 116M | { |
53 | 116M | assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); |
54 | 116M | m_deposit += static_cast<int>(_i.deposit()); |
55 | 116M | m_items.emplace_back(move(_i)); |
56 | 116M | if (!m_items.back().location().isValid() && m_currentSourceLocation.isValid()) |
57 | 116M | m_items.back().setLocation(m_currentSourceLocation); |
58 | 116M | m_items.back().m_modifierDepth = m_currentModifierDepth; |
59 | 116M | return m_items.back(); |
60 | 116M | } |
61 | | |
62 | | unsigned Assembly::codeSize(unsigned subTagSize) const |
63 | 9.10k | { |
64 | 14.5k | for (unsigned tagSize = subTagSize; true; ++tagSize) |
65 | 14.5k | { |
66 | 14.5k | size_t ret = 1; |
67 | 14.5k | for (auto const& i: m_data) |
68 | 145k | ret += i.second.size(); |
69 | | |
70 | 14.5k | for (AssemblyItem const& i: m_items) |
71 | 276M | ret += i.bytesRequired(tagSize, Precision::Approximate); |
72 | 14.5k | if (numberEncodingSize(ret) <= tagSize) |
73 | 9.10k | return static_cast<unsigned>(ret); |
74 | 14.5k | } |
75 | 9.10k | } |
76 | | |
77 | | namespace |
78 | | { |
79 | | |
80 | | string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) |
81 | 0 | { |
82 | 0 | if (!_location.hasText() || _sourceCodes.empty()) |
83 | 0 | return {}; |
84 | | |
85 | 0 | auto it = _sourceCodes.find(*_location.sourceName); |
86 | 0 | if (it == _sourceCodes.end()) |
87 | 0 | return {}; |
88 | | |
89 | 0 | return CharStream::singleLineSnippet(it->second, _location); |
90 | 0 | } |
91 | | |
92 | | class Functionalizer |
93 | | { |
94 | | public: |
95 | | Functionalizer (ostream& _out, string const& _prefix, StringMap const& _sourceCodes, Assembly const& _assembly): |
96 | | m_out(_out), m_prefix(_prefix), m_sourceCodes(_sourceCodes), m_assembly(_assembly) |
97 | 0 | {} |
98 | | |
99 | | void feed(AssemblyItem const& _item, DebugInfoSelection const& _debugInfoSelection) |
100 | 0 | { |
101 | 0 | if (_item.location().isValid() && _item.location() != m_location) |
102 | 0 | { |
103 | 0 | flush(); |
104 | 0 | m_location = _item.location(); |
105 | 0 | printLocation(_debugInfoSelection); |
106 | 0 | } |
107 | |
|
108 | 0 | string expression = _item.toAssemblyText(m_assembly); |
109 | |
|
110 | 0 | if (!( |
111 | 0 | _item.canBeFunctional() && |
112 | 0 | _item.returnValues() <= 1 && |
113 | 0 | _item.arguments() <= m_pending.size() |
114 | 0 | )) |
115 | 0 | { |
116 | 0 | flush(); |
117 | 0 | m_out << m_prefix << (_item.type() == Tag ? "" : " ") << expression << endl; |
118 | 0 | return; |
119 | 0 | } |
120 | 0 | if (_item.arguments() > 0) |
121 | 0 | { |
122 | 0 | expression += "("; |
123 | 0 | for (size_t i = 0; i < _item.arguments(); ++i) |
124 | 0 | { |
125 | 0 | expression += m_pending.back(); |
126 | 0 | m_pending.pop_back(); |
127 | 0 | if (i + 1 < _item.arguments()) |
128 | 0 | expression += ", "; |
129 | 0 | } |
130 | 0 | expression += ")"; |
131 | 0 | } |
132 | |
|
133 | 0 | m_pending.push_back(expression); |
134 | 0 | if (_item.returnValues() != 1) |
135 | 0 | flush(); |
136 | 0 | } |
137 | | |
138 | | void flush() |
139 | 0 | { |
140 | 0 | for (string const& expression: m_pending) |
141 | 0 | m_out << m_prefix << " " << expression << endl; |
142 | 0 | m_pending.clear(); |
143 | 0 | } |
144 | | |
145 | | void printLocation(DebugInfoSelection const& _debugInfoSelection) |
146 | 0 | { |
147 | 0 | if (!m_location.isValid() || (!_debugInfoSelection.location && !_debugInfoSelection.snippet)) |
148 | 0 | return; |
149 | | |
150 | 0 | m_out << m_prefix << " /*"; |
151 | |
|
152 | 0 | if (_debugInfoSelection.location) |
153 | 0 | { |
154 | 0 | if (m_location.sourceName) |
155 | 0 | m_out << " " + escapeAndQuoteString(*m_location.sourceName); |
156 | 0 | if (m_location.hasText()) |
157 | 0 | m_out << ":" << to_string(m_location.start) + ":" + to_string(m_location.end); |
158 | 0 | } |
159 | |
|
160 | 0 | if (_debugInfoSelection.snippet) |
161 | 0 | { |
162 | 0 | if (_debugInfoSelection.location) |
163 | 0 | m_out << " "; |
164 | |
|
165 | 0 | m_out << locationFromSources(m_sourceCodes, m_location); |
166 | 0 | } |
167 | |
|
168 | 0 | m_out << " */" << endl; |
169 | 0 | } |
170 | | |
171 | | private: |
172 | | strings m_pending; |
173 | | SourceLocation m_location; |
174 | | |
175 | | ostream& m_out; |
176 | | string const& m_prefix; |
177 | | StringMap const& m_sourceCodes; |
178 | | Assembly const& m_assembly; |
179 | | }; |
180 | | |
181 | | } |
182 | | |
183 | | void Assembly::assemblyStream( |
184 | | ostream& _out, |
185 | | DebugInfoSelection const& _debugInfoSelection, |
186 | | string const& _prefix, |
187 | | StringMap const& _sourceCodes |
188 | | ) const |
189 | 0 | { |
190 | 0 | Functionalizer f(_out, _prefix, _sourceCodes, *this); |
191 | |
|
192 | 0 | for (auto const& i: m_items) |
193 | 0 | f.feed(i, _debugInfoSelection); |
194 | 0 | f.flush(); |
195 | |
|
196 | 0 | if (!m_data.empty() || !m_subs.empty()) |
197 | 0 | { |
198 | 0 | _out << _prefix << "stop" << endl; |
199 | 0 | for (auto const& i: m_data) |
200 | 0 | if (u256(i.first) >= m_subs.size()) |
201 | 0 | _out << _prefix << "data_" << toHex(u256(i.first)) << " " << util::toHex(i.second) << endl; |
202 | |
|
203 | 0 | for (size_t i = 0; i < m_subs.size(); ++i) |
204 | 0 | { |
205 | 0 | _out << endl << _prefix << "sub_" << i << ": assembly {\n"; |
206 | 0 | m_subs[i]->assemblyStream(_out, _debugInfoSelection, _prefix + " ", _sourceCodes); |
207 | 0 | _out << _prefix << "}" << endl; |
208 | 0 | } |
209 | 0 | } |
210 | |
|
211 | 0 | if (m_auxiliaryData.size() > 0) |
212 | 0 | _out << endl << _prefix << "auxdata: 0x" << util::toHex(m_auxiliaryData) << endl; |
213 | 0 | } |
214 | | |
215 | | string Assembly::assemblyString( |
216 | | DebugInfoSelection const& _debugInfoSelection, |
217 | | StringMap const& _sourceCodes |
218 | | ) const |
219 | 0 | { |
220 | 0 | ostringstream tmp; |
221 | 0 | assemblyStream(tmp, _debugInfoSelection, "", _sourceCodes); |
222 | 0 | return tmp.str(); |
223 | 0 | } |
224 | | |
225 | | Json::Value Assembly::assemblyJSON(map<string, unsigned> const& _sourceIndices, bool _includeSourceList) const |
226 | 0 | { |
227 | 0 | Json::Value root; |
228 | 0 | root[".code"] = Json::arrayValue; |
229 | 0 | Json::Value& code = root[".code"]; |
230 | 0 | for (AssemblyItem const& item: m_items) |
231 | 0 | { |
232 | 0 | int sourceIndex = -1; |
233 | 0 | if (item.location().sourceName) |
234 | 0 | { |
235 | 0 | auto iter = _sourceIndices.find(*item.location().sourceName); |
236 | 0 | if (iter != _sourceIndices.end()) |
237 | 0 | sourceIndex = static_cast<int>(iter->second); |
238 | 0 | } |
239 | |
|
240 | 0 | auto [name, data] = item.nameAndData(); |
241 | 0 | Json::Value jsonItem; |
242 | 0 | jsonItem["name"] = name; |
243 | 0 | jsonItem["begin"] = item.location().start; |
244 | 0 | jsonItem["end"] = item.location().end; |
245 | 0 | if (item.m_modifierDepth != 0) |
246 | 0 | jsonItem["modifierDepth"] = static_cast<int>(item.m_modifierDepth); |
247 | 0 | std::string jumpType = item.getJumpTypeAsString(); |
248 | 0 | if (!jumpType.empty()) |
249 | 0 | jsonItem["jumpType"] = jumpType; |
250 | 0 | if (name == "PUSHLIB") |
251 | 0 | data = m_libraries.at(h256(data)); |
252 | 0 | else if (name == "PUSHIMMUTABLE" || name == "ASSIGNIMMUTABLE") |
253 | 0 | data = m_immutables.at(h256(data)); |
254 | 0 | if (!data.empty()) |
255 | 0 | jsonItem["value"] = data; |
256 | 0 | jsonItem["source"] = sourceIndex; |
257 | 0 | code.append(move(jsonItem)); |
258 | |
|
259 | 0 | if (item.type() == AssemblyItemType::Tag) |
260 | 0 | { |
261 | 0 | Json::Value jumpdest; |
262 | 0 | jumpdest["name"] = "JUMPDEST"; |
263 | 0 | jumpdest["begin"] = item.location().start; |
264 | 0 | jumpdest["end"] = item.location().end; |
265 | 0 | jumpdest["source"] = sourceIndex; |
266 | 0 | if (item.m_modifierDepth != 0) |
267 | 0 | jumpdest["modifierDepth"] = static_cast<int>(item.m_modifierDepth); |
268 | 0 | code.append(move(jumpdest)); |
269 | 0 | } |
270 | 0 | } |
271 | 0 | if (_includeSourceList) |
272 | 0 | { |
273 | 0 | root["sourceList"] = Json::arrayValue; |
274 | 0 | Json::Value& jsonSourceList = root["sourceList"]; |
275 | 0 | for (auto const& [name, index]: _sourceIndices) |
276 | 0 | jsonSourceList[index] = name; |
277 | 0 | } |
278 | |
|
279 | 0 | if (!m_data.empty() || !m_subs.empty()) |
280 | 0 | { |
281 | 0 | root[".data"] = Json::objectValue; |
282 | 0 | Json::Value& data = root[".data"]; |
283 | 0 | for (auto const& i: m_data) |
284 | 0 | if (u256(i.first) >= m_subs.size()) |
285 | 0 | data[util::toHex(toBigEndian((u256)i.first), util::HexPrefix::DontAdd, util::HexCase::Upper)] = util::toHex(i.second); |
286 | |
|
287 | 0 | for (size_t i = 0; i < m_subs.size(); ++i) |
288 | 0 | { |
289 | 0 | std::stringstream hexStr; |
290 | 0 | hexStr << hex << i; |
291 | 0 | data[hexStr.str()] = m_subs[i]->assemblyJSON(_sourceIndices, /*_includeSourceList = */false); |
292 | 0 | } |
293 | 0 | } |
294 | |
|
295 | 0 | if (!m_auxiliaryData.empty()) |
296 | 0 | root[".auxdata"] = util::toHex(m_auxiliaryData); |
297 | |
|
298 | 0 | return root; |
299 | 0 | } |
300 | | |
301 | | AssemblyItem Assembly::namedTag(string const& _name, size_t _params, size_t _returns, optional<uint64_t> _sourceID) |
302 | 4.51M | { |
303 | 4.51M | assertThrow(!_name.empty(), AssemblyException, "Empty named tag."); |
304 | 4.51M | if (m_namedTags.count(_name)) |
305 | 3.92M | { |
306 | 3.92M | assertThrow(m_namedTags.at(_name).params == _params, AssemblyException, ""); |
307 | 3.92M | assertThrow(m_namedTags.at(_name).returns == _returns, AssemblyException, ""); |
308 | 3.92M | assertThrow(m_namedTags.at(_name).sourceID == _sourceID, AssemblyException, ""); |
309 | 3.92M | } |
310 | 585k | else |
311 | 585k | m_namedTags[_name] = {static_cast<size_t>(newTag().data()), _sourceID, _params, _returns}; |
312 | 4.51M | return AssemblyItem{Tag, m_namedTags.at(_name).id}; |
313 | 4.51M | } |
314 | | |
315 | | AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier) |
316 | 0 | { |
317 | 0 | h256 h(util::keccak256(_identifier)); |
318 | 0 | m_libraries[h] = _identifier; |
319 | 0 | return AssemblyItem{PushLibraryAddress, h}; |
320 | 0 | } |
321 | | |
322 | | AssemblyItem Assembly::newPushImmutable(string const& _identifier) |
323 | 0 | { |
324 | 0 | h256 h(util::keccak256(_identifier)); |
325 | 0 | m_immutables[h] = _identifier; |
326 | 0 | return AssemblyItem{PushImmutable, h}; |
327 | 0 | } |
328 | | |
329 | | AssemblyItem Assembly::newImmutableAssignment(string const& _identifier) |
330 | 0 | { |
331 | 0 | h256 h(util::keccak256(_identifier)); |
332 | 0 | m_immutables[h] = _identifier; |
333 | 0 | return AssemblyItem{AssignImmutable, h}; |
334 | 0 | } |
335 | | |
336 | | Assembly& Assembly::optimise(OptimiserSettings const& _settings) |
337 | 4.55k | { |
338 | 4.55k | optimiseInternal(_settings, {}); |
339 | 4.55k | return *this; |
340 | 4.55k | } |
341 | | |
342 | | map<u256, u256> const& Assembly::optimiseInternal( |
343 | | OptimiserSettings const& _settings, |
344 | | std::set<size_t> _tagsReferencedFromOutside |
345 | | ) |
346 | 9.10k | { |
347 | 9.10k | if (m_tagReplacements) |
348 | 0 | return *m_tagReplacements; |
349 | | |
350 | | // Run optimisation for sub-assemblies. |
351 | 13.6k | for (size_t subId = 0; subId < m_subs.size(); ++subId) |
352 | 4.55k | { |
353 | 4.55k | OptimiserSettings settings = _settings; |
354 | 4.55k | Assembly& sub = *m_subs[subId]; |
355 | 4.55k | map<u256, u256> const& subTagReplacements = sub.optimiseInternal( |
356 | 4.55k | settings, |
357 | 4.55k | JumpdestRemover::referencedTags(m_items, subId) |
358 | 4.55k | ); |
359 | | // Apply the replacements (can be empty). |
360 | 4.55k | BlockDeduplicator::applyTagReplacement(m_items, subTagReplacements, subId); |
361 | 4.55k | } |
362 | | |
363 | 9.10k | map<u256, u256> tagReplacements; |
364 | | // Iterate until no new optimisation possibilities are found. |
365 | 36.4k | for (unsigned count = 1; count > 0;) |
366 | 27.3k | { |
367 | 27.3k | count = 0; |
368 | | |
369 | 27.3k | if (_settings.runInliner) |
370 | 0 | Inliner{ |
371 | 0 | m_items, |
372 | 0 | _tagsReferencedFromOutside, |
373 | 0 | _settings.expectedExecutionsPerDeployment, |
374 | 0 | isCreation(), |
375 | 0 | _settings.evmVersion |
376 | 0 | }.optimise(); |
377 | | |
378 | 27.3k | if (_settings.runJumpdestRemover) |
379 | 27.3k | { |
380 | 27.3k | JumpdestRemover jumpdestOpt{m_items}; |
381 | 27.3k | if (jumpdestOpt.optimise(_tagsReferencedFromOutside)) |
382 | 13.6k | count++; |
383 | 27.3k | } |
384 | | |
385 | 27.3k | if (_settings.runPeephole) |
386 | 27.3k | { |
387 | 27.3k | PeepholeOptimiser peepOpt{m_items}; |
388 | 46.3k | while (peepOpt.optimise()) |
389 | 19.0k | { |
390 | 19.0k | count++; |
391 | 19.0k | assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck."); |
392 | 19.0k | } |
393 | 27.3k | } |
394 | | |
395 | | // This only modifies PushTags, we have to run again to actually remove code. |
396 | 27.3k | if (_settings.runDeduplicate) |
397 | 0 | { |
398 | 0 | BlockDeduplicator deduplicator{m_items}; |
399 | 0 | if (deduplicator.deduplicate()) |
400 | 0 | { |
401 | 0 | for (auto const& replacement: deduplicator.replacedTags()) |
402 | 0 | { |
403 | 0 | assertThrow( |
404 | 0 | replacement.first <= numeric_limits<size_t>::max() && replacement.second <= numeric_limits<size_t>::max(), |
405 | 0 | OptimizerException, |
406 | 0 | "Invalid tag replacement." |
407 | 0 | ); |
408 | 0 | assertThrow( |
409 | 0 | !tagReplacements.count(replacement.first), |
410 | 0 | OptimizerException, |
411 | 0 | "Replacement already known." |
412 | 0 | ); |
413 | 0 | tagReplacements[replacement.first] = replacement.second; |
414 | 0 | if (_tagsReferencedFromOutside.erase(static_cast<size_t>(replacement.first))) |
415 | 0 | _tagsReferencedFromOutside.insert(static_cast<size_t>(replacement.second)); |
416 | 0 | } |
417 | 0 | count++; |
418 | 0 | } |
419 | 0 | } |
420 | | |
421 | 27.3k | if (_settings.runCSE) |
422 | 0 | { |
423 | | // Control flow graph optimization has been here before but is disabled because it |
424 | | // assumes we only jump to tags that are pushed. This is not the case anymore with |
425 | | // function types that can be stored in storage. |
426 | 0 | AssemblyItems optimisedItems; |
427 | |
|
428 | 0 | bool usesMSize = ranges::any_of(m_items, [](AssemblyItem const& _i) { |
429 | 0 | return _i == AssemblyItem{Instruction::MSIZE} || _i.type() == VerbatimBytecode; |
430 | 0 | }); |
431 | |
|
432 | 0 | auto iter = m_items.begin(); |
433 | 0 | while (iter != m_items.end()) |
434 | 0 | { |
435 | 0 | KnownState emptyState; |
436 | 0 | CommonSubexpressionEliminator eliminator{emptyState}; |
437 | 0 | auto orig = iter; |
438 | 0 | iter = eliminator.feedItems(iter, m_items.end(), usesMSize); |
439 | 0 | bool shouldReplace = false; |
440 | 0 | AssemblyItems optimisedChunk; |
441 | 0 | try |
442 | 0 | { |
443 | 0 | optimisedChunk = eliminator.getOptimizedItems(); |
444 | 0 | shouldReplace = (optimisedChunk.size() < static_cast<size_t>(iter - orig)); |
445 | 0 | } |
446 | 0 | catch (StackTooDeepException const&) |
447 | 0 | { |
448 | | // This might happen if the opcode reconstruction is not as efficient |
449 | | // as the hand-crafted code. |
450 | 0 | } |
451 | 0 | catch (ItemNotAvailableException const&) |
452 | 0 | { |
453 | | // This might happen if e.g. associativity and commutativity rules |
454 | | // reorganise the expression tree, but not all leaves are available. |
455 | 0 | } |
456 | |
|
457 | 0 | if (shouldReplace) |
458 | 0 | { |
459 | 0 | count++; |
460 | 0 | optimisedItems += optimisedChunk; |
461 | 0 | } |
462 | 0 | else |
463 | 0 | copy(orig, iter, back_inserter(optimisedItems)); |
464 | 0 | } |
465 | 0 | if (optimisedItems.size() < m_items.size()) |
466 | 0 | { |
467 | 0 | m_items = move(optimisedItems); |
468 | 0 | count++; |
469 | 0 | } |
470 | 0 | } |
471 | 27.3k | } |
472 | | |
473 | 9.10k | if (_settings.runConstantOptimiser) |
474 | 0 | ConstantOptimisationMethod::optimiseConstants( |
475 | 0 | isCreation(), |
476 | 0 | isCreation() ? 1 : _settings.expectedExecutionsPerDeployment, |
477 | 0 | _settings.evmVersion, |
478 | 0 | *this |
479 | 0 | ); |
480 | | |
481 | 9.10k | m_tagReplacements = move(tagReplacements); |
482 | 9.10k | return *m_tagReplacements; |
483 | 9.10k | } |
484 | | |
485 | | LinkerObject const& Assembly::assemble() const |
486 | 27.3k | { |
487 | 27.3k | assertThrow(!m_invalid, AssemblyException, "Attempted to assemble invalid Assembly object."); |
488 | | // Return the already assembled object, if present. |
489 | 27.3k | if (!m_assembledObject.bytecode.empty()) |
490 | 18.2k | return m_assembledObject; |
491 | | // Otherwise ensure the object is actually clear. |
492 | 9.10k | assertThrow(m_assembledObject.linkReferences.empty(), AssemblyException, "Unexpected link references."); |
493 | | |
494 | 9.10k | LinkerObject& ret = m_assembledObject; |
495 | | |
496 | 9.10k | size_t subTagSize = 1; |
497 | 9.10k | map<u256, pair<string, vector<size_t>>> immutableReferencesBySub; |
498 | 9.10k | for (auto const& sub: m_subs) |
499 | 4.55k | { |
500 | 4.55k | auto const& linkerObject = sub->assemble(); |
501 | 4.55k | if (!linkerObject.immutableReferences.empty()) |
502 | 0 | { |
503 | 0 | assertThrow( |
504 | 0 | immutableReferencesBySub.empty(), |
505 | 0 | AssemblyException, |
506 | 0 | "More than one sub-assembly references immutables." |
507 | 0 | ); |
508 | 0 | immutableReferencesBySub = linkerObject.immutableReferences; |
509 | 0 | } |
510 | 4.55k | for (size_t tagPos: sub->m_tagPositionsInBytecode) |
511 | 11.5M | if (tagPos != numeric_limits<size_t>::max() && tagPos > subTagSize) |
512 | 109k | subTagSize = tagPos; |
513 | 4.55k | } |
514 | | |
515 | 9.10k | bool setsImmutables = false; |
516 | 9.10k | bool pushesImmutables = false; |
517 | | |
518 | 9.10k | for (auto const& i: m_items) |
519 | 102M | if (i.type() == AssignImmutable) |
520 | 0 | { |
521 | 0 | i.setImmutableOccurrences(immutableReferencesBySub[i.data()].second.size()); |
522 | 0 | setsImmutables = true; |
523 | 0 | } |
524 | 102M | else if (i.type() == PushImmutable) |
525 | 0 | pushesImmutables = true; |
526 | 9.10k | if (setsImmutables || pushesImmutables) |
527 | 9.10k | assertThrow( |
528 | 9.10k | setsImmutables != pushesImmutables, |
529 | 9.10k | AssemblyException, |
530 | 9.10k | "Cannot push and assign immutables in the same assembly subroutine." |
531 | 9.10k | ); |
532 | | |
533 | 9.10k | unsigned bytesRequiredForCode = codeSize(static_cast<unsigned>(subTagSize)); |
534 | 9.10k | m_tagPositionsInBytecode = vector<size_t>(m_usedTags, numeric_limits<size_t>::max()); |
535 | 9.10k | map<size_t, pair<size_t, size_t>> tagRef; |
536 | 9.10k | multimap<h256, unsigned> dataRef; |
537 | 9.10k | multimap<size_t, size_t> subRef; |
538 | 9.10k | vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted |
539 | 9.10k | unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); |
540 | 9.10k | uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag)); |
541 | | |
542 | 9.10k | unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast<unsigned>(m_auxiliaryData.size()); |
543 | 9.10k | for (auto const& sub: m_subs) |
544 | 4.55k | bytesRequiredIncludingData += static_cast<unsigned>(sub->assemble().bytecode.size()); |
545 | | |
546 | 9.10k | unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); |
547 | 9.10k | uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef)); |
548 | 9.10k | ret.bytecode.reserve(bytesRequiredIncludingData); |
549 | | |
550 | 9.10k | for (AssemblyItem const& i: m_items) |
551 | 102M | { |
552 | | // store position of the invalid jump destination |
553 | 102M | if (i.type() != Tag && m_tagPositionsInBytecode[0] == numeric_limits<size_t>::max()) |
554 | 9.10k | m_tagPositionsInBytecode[0] = ret.bytecode.size(); |
555 | | |
556 | 102M | switch (i.type()) |
557 | 102M | { |
558 | 59.3M | case Operation: |
559 | 59.3M | ret.bytecode.push_back(static_cast<uint8_t>(i.instruction())); |
560 | 59.3M | break; |
561 | 18.1M | case Push: |
562 | 18.1M | { |
563 | 18.1M | unsigned b = max<unsigned>(1, numberEncodingSize(i.data())); |
564 | 18.1M | ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b))); |
565 | 18.1M | ret.bytecode.resize(ret.bytecode.size() + b); |
566 | 18.1M | bytesRef byr(&ret.bytecode.back() + 1 - b, b); |
567 | 18.1M | toBigEndian(i.data(), byr); |
568 | 18.1M | break; |
569 | 0 | } |
570 | 15.1M | case PushTag: |
571 | 15.1M | { |
572 | 15.1M | ret.bytecode.push_back(tagPush); |
573 | 15.1M | tagRef[ret.bytecode.size()] = i.splitForeignPushTag(); |
574 | 15.1M | ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); |
575 | 15.1M | break; |
576 | 0 | } |
577 | 137k | case PushData: |
578 | 137k | ret.bytecode.push_back(dataRefPush); |
579 | 137k | dataRef.insert(make_pair(h256(i.data()), ret.bytecode.size())); |
580 | 137k | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
581 | 137k | break; |
582 | 4.55k | case PushSub: |
583 | 4.55k | assertThrow(i.data() <= numeric_limits<size_t>::max(), AssemblyException, ""); |
584 | 4.55k | ret.bytecode.push_back(dataRefPush); |
585 | 4.55k | subRef.insert(make_pair(static_cast<size_t>(i.data()), ret.bytecode.size())); |
586 | 4.55k | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
587 | 4.55k | break; |
588 | 4.55k | case PushSubSize: |
589 | 4.55k | { |
590 | 4.55k | assertThrow(i.data() <= numeric_limits<size_t>::max(), AssemblyException, ""); |
591 | 4.55k | auto s = subAssemblyById(static_cast<size_t>(i.data()))->assemble().bytecode.size(); |
592 | 4.55k | i.setPushedValue(u256(s)); |
593 | 4.55k | unsigned b = max<unsigned>(1, numberEncodingSize(s)); |
594 | 4.55k | ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b))); |
595 | 4.55k | ret.bytecode.resize(ret.bytecode.size() + b); |
596 | 4.55k | bytesRef byr(&ret.bytecode.back() + 1 - b, b); |
597 | 4.55k | toBigEndian(s, byr); |
598 | 4.55k | break; |
599 | 4.55k | } |
600 | 0 | case PushProgramSize: |
601 | 0 | { |
602 | 0 | ret.bytecode.push_back(dataRefPush); |
603 | 0 | sizeRef.push_back(static_cast<unsigned>(ret.bytecode.size())); |
604 | 0 | ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); |
605 | 0 | break; |
606 | 4.55k | } |
607 | 0 | case PushLibraryAddress: |
608 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH20)); |
609 | 0 | ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); |
610 | 0 | ret.bytecode.resize(ret.bytecode.size() + 20); |
611 | 0 | break; |
612 | 0 | case PushImmutable: |
613 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32)); |
614 | | // Maps keccak back to the "identifier" string of that immutable. |
615 | 0 | ret.immutableReferences[i.data()].first = m_immutables.at(i.data()); |
616 | | // Record the bytecode offset of the PUSH32 argument. |
617 | 0 | ret.immutableReferences[i.data()].second.emplace_back(ret.bytecode.size()); |
618 | | // Advance bytecode by 32 bytes (default initialized). |
619 | 0 | ret.bytecode.resize(ret.bytecode.size() + 32); |
620 | 0 | break; |
621 | 0 | case VerbatimBytecode: |
622 | 0 | ret.bytecode += i.verbatimData(); |
623 | 0 | break; |
624 | 0 | case AssignImmutable: |
625 | 0 | { |
626 | | // Expect 2 elements on stack (source, dest_base) |
627 | 0 | auto const& offsets = immutableReferencesBySub[i.data()].second; |
628 | 0 | for (size_t i = 0; i < offsets.size(); ++i) |
629 | 0 | { |
630 | 0 | if (i != offsets.size() - 1) |
631 | 0 | { |
632 | 0 | ret.bytecode.push_back(uint8_t(Instruction::DUP2)); |
633 | 0 | ret.bytecode.push_back(uint8_t(Instruction::DUP2)); |
634 | 0 | } |
635 | | // TODO: should we make use of the constant optimizer methods for pushing the offsets? |
636 | 0 | bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); |
637 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size())))); |
638 | 0 | ret.bytecode += offsetBytes; |
639 | 0 | ret.bytecode.push_back(uint8_t(Instruction::ADD)); |
640 | 0 | ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); |
641 | 0 | } |
642 | 0 | if (offsets.empty()) |
643 | 0 | { |
644 | 0 | ret.bytecode.push_back(uint8_t(Instruction::POP)); |
645 | 0 | ret.bytecode.push_back(uint8_t(Instruction::POP)); |
646 | 0 | } |
647 | 0 | immutableReferencesBySub.erase(i.data()); |
648 | 0 | break; |
649 | 4.55k | } |
650 | 0 | case PushDeployTimeAddress: |
651 | 0 | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH20)); |
652 | 0 | ret.bytecode.resize(ret.bytecode.size() + 20); |
653 | 0 | break; |
654 | 10.1M | case Tag: |
655 | 10.1M | { |
656 | 10.1M | assertThrow(i.data() != 0, AssemblyException, "Invalid tag position."); |
657 | 10.1M | assertThrow(i.splitForeignPushTag().first == numeric_limits<size_t>::max(), AssemblyException, "Foreign tag."); |
658 | 10.1M | size_t tagId = static_cast<size_t>(i.data()); |
659 | 10.1M | assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); |
660 | 10.1M | assertThrow(m_tagPositionsInBytecode[tagId] == numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position."); |
661 | 10.1M | m_tagPositionsInBytecode[tagId] = ret.bytecode.size(); |
662 | 10.1M | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::JUMPDEST)); |
663 | 10.1M | break; |
664 | 10.1M | } |
665 | 0 | default: |
666 | 0 | assertThrow(false, InvalidOpcode, "Unexpected opcode while assembling."); |
667 | 102M | } |
668 | 102M | } |
669 | | |
670 | 9.10k | if (!immutableReferencesBySub.empty()) |
671 | 0 | throw |
672 | 0 | langutil::Error( |
673 | 0 | 1284_error, |
674 | 0 | langutil::Error::Type::CodeGenerationError, |
675 | 0 | "Some immutables were read from but never assigned, possibly because of optimization." |
676 | 0 | ); |
677 | | |
678 | 9.10k | if (!m_subs.empty() || !m_data.empty() || !m_auxiliaryData.empty()) |
679 | | // Append an INVALID here to help tests find miscompilation. |
680 | 9.10k | ret.bytecode.push_back(static_cast<uint8_t>(Instruction::INVALID)); |
681 | | |
682 | 9.10k | for (auto const& [subIdPath, bytecodeOffset]: subRef) |
683 | 4.55k | { |
684 | 4.55k | bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef); |
685 | 4.55k | toBigEndian(ret.bytecode.size(), r); |
686 | 4.55k | ret.append(subAssemblyById(subIdPath)->assemble()); |
687 | 4.55k | } |
688 | | |
689 | 9.10k | for (auto const& i: tagRef) |
690 | 15.1M | { |
691 | 15.1M | size_t subId; |
692 | 15.1M | size_t tagId; |
693 | 15.1M | tie(subId, tagId) = i.second; |
694 | 15.1M | assertThrow(subId == numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id"); |
695 | 15.1M | vector<size_t> const& tagPositions = |
696 | 15.1M | subId == numeric_limits<size_t>::max() ? |
697 | 15.1M | m_tagPositionsInBytecode : |
698 | 15.1M | m_subs[subId]->m_tagPositionsInBytecode; |
699 | 15.1M | assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag."); |
700 | 15.1M | size_t pos = tagPositions[tagId]; |
701 | 15.1M | assertThrow(pos != numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position."); |
702 | 15.1M | assertThrow(numberEncodingSize(pos) <= bytesPerTag, AssemblyException, "Tag too large for reserved space."); |
703 | 15.1M | bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); |
704 | 15.1M | toBigEndian(pos, r); |
705 | 15.1M | } |
706 | 9.10k | for (auto const& [name, tagInfo]: m_namedTags) |
707 | 585k | { |
708 | 585k | size_t position = m_tagPositionsInBytecode.at(tagInfo.id); |
709 | 585k | optional<size_t> tagIndex; |
710 | 585k | for (auto&& [index, item]: m_items | ranges::views::enumerate) |
711 | 15.2G | if (item.type() == Tag && static_cast<size_t>(item.data()) == tagInfo.id) |
712 | 584k | { |
713 | 584k | tagIndex = index; |
714 | 584k | break; |
715 | 584k | } |
716 | 585k | ret.functionDebugData[name] = { |
717 | 585k | position == numeric_limits<size_t>::max() ? nullopt : optional<size_t>{position}, |
718 | 585k | tagIndex, |
719 | 585k | tagInfo.sourceID, |
720 | 585k | tagInfo.params, |
721 | 585k | tagInfo.returns |
722 | 585k | }; |
723 | 585k | } |
724 | | |
725 | 9.10k | for (auto const& dataItem: m_data) |
726 | 53.0k | { |
727 | 53.0k | auto references = dataRef.equal_range(dataItem.first); |
728 | 53.0k | if (references.first == references.second) |
729 | 0 | continue; |
730 | 190k | for (auto ref = references.first; ref != references.second; ++ref) |
731 | 137k | { |
732 | 137k | bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); |
733 | 137k | toBigEndian(ret.bytecode.size(), r); |
734 | 137k | } |
735 | 53.0k | ret.bytecode += dataItem.second; |
736 | 53.0k | } |
737 | | |
738 | 9.10k | ret.bytecode += m_auxiliaryData; |
739 | | |
740 | 9.10k | for (unsigned pos: sizeRef) |
741 | 0 | { |
742 | 0 | bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); |
743 | 0 | toBigEndian(ret.bytecode.size(), r); |
744 | 0 | } |
745 | 9.10k | return ret; |
746 | 9.10k | } |
747 | | |
748 | | vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const |
749 | 9.10k | { |
750 | 9.10k | if (_subObjectId < m_subs.size()) |
751 | 9.10k | return {_subObjectId}; |
752 | | |
753 | 0 | auto subIdPathIt = find_if( |
754 | 0 | m_subPaths.begin(), |
755 | 0 | m_subPaths.end(), |
756 | 0 | [_subObjectId](auto const& subId) { return subId.second == _subObjectId; } |
757 | 0 | ); |
758 | |
|
759 | 0 | assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, ""); |
760 | 0 | return subIdPathIt->first; |
761 | 0 | } |
762 | | |
763 | | size_t Assembly::encodeSubPath(vector<size_t> const& _subPath) |
764 | 0 | { |
765 | 0 | assertThrow(!_subPath.empty(), AssemblyException, ""); |
766 | 0 | if (_subPath.size() == 1) |
767 | 0 | { |
768 | 0 | assertThrow(_subPath[0] < m_subs.size(), AssemblyException, ""); |
769 | 0 | return _subPath[0]; |
770 | 0 | } |
771 | | |
772 | 0 | if (m_subPaths.find(_subPath) == m_subPaths.end()) |
773 | 0 | { |
774 | 0 | size_t objectId = numeric_limits<size_t>::max() - m_subPaths.size(); |
775 | 0 | assertThrow(objectId >= m_subs.size(), AssemblyException, ""); |
776 | 0 | m_subPaths[_subPath] = objectId; |
777 | 0 | } |
778 | | |
779 | 0 | return m_subPaths[_subPath]; |
780 | 0 | } |
781 | | |
782 | | Assembly const* Assembly::subAssemblyById(size_t _subId) const |
783 | 9.10k | { |
784 | 9.10k | vector<size_t> subIds = decodeSubPath(_subId); |
785 | 9.10k | Assembly const* currentAssembly = this; |
786 | 9.10k | for (size_t currentSubId: subIds) |
787 | 9.10k | { |
788 | 9.10k | currentAssembly = currentAssembly->m_subs.at(currentSubId).get(); |
789 | 9.10k | assertThrow(currentAssembly, AssemblyException, ""); |
790 | 9.10k | } |
791 | | |
792 | 9.10k | assertThrow(currentAssembly != this, AssemblyException, ""); |
793 | 9.10k | return currentAssembly; |
794 | 9.10k | } |