/src/solidity/libevmasm/KnownState.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | This file is part of solidity. |
3 | | |
4 | | solidity is free software: you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License as published by |
6 | | the Free Software Foundation, either version 3 of the License, or |
7 | | (at your option) any later version. |
8 | | |
9 | | solidity is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with solidity. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | // SPDX-License-Identifier: GPL-3.0 |
18 | | /** |
19 | | * @file KnownState.cpp |
20 | | * @author Christian <c@ethdev.com> |
21 | | * @date 2015 |
22 | | * Contains knowledge about the state of the virtual machine at a specific instruction. |
23 | | */ |
24 | | |
25 | | #include <libevmasm/KnownState.h> |
26 | | #include <libevmasm/AssemblyItem.h> |
27 | | #include <libsolutil/Keccak256.h> |
28 | | |
29 | | #include <functional> |
30 | | |
31 | | using namespace std; |
32 | | using namespace solidity; |
33 | | using namespace solidity::evmasm; |
34 | | using namespace solidity::langutil; |
35 | | |
36 | | ostream& KnownState::stream(ostream& _out) const |
37 | 0 | { |
38 | 0 | auto streamExpressionClass = [this](ostream& _out, Id _id) |
39 | 0 | { |
40 | 0 | auto const& expr = m_expressionClasses->representative(_id); |
41 | 0 | _out << " " << dec << _id << ": "; |
42 | 0 | if (!expr.item) |
43 | 0 | _out << " no item"; |
44 | 0 | else if (expr.item->type() == UndefinedItem) |
45 | 0 | _out << " unknown " << static_cast<int>(expr.item->data()); |
46 | 0 | else |
47 | 0 | _out << *expr.item; |
48 | 0 | if (expr.sequenceNumber) |
49 | 0 | _out << "@" << dec << expr.sequenceNumber; |
50 | 0 | _out << "("; |
51 | 0 | for (Id arg: expr.arguments) |
52 | 0 | _out << dec << arg << ","; |
53 | 0 | _out << ")" << endl; |
54 | 0 | }; |
55 | |
|
56 | 0 | _out << "=== State ===" << endl; |
57 | 0 | _out << "Stack height: " << dec << m_stackHeight << endl; |
58 | 0 | _out << "Equivalence classes:" << endl; |
59 | 0 | for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) |
60 | 0 | streamExpressionClass(_out, eqClass); |
61 | |
|
62 | 0 | _out << "Stack:" << endl; |
63 | 0 | for (auto const& it: m_stackElements) |
64 | 0 | { |
65 | 0 | _out << " " << dec << it.first << ": "; |
66 | 0 | streamExpressionClass(_out, it.second); |
67 | 0 | } |
68 | 0 | _out << "Storage:" << endl; |
69 | 0 | for (auto const& it: m_storageContent) |
70 | 0 | { |
71 | 0 | _out << " "; |
72 | 0 | streamExpressionClass(_out, it.first); |
73 | 0 | _out << ": "; |
74 | 0 | streamExpressionClass(_out, it.second); |
75 | 0 | } |
76 | 0 | _out << "Memory:" << endl; |
77 | 0 | for (auto const& it: m_memoryContent) |
78 | 0 | { |
79 | 0 | _out << " "; |
80 | 0 | streamExpressionClass(_out, it.first); |
81 | 0 | _out << ": "; |
82 | 0 | streamExpressionClass(_out, it.second); |
83 | 0 | } |
84 | |
|
85 | 0 | return _out; |
86 | 0 | } |
87 | | |
88 | | KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) |
89 | 0 | { |
90 | 0 | StoreOperation op; |
91 | 0 | if (_item.type() == Tag) |
92 | 0 | { |
93 | | // can be ignored |
94 | 0 | } |
95 | 0 | else if (_item.type() == AssignImmutable) |
96 | 0 | { |
97 | | // Since AssignImmutable breaks blocks, it should be fine to only consider its changes to the stack, which |
98 | | // is the same as two POPs. |
99 | | // Note that the StoreOperation for POP is generic and _copyItem is ignored. |
100 | 0 | feedItem(AssemblyItem(Instruction::POP), _copyItem); |
101 | 0 | return feedItem(AssemblyItem(Instruction::POP), _copyItem); |
102 | 0 | } |
103 | 0 | else if (_item.type() == VerbatimBytecode) |
104 | 0 | { |
105 | 0 | m_sequenceNumber += 2; |
106 | 0 | resetMemory(); |
107 | 0 | resetKnownKeccak256Hashes(); |
108 | 0 | resetStorage(); |
109 | | // Consume all arguments and place unknown return values on the stack. |
110 | 0 | m_stackElements.erase( |
111 | 0 | m_stackElements.upper_bound(m_stackHeight - static_cast<int>(_item.arguments())), |
112 | 0 | m_stackElements.end() |
113 | 0 | ); |
114 | 0 | m_stackHeight += static_cast<int>(_item.deposit()); |
115 | 0 | for (size_t i = 0; i < _item.returnValues(); ++i) |
116 | 0 | setStackElement( |
117 | 0 | m_stackHeight - static_cast<int>(i), |
118 | 0 | m_expressionClasses->newClass(_item.location()) |
119 | 0 | ); |
120 | 0 | } |
121 | 0 | else if (_item.type() != Operation) |
122 | 0 | { |
123 | 0 | assertThrow(_item.deposit() == 1, InvalidDeposit, ""); |
124 | 0 | if (_item.pushedValue()) |
125 | | // only available after assembly stage, should not be used for optimisation |
126 | 0 | setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue())); |
127 | 0 | else |
128 | 0 | setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); |
129 | 0 | } |
130 | 0 | else |
131 | 0 | { |
132 | 0 | Instruction instruction = _item.instruction(); |
133 | 0 | InstructionInfo info = instructionInfo(instruction); |
134 | 0 | if (SemanticInformation::isDupInstruction(_item)) |
135 | 0 | setStackElement( |
136 | 0 | m_stackHeight + 1, |
137 | 0 | stackElement( |
138 | 0 | m_stackHeight - static_cast<int>(instruction) + static_cast<int>(Instruction::DUP1), |
139 | 0 | _item.location() |
140 | 0 | ) |
141 | 0 | ); |
142 | 0 | else if (SemanticInformation::isSwapInstruction(_item)) |
143 | 0 | swapStackElements( |
144 | 0 | m_stackHeight, |
145 | 0 | m_stackHeight - 1 - static_cast<int>(instruction) + static_cast<int>(Instruction::SWAP1), |
146 | 0 | _item.location() |
147 | 0 | ); |
148 | 0 | else if (instruction != Instruction::POP) |
149 | 0 | { |
150 | 0 | vector<Id> arguments(static_cast<size_t>(info.args)); |
151 | 0 | for (size_t i = 0; i < static_cast<size_t>(info.args); ++i) |
152 | 0 | arguments[i] = stackElement(m_stackHeight - static_cast<int>(i), _item.location()); |
153 | 0 | switch (_item.instruction()) |
154 | 0 | { |
155 | 0 | case Instruction::SSTORE: |
156 | 0 | op = storeInStorage(arguments[0], arguments[1], _item.location()); |
157 | 0 | break; |
158 | 0 | case Instruction::SLOAD: |
159 | 0 | setStackElement( |
160 | 0 | m_stackHeight + static_cast<int>(_item.deposit()), |
161 | 0 | loadFromStorage(arguments[0], _item.location()) |
162 | 0 | ); |
163 | 0 | break; |
164 | 0 | case Instruction::MSTORE: |
165 | 0 | op = storeInMemory(arguments[0], arguments[1], _item.location()); |
166 | 0 | break; |
167 | 0 | case Instruction::MLOAD: |
168 | 0 | setStackElement( |
169 | 0 | m_stackHeight + static_cast<int>(_item.deposit()), |
170 | 0 | loadFromMemory(arguments[0], _item.location()) |
171 | 0 | ); |
172 | 0 | break; |
173 | 0 | case Instruction::KECCAK256: |
174 | 0 | setStackElement( |
175 | 0 | m_stackHeight + static_cast<int>(_item.deposit()), |
176 | 0 | applyKeccak256(arguments.at(0), arguments.at(1), _item.location()) |
177 | 0 | ); |
178 | 0 | break; |
179 | 0 | default: |
180 | 0 | bool invMem = |
181 | 0 | SemanticInformation::memory(_item.instruction()) == SemanticInformation::Write; |
182 | 0 | bool invStor = |
183 | 0 | SemanticInformation::storage(_item.instruction()) == SemanticInformation::Write; |
184 | | // We could be a bit more fine-grained here (CALL only invalidates part of |
185 | | // memory, etc), but we do not for now. |
186 | 0 | if (invMem) |
187 | 0 | { |
188 | 0 | resetMemory(); |
189 | 0 | resetKnownKeccak256Hashes(); |
190 | 0 | } |
191 | 0 | if (invStor) |
192 | 0 | resetStorage(); |
193 | 0 | if (invMem || invStor) |
194 | 0 | m_sequenceNumber += 2; // Increment by two because it can read and write |
195 | 0 | assertThrow(info.ret <= 1, InvalidDeposit, ""); |
196 | 0 | if (info.ret == 1) |
197 | 0 | setStackElement( |
198 | 0 | m_stackHeight + static_cast<int>(_item.deposit()), |
199 | 0 | m_expressionClasses->find(_item, arguments, _copyItem) |
200 | 0 | ); |
201 | 0 | } |
202 | 0 | } |
203 | 0 | m_stackElements.erase( |
204 | 0 | m_stackElements.upper_bound(m_stackHeight + static_cast<int>(_item.deposit())), |
205 | 0 | m_stackElements.end() |
206 | 0 | ); |
207 | 0 | m_stackHeight += static_cast<int>(_item.deposit()); |
208 | 0 | } |
209 | 0 | return op; |
210 | 0 | } |
211 | | |
212 | | /// Helper function for KnownState::reduceToCommonKnowledge, removes everything from |
213 | | /// _this which is not in or not equal to the value in _other. |
214 | | template <class Mapping> void intersect(Mapping& _this, Mapping const& _other) |
215 | 0 | { |
216 | 0 | for (auto it = _this.begin(); it != _this.end();) |
217 | 0 | if (_other.count(it->first) && _other.at(it->first) == it->second) |
218 | 0 | ++it; |
219 | 0 | else |
220 | 0 | it = _this.erase(it); |
221 | 0 | } |
222 | | |
223 | | void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers) |
224 | 0 | { |
225 | 0 | int stackDiff = m_stackHeight - _other.m_stackHeight; |
226 | 0 | for (auto it = m_stackElements.begin(); it != m_stackElements.end();) |
227 | 0 | if (_other.m_stackElements.count(it->first - stackDiff)) |
228 | 0 | { |
229 | 0 | Id other = _other.m_stackElements.at(it->first - stackDiff); |
230 | 0 | if (it->second == other) |
231 | 0 | ++it; |
232 | 0 | else |
233 | 0 | { |
234 | 0 | set<u256> theseTags = tagsInExpression(it->second); |
235 | 0 | set<u256> otherTags = tagsInExpression(other); |
236 | 0 | if (!theseTags.empty() && !otherTags.empty()) |
237 | 0 | { |
238 | 0 | theseTags.insert(otherTags.begin(), otherTags.end()); |
239 | 0 | it->second = tagUnion(theseTags); |
240 | 0 | ++it; |
241 | 0 | } |
242 | 0 | else |
243 | 0 | it = m_stackElements.erase(it); |
244 | 0 | } |
245 | 0 | } |
246 | 0 | else |
247 | 0 | it = m_stackElements.erase(it); |
248 | | |
249 | | // Use the smaller stack height. Essential to terminate in case of loops. |
250 | 0 | if (m_stackHeight > _other.m_stackHeight) |
251 | 0 | { |
252 | 0 | map<int, Id> shiftedStack; |
253 | 0 | for (auto const& stackElement: m_stackElements) |
254 | 0 | shiftedStack[stackElement.first - stackDiff] = stackElement.second; |
255 | 0 | m_stackElements = move(shiftedStack); |
256 | 0 | m_stackHeight = _other.m_stackHeight; |
257 | 0 | } |
258 | |
|
259 | 0 | intersect(m_storageContent, _other.m_storageContent); |
260 | 0 | intersect(m_memoryContent, _other.m_memoryContent); |
261 | 0 | if (_combineSequenceNumbers) |
262 | 0 | m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber); |
263 | 0 | } |
264 | | |
265 | | bool KnownState::operator==(KnownState const& _other) const |
266 | 0 | { |
267 | 0 | if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) |
268 | 0 | return false; |
269 | 0 | int stackDiff = m_stackHeight - _other.m_stackHeight; |
270 | 0 | auto thisIt = m_stackElements.cbegin(); |
271 | 0 | auto otherIt = _other.m_stackElements.cbegin(); |
272 | 0 | for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt) |
273 | 0 | if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second) |
274 | 0 | return false; |
275 | 0 | return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend()); |
276 | 0 | } |
277 | | |
278 | | ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) |
279 | 0 | { |
280 | 0 | if (m_stackElements.count(_stackHeight)) |
281 | 0 | return m_stackElements.at(_stackHeight); |
282 | | // Stack element not found (not assigned yet), create new unknown equivalence class. |
283 | 0 | return m_stackElements[_stackHeight] = |
284 | 0 | m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); |
285 | 0 | } |
286 | | |
287 | | KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location) |
288 | 0 | { |
289 | 0 | return stackElement(m_stackHeight + _stackOffset, _location); |
290 | 0 | } |
291 | | |
292 | | void KnownState::clearTagUnions() |
293 | 0 | { |
294 | 0 | for (auto it = m_stackElements.begin(); it != m_stackElements.end();) |
295 | 0 | if (m_tagUnions.left.count(it->second)) |
296 | 0 | it = m_stackElements.erase(it); |
297 | 0 | else |
298 | 0 | ++it; |
299 | 0 | } |
300 | | |
301 | | void KnownState::setStackElement(int _stackHeight, Id _class) |
302 | 0 | { |
303 | 0 | m_stackElements[_stackHeight] = _class; |
304 | 0 | } |
305 | | |
306 | | void KnownState::swapStackElements( |
307 | | int _stackHeightA, |
308 | | int _stackHeightB, |
309 | | SourceLocation const& _location |
310 | | ) |
311 | 0 | { |
312 | 0 | assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); |
313 | | // ensure they are created |
314 | 0 | stackElement(_stackHeightA, _location); |
315 | 0 | stackElement(_stackHeightB, _location); |
316 | |
|
317 | 0 | swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); |
318 | 0 | } |
319 | | |
320 | | KnownState::StoreOperation KnownState::storeInStorage( |
321 | | Id _slot, |
322 | | Id _value, |
323 | | SourceLocation const& _location) |
324 | 0 | { |
325 | 0 | if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) |
326 | | // do not execute the storage if we know that the value is already there |
327 | 0 | return StoreOperation(); |
328 | 0 | m_sequenceNumber++; |
329 | 0 | decltype(m_storageContent) storageContents; |
330 | | // Copy over all values (i.e. retain knowledge about them) where we know that this store |
331 | | // operation will not destroy the knowledge. Specifically, we copy storage locations we know |
332 | | // are different from _slot or locations where we know that the stored value is equal to _value. |
333 | 0 | for (auto const& storageItem: m_storageContent) |
334 | 0 | if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) |
335 | 0 | storageContents.insert(storageItem); |
336 | 0 | m_storageContent = move(storageContents); |
337 | |
|
338 | 0 | AssemblyItem item(Instruction::SSTORE, _location); |
339 | 0 | Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); |
340 | 0 | StoreOperation operation{StoreOperation::Storage, _slot, m_sequenceNumber, id}; |
341 | 0 | m_storageContent[_slot] = _value; |
342 | | // increment a second time so that we get unique sequence numbers for writes |
343 | 0 | m_sequenceNumber++; |
344 | |
|
345 | 0 | return operation; |
346 | 0 | } |
347 | | |
348 | | ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) |
349 | 0 | { |
350 | 0 | if (m_storageContent.count(_slot)) |
351 | 0 | return m_storageContent.at(_slot); |
352 | | |
353 | 0 | AssemblyItem item(Instruction::SLOAD, _location); |
354 | 0 | return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); |
355 | 0 | } |
356 | | |
357 | | KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) |
358 | 0 | { |
359 | 0 | if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) |
360 | | // do not execute the store if we know that the value is already there |
361 | 0 | return StoreOperation(); |
362 | 0 | m_sequenceNumber++; |
363 | 0 | decltype(m_memoryContent) memoryContents; |
364 | | // copy over values at points where we know that they are different from _slot by at least 32 |
365 | 0 | for (auto const& memoryItem: m_memoryContent) |
366 | 0 | if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) |
367 | 0 | memoryContents.insert(memoryItem); |
368 | 0 | m_memoryContent = move(memoryContents); |
369 | |
|
370 | 0 | AssemblyItem item(Instruction::MSTORE, _location); |
371 | 0 | Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); |
372 | 0 | StoreOperation operation{StoreOperation::Memory, _slot, m_sequenceNumber, id}; |
373 | 0 | m_memoryContent[_slot] = _value; |
374 | | // increment a second time so that we get unique sequence numbers for writes |
375 | 0 | m_sequenceNumber++; |
376 | 0 | return operation; |
377 | 0 | } |
378 | | |
379 | | ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) |
380 | 0 | { |
381 | 0 | if (m_memoryContent.count(_slot)) |
382 | 0 | return m_memoryContent.at(_slot); |
383 | | |
384 | 0 | AssemblyItem item(Instruction::MLOAD, _location); |
385 | 0 | return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); |
386 | 0 | } |
387 | | |
388 | | KnownState::Id KnownState::applyKeccak256( |
389 | | Id _start, |
390 | | Id _length, |
391 | | SourceLocation const& _location |
392 | | ) |
393 | 0 | { |
394 | 0 | AssemblyItem keccak256Item(Instruction::KECCAK256, _location); |
395 | | // Special logic if length is a short constant, otherwise we cannot tell. |
396 | 0 | u256 const* l = m_expressionClasses->knownConstant(_length); |
397 | | // unknown or too large length |
398 | 0 | if (!l || *l > 128) |
399 | 0 | return m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber); |
400 | 0 | unsigned length = unsigned(*l); |
401 | 0 | vector<Id> arguments; |
402 | 0 | for (unsigned i = 0; i < length; i += 32) |
403 | 0 | { |
404 | 0 | Id slot = m_expressionClasses->find( |
405 | 0 | AssemblyItem(Instruction::ADD, _location), |
406 | 0 | {_start, m_expressionClasses->find(u256(i))} |
407 | 0 | ); |
408 | 0 | arguments.push_back(loadFromMemory(slot, _location)); |
409 | 0 | } |
410 | 0 | if (m_knownKeccak256Hashes.count({arguments, length})) |
411 | 0 | return m_knownKeccak256Hashes.at({arguments, length}); |
412 | 0 | Id v; |
413 | | // If all arguments are known constants, compute the Keccak-256 here |
414 | 0 | if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) |
415 | 0 | { |
416 | 0 | bytes data; |
417 | 0 | for (Id a: arguments) |
418 | 0 | data += toBigEndian(*m_expressionClasses->knownConstant(a)); |
419 | 0 | data.resize(length); |
420 | 0 | v = m_expressionClasses->find(AssemblyItem(u256(util::keccak256(data)), _location)); |
421 | 0 | } |
422 | 0 | else |
423 | 0 | v = m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber); |
424 | 0 | return m_knownKeccak256Hashes[{arguments, length}] = v; |
425 | 0 | } |
426 | | |
427 | | set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId) |
428 | 0 | { |
429 | 0 | if (m_tagUnions.left.count(_expressionId)) |
430 | 0 | return m_tagUnions.left.at(_expressionId); |
431 | | // Might be a tag, then return the set of itself. |
432 | 0 | ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId); |
433 | 0 | if (expr.item && expr.item->type() == PushTag) |
434 | 0 | return set<u256>({expr.item->data()}); |
435 | 0 | else |
436 | 0 | return set<u256>(); |
437 | 0 | } |
438 | | |
439 | | KnownState::Id KnownState::tagUnion(set<u256> _tags) |
440 | 0 | { |
441 | 0 | if (m_tagUnions.right.count(_tags)) |
442 | 0 | return m_tagUnions.right.at(_tags); |
443 | 0 | else |
444 | 0 | { |
445 | 0 | Id id = m_expressionClasses->newClass(SourceLocation()); |
446 | 0 | m_tagUnions.right.insert(make_pair(_tags, id)); |
447 | 0 | return id; |
448 | 0 | } |
449 | 0 | } |