Coverage Report

Created: 2022-08-24 06:28

/src/solidity/libevmasm/KnownState.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
  This file is part of solidity.
3
4
  solidity is free software: you can redistribute it and/or modify
5
  it under the terms of the GNU General Public License as published by
6
  the Free Software Foundation, either version 3 of the License, or
7
  (at your option) any later version.
8
9
  solidity is distributed in the hope that it will be useful,
10
  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
  GNU General Public License for more details.
13
14
  You should have received a copy of the GNU General Public License
15
  along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16
*/
17
// SPDX-License-Identifier: GPL-3.0
18
/**
19
 * @file KnownState.cpp
20
 * @author Christian <c@ethdev.com>
21
 * @date 2015
22
 * Contains knowledge about the state of the virtual machine at a specific instruction.
23
 */
24
25
#include <libevmasm/KnownState.h>
26
#include <libevmasm/AssemblyItem.h>
27
#include <libsolutil/Keccak256.h>
28
29
#include <functional>
30
31
using namespace std;
32
using namespace solidity;
33
using namespace solidity::evmasm;
34
using namespace solidity::langutil;
35
36
ostream& KnownState::stream(ostream& _out) const
37
0
{
38
0
  auto streamExpressionClass = [this](ostream& _out, Id _id)
39
0
  {
40
0
    auto const& expr = m_expressionClasses->representative(_id);
41
0
    _out << "  " << dec << _id << ": ";
42
0
    if (!expr.item)
43
0
      _out << " no item";
44
0
    else if (expr.item->type() == UndefinedItem)
45
0
      _out << " unknown " << static_cast<int>(expr.item->data());
46
0
    else
47
0
      _out << *expr.item;
48
0
    if (expr.sequenceNumber)
49
0
      _out << "@" << dec << expr.sequenceNumber;
50
0
    _out << "(";
51
0
    for (Id arg: expr.arguments)
52
0
      _out << dec << arg << ",";
53
0
    _out << ")" << endl;
54
0
  };
55
56
0
  _out << "=== State ===" << endl;
57
0
  _out << "Stack height: " << dec << m_stackHeight << endl;
58
0
  _out << "Equivalence classes:" << endl;
59
0
  for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
60
0
    streamExpressionClass(_out, eqClass);
61
62
0
  _out << "Stack:" << endl;
63
0
  for (auto const& it: m_stackElements)
64
0
  {
65
0
    _out << "  " << dec << it.first << ": ";
66
0
    streamExpressionClass(_out, it.second);
67
0
  }
68
0
  _out << "Storage:" << endl;
69
0
  for (auto const& it: m_storageContent)
70
0
  {
71
0
    _out << "  ";
72
0
    streamExpressionClass(_out, it.first);
73
0
    _out << ": ";
74
0
    streamExpressionClass(_out, it.second);
75
0
  }
76
0
  _out << "Memory:" << endl;
77
0
  for (auto const& it: m_memoryContent)
78
0
  {
79
0
    _out << "  ";
80
0
    streamExpressionClass(_out, it.first);
81
0
    _out << ": ";
82
0
    streamExpressionClass(_out, it.second);
83
0
  }
84
85
0
  return _out;
86
0
}
87
88
KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
89
0
{
90
0
  StoreOperation op;
91
0
  if (_item.type() == Tag)
92
0
  {
93
    // can be ignored
94
0
  }
95
0
  else if (_item.type() == AssignImmutable)
96
0
  {
97
    // Since AssignImmutable breaks blocks, it should be fine to only consider its changes to the stack, which
98
    // is the same as two POPs.
99
    // Note that the StoreOperation for POP is generic and _copyItem is ignored.
100
0
    feedItem(AssemblyItem(Instruction::POP), _copyItem);
101
0
    return feedItem(AssemblyItem(Instruction::POP), _copyItem);
102
0
  }
103
0
  else if (_item.type() == VerbatimBytecode)
104
0
  {
105
0
    m_sequenceNumber += 2;
106
0
    resetMemory();
107
0
    resetKnownKeccak256Hashes();
108
0
    resetStorage();
109
    // Consume all arguments and place unknown return values on the stack.
110
0
    m_stackElements.erase(
111
0
      m_stackElements.upper_bound(m_stackHeight - static_cast<int>(_item.arguments())),
112
0
      m_stackElements.end()
113
0
    );
114
0
    m_stackHeight += static_cast<int>(_item.deposit());
115
0
    for (size_t i = 0; i < _item.returnValues(); ++i)
116
0
      setStackElement(
117
0
        m_stackHeight - static_cast<int>(i),
118
0
        m_expressionClasses->newClass(_item.location())
119
0
      );
120
0
  }
121
0
  else if (_item.type() != Operation)
122
0
  {
123
0
    assertThrow(_item.deposit() == 1, InvalidDeposit, "");
124
0
    if (_item.pushedValue())
125
      // only available after assembly stage, should not be used for optimisation
126
0
      setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue()));
127
0
    else
128
0
      setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
129
0
  }
130
0
  else
131
0
  {
132
0
    Instruction instruction = _item.instruction();
133
0
    InstructionInfo info = instructionInfo(instruction);
134
0
    if (SemanticInformation::isDupInstruction(_item))
135
0
      setStackElement(
136
0
        m_stackHeight + 1,
137
0
        stackElement(
138
0
          m_stackHeight - static_cast<int>(instruction) + static_cast<int>(Instruction::DUP1),
139
0
          _item.location()
140
0
        )
141
0
      );
142
0
    else if (SemanticInformation::isSwapInstruction(_item))
143
0
      swapStackElements(
144
0
        m_stackHeight,
145
0
        m_stackHeight - 1 - static_cast<int>(instruction) + static_cast<int>(Instruction::SWAP1),
146
0
        _item.location()
147
0
      );
148
0
    else if (instruction != Instruction::POP)
149
0
    {
150
0
      vector<Id> arguments(static_cast<size_t>(info.args));
151
0
      for (size_t i = 0; i < static_cast<size_t>(info.args); ++i)
152
0
        arguments[i] = stackElement(m_stackHeight - static_cast<int>(i), _item.location());
153
0
      switch (_item.instruction())
154
0
      {
155
0
      case Instruction::SSTORE:
156
0
        op = storeInStorage(arguments[0], arguments[1], _item.location());
157
0
        break;
158
0
      case Instruction::SLOAD:
159
0
        setStackElement(
160
0
          m_stackHeight + static_cast<int>(_item.deposit()),
161
0
          loadFromStorage(arguments[0], _item.location())
162
0
        );
163
0
        break;
164
0
      case Instruction::MSTORE:
165
0
        op = storeInMemory(arguments[0], arguments[1], _item.location());
166
0
        break;
167
0
      case Instruction::MLOAD:
168
0
        setStackElement(
169
0
          m_stackHeight + static_cast<int>(_item.deposit()),
170
0
          loadFromMemory(arguments[0], _item.location())
171
0
        );
172
0
        break;
173
0
      case Instruction::KECCAK256:
174
0
        setStackElement(
175
0
          m_stackHeight + static_cast<int>(_item.deposit()),
176
0
          applyKeccak256(arguments.at(0), arguments.at(1), _item.location())
177
0
        );
178
0
        break;
179
0
      default:
180
0
        bool invMem =
181
0
          SemanticInformation::memory(_item.instruction()) == SemanticInformation::Write;
182
0
        bool invStor =
183
0
          SemanticInformation::storage(_item.instruction()) == SemanticInformation::Write;
184
        // We could be a bit more fine-grained here (CALL only invalidates part of
185
        // memory, etc), but we do not for now.
186
0
        if (invMem)
187
0
        {
188
0
          resetMemory();
189
0
          resetKnownKeccak256Hashes();
190
0
        }
191
0
        if (invStor)
192
0
          resetStorage();
193
0
        if (invMem || invStor)
194
0
          m_sequenceNumber += 2; // Increment by two because it can read and write
195
0
        assertThrow(info.ret <= 1, InvalidDeposit, "");
196
0
        if (info.ret == 1)
197
0
          setStackElement(
198
0
            m_stackHeight + static_cast<int>(_item.deposit()),
199
0
            m_expressionClasses->find(_item, arguments, _copyItem)
200
0
          );
201
0
      }
202
0
    }
203
0
    m_stackElements.erase(
204
0
      m_stackElements.upper_bound(m_stackHeight + static_cast<int>(_item.deposit())),
205
0
      m_stackElements.end()
206
0
    );
207
0
    m_stackHeight += static_cast<int>(_item.deposit());
208
0
  }
209
0
  return op;
210
0
}
211
212
/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from
213
/// _this which is not in or not equal to the value in _other.
214
template <class Mapping> void intersect(Mapping& _this, Mapping const& _other)
215
0
{
216
0
  for (auto it = _this.begin(); it != _this.end();)
217
0
    if (_other.count(it->first) && _other.at(it->first) == it->second)
218
0
      ++it;
219
0
    else
220
0
      it = _this.erase(it);
221
0
}
222
223
void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers)
224
0
{
225
0
  int stackDiff = m_stackHeight - _other.m_stackHeight;
226
0
  for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
227
0
    if (_other.m_stackElements.count(it->first - stackDiff))
228
0
    {
229
0
      Id other = _other.m_stackElements.at(it->first - stackDiff);
230
0
      if (it->second == other)
231
0
        ++it;
232
0
      else
233
0
      {
234
0
        set<u256> theseTags = tagsInExpression(it->second);
235
0
        set<u256> otherTags = tagsInExpression(other);
236
0
        if (!theseTags.empty() && !otherTags.empty())
237
0
        {
238
0
          theseTags.insert(otherTags.begin(), otherTags.end());
239
0
          it->second = tagUnion(theseTags);
240
0
          ++it;
241
0
        }
242
0
        else
243
0
          it = m_stackElements.erase(it);
244
0
      }
245
0
    }
246
0
    else
247
0
      it = m_stackElements.erase(it);
248
249
  // Use the smaller stack height. Essential to terminate in case of loops.
250
0
  if (m_stackHeight > _other.m_stackHeight)
251
0
  {
252
0
    map<int, Id> shiftedStack;
253
0
    for (auto const& stackElement: m_stackElements)
254
0
      shiftedStack[stackElement.first - stackDiff] = stackElement.second;
255
0
    m_stackElements = move(shiftedStack);
256
0
    m_stackHeight = _other.m_stackHeight;
257
0
  }
258
259
0
  intersect(m_storageContent, _other.m_storageContent);
260
0
  intersect(m_memoryContent, _other.m_memoryContent);
261
0
  if (_combineSequenceNumbers)
262
0
    m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber);
263
0
}
264
265
bool KnownState::operator==(KnownState const& _other) const
266
0
{
267
0
  if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent)
268
0
    return false;
269
0
  int stackDiff = m_stackHeight - _other.m_stackHeight;
270
0
  auto thisIt = m_stackElements.cbegin();
271
0
  auto otherIt = _other.m_stackElements.cbegin();
272
0
  for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt)
273
0
    if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second)
274
0
      return false;
275
0
  return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend());
276
0
}
277
278
ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
279
0
{
280
0
  if (m_stackElements.count(_stackHeight))
281
0
    return m_stackElements.at(_stackHeight);
282
  // Stack element not found (not assigned yet), create new unknown equivalence class.
283
0
  return m_stackElements[_stackHeight] =
284
0
      m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location));
285
0
}
286
287
KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location)
288
0
{
289
0
  return stackElement(m_stackHeight + _stackOffset, _location);
290
0
}
291
292
void KnownState::clearTagUnions()
293
0
{
294
0
  for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
295
0
    if (m_tagUnions.left.count(it->second))
296
0
      it = m_stackElements.erase(it);
297
0
    else
298
0
      ++it;
299
0
}
300
301
void KnownState::setStackElement(int _stackHeight, Id _class)
302
0
{
303
0
  m_stackElements[_stackHeight] = _class;
304
0
}
305
306
void KnownState::swapStackElements(
307
  int _stackHeightA,
308
  int _stackHeightB,
309
  SourceLocation const& _location
310
)
311
0
{
312
0
  assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
313
  // ensure they are created
314
0
  stackElement(_stackHeightA, _location);
315
0
  stackElement(_stackHeightB, _location);
316
317
0
  swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
318
0
}
319
320
KnownState::StoreOperation KnownState::storeInStorage(
321
  Id _slot,
322
  Id _value,
323
  SourceLocation const& _location)
324
0
{
325
0
  if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
326
    // do not execute the storage if we know that the value is already there
327
0
    return StoreOperation();
328
0
  m_sequenceNumber++;
329
0
  decltype(m_storageContent) storageContents;
330
  // Copy over all values (i.e. retain knowledge about them) where we know that this store
331
  // operation will not destroy the knowledge. Specifically, we copy storage locations we know
332
  // are different from _slot or locations where we know that the stored value is equal to _value.
333
0
  for (auto const& storageItem: m_storageContent)
334
0
    if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
335
0
      storageContents.insert(storageItem);
336
0
  m_storageContent = move(storageContents);
337
338
0
  AssemblyItem item(Instruction::SSTORE, _location);
339
0
  Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
340
0
  StoreOperation operation{StoreOperation::Storage, _slot, m_sequenceNumber, id};
341
0
  m_storageContent[_slot] = _value;
342
  // increment a second time so that we get unique sequence numbers for writes
343
0
  m_sequenceNumber++;
344
345
0
  return operation;
346
0
}
347
348
ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
349
0
{
350
0
  if (m_storageContent.count(_slot))
351
0
    return m_storageContent.at(_slot);
352
353
0
  AssemblyItem item(Instruction::SLOAD, _location);
354
0
  return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
355
0
}
356
357
KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
358
0
{
359
0
  if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
360
    // do not execute the store if we know that the value is already there
361
0
    return StoreOperation();
362
0
  m_sequenceNumber++;
363
0
  decltype(m_memoryContent) memoryContents;
364
  // copy over values at points where we know that they are different from _slot by at least 32
365
0
  for (auto const& memoryItem: m_memoryContent)
366
0
    if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
367
0
      memoryContents.insert(memoryItem);
368
0
  m_memoryContent = move(memoryContents);
369
370
0
  AssemblyItem item(Instruction::MSTORE, _location);
371
0
  Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
372
0
  StoreOperation operation{StoreOperation::Memory, _slot, m_sequenceNumber, id};
373
0
  m_memoryContent[_slot] = _value;
374
  // increment a second time so that we get unique sequence numbers for writes
375
0
  m_sequenceNumber++;
376
0
  return operation;
377
0
}
378
379
ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
380
0
{
381
0
  if (m_memoryContent.count(_slot))
382
0
    return m_memoryContent.at(_slot);
383
384
0
  AssemblyItem item(Instruction::MLOAD, _location);
385
0
  return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
386
0
}
387
388
KnownState::Id KnownState::applyKeccak256(
389
  Id _start,
390
  Id _length,
391
  SourceLocation const& _location
392
)
393
0
{
394
0
  AssemblyItem keccak256Item(Instruction::KECCAK256, _location);
395
  // Special logic if length is a short constant, otherwise we cannot tell.
396
0
  u256 const* l = m_expressionClasses->knownConstant(_length);
397
  // unknown or too large length
398
0
  if (!l || *l > 128)
399
0
    return m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
400
0
  unsigned length = unsigned(*l);
401
0
  vector<Id> arguments;
402
0
  for (unsigned i = 0; i < length; i += 32)
403
0
  {
404
0
    Id slot = m_expressionClasses->find(
405
0
      AssemblyItem(Instruction::ADD, _location),
406
0
      {_start, m_expressionClasses->find(u256(i))}
407
0
    );
408
0
    arguments.push_back(loadFromMemory(slot, _location));
409
0
  }
410
0
  if (m_knownKeccak256Hashes.count({arguments, length}))
411
0
    return m_knownKeccak256Hashes.at({arguments, length});
412
0
  Id v;
413
  // If all arguments are known constants, compute the Keccak-256 here
414
0
  if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
415
0
  {
416
0
    bytes data;
417
0
    for (Id a: arguments)
418
0
      data += toBigEndian(*m_expressionClasses->knownConstant(a));
419
0
    data.resize(length);
420
0
    v = m_expressionClasses->find(AssemblyItem(u256(util::keccak256(data)), _location));
421
0
  }
422
0
  else
423
0
    v = m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
424
0
  return m_knownKeccak256Hashes[{arguments, length}] = v;
425
0
}
426
427
set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId)
428
0
{
429
0
  if (m_tagUnions.left.count(_expressionId))
430
0
    return m_tagUnions.left.at(_expressionId);
431
  // Might be a tag, then return the set of itself.
432
0
  ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId);
433
0
  if (expr.item && expr.item->type() == PushTag)
434
0
    return set<u256>({expr.item->data()});
435
0
  else
436
0
    return set<u256>();
437
0
}
438
439
KnownState::Id KnownState::tagUnion(set<u256> _tags)
440
0
{
441
0
  if (m_tagUnions.right.count(_tags))
442
0
    return m_tagUnions.right.at(_tags);
443
0
  else
444
0
  {
445
0
    Id id = m_expressionClasses->newClass(SourceLocation());
446
0
    m_tagUnions.right.insert(make_pair(_tags, id));
447
0
    return id;
448
0
  }
449
0
}