Coverage Report

Created: 2025-09-04 07:34

/src/solidity/test/tools/ossfuzz/protoToYul.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
  This file is part of solidity.
3
4
  solidity is free software: you can redistribute it and/or modify
5
  it under the terms of the GNU General Public License as published by
6
  the Free Software Foundation, either version 3 of the License, or
7
  (at your option) any later version.
8
9
  solidity is distributed in the hope that it will be useful,
10
  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
  GNU General Public License for more details.
13
14
  You should have received a copy of the GNU General Public License
15
  along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16
*/
17
// SPDX-License-Identifier: GPL-3.0
18
19
#include <test/tools/ossfuzz/protoToYul.h>
20
#include <test/tools/ossfuzz/yulOptimizerFuzzDictionary.h>
21
22
#include <libyul/Exceptions.h>
23
24
#include <libsolutil/StringUtils.h>
25
26
#include <range/v3/algorithm/all_of.hpp>
27
28
#include <boost/algorithm/string.hpp>
29
#include <boost/algorithm/string/split.hpp>
30
31
#include <range/v3/action/remove_if.hpp>
32
33
#include <algorithm>
34
35
using namespace solidity::yul::test::yul_fuzzer;
36
using namespace solidity::yul::test;
37
using namespace solidity::langutil;
38
using namespace solidity::util;
39
using namespace solidity;
40
41
std::string ProtoConverter::dictionaryToken(HexPrefix _p)
42
844k
{
43
844k
  std::string token;
44
  // If dictionary constant is requested while converting
45
  // for loop condition, then return zero so that we don't
46
  // generate infinite for loops.
47
844k
  if (m_inForCond)
48
9.80k
    token = "0";
49
834k
  else
50
834k
  {
51
834k
    unsigned indexVar = m_inputSize * m_inputSize + counter();
52
834k
    token = hexDictionary[indexVar % hexDictionary.size()];
53
834k
    yulAssert(token.size() <= 64, "Proto Fuzzer: Dictionary token too large");
54
834k
  }
55
56
844k
  return _p == HexPrefix::Add ? "0x" + token : token;
57
844k
}
58
59
std::string ProtoConverter::createHex(std::string const& _hexBytes)
60
26.6k
{
61
26.6k
  std::string tmp{_hexBytes};
62
26.6k
  if (!tmp.empty())
63
25.2k
  {
64
8.08M
    ranges::actions::remove_if(tmp, [=](char c) -> bool {
65
8.08M
      return !std::isxdigit(c);
66
8.08M
    });
67
25.2k
    tmp = tmp.substr(0, 64);
68
25.2k
  }
69
  // We need this awkward if case because hex literals cannot be empty.
70
  // Use a dictionary token.
71
26.6k
  if (tmp.empty())
72
16.5k
    tmp = dictionaryToken(HexPrefix::DontAdd);
73
  // Hex literals must have even number of digits
74
26.6k
  if (tmp.size() % 2)
75
13.1k
    tmp.insert(0, "0");
76
77
26.6k
  yulAssert(tmp.size() <= 64, "Proto Fuzzer: Dictionary token too large");
78
26.6k
  return tmp;
79
26.6k
}
80
81
std::string ProtoConverter::createAlphaNum(std::string const& _strBytes)
82
10.4k
{
83
10.4k
  std::string tmp{_strBytes};
84
10.4k
  if (!tmp.empty())
85
9.02k
  {
86
298k
    ranges::actions::remove_if(tmp, [=](char c) -> bool {
87
298k
      return !(std::isalpha(c) || std::isdigit(c));
88
298k
    });
89
9.02k
    tmp = tmp.substr(0, 32);
90
9.02k
  }
91
10.4k
  return tmp;
92
10.4k
}
93
94
EVMVersion ProtoConverter::evmVersionMapping(Program_Version const& _ver)
95
38.6k
{
96
38.6k
  switch (_ver)
97
38.6k
  {
98
9.02k
  case Program::HOMESTEAD:
99
9.02k
    return EVMVersion::homestead();
100
1.94k
  case Program::TANGERINE:
101
1.94k
    return EVMVersion::tangerineWhistle();
102
1.79k
  case Program::SPURIOUS:
103
1.79k
    return EVMVersion::spuriousDragon();
104
959
  case Program::BYZANTIUM:
105
959
    return EVMVersion::byzantium();
106
1.44k
  case Program::CONSTANTINOPLE:
107
1.44k
    return EVMVersion::constantinople();
108
1.42k
  case Program::PETERSBURG:
109
1.42k
    return EVMVersion::petersburg();
110
4.61k
  case Program::ISTANBUL:
111
4.61k
    return EVMVersion::istanbul();
112
11.2k
  case Program::BERLIN:
113
11.2k
    return EVMVersion::berlin();
114
917
  case Program::LONDON:
115
917
    return EVMVersion::london();
116
3.00k
  case Program::PARIS:
117
3.00k
    return EVMVersion::paris();
118
182
  case Program::SHANGHAI:
119
182
    return EVMVersion::shanghai();
120
1.11k
  case Program::CANCUN:
121
1.11k
    return EVMVersion::cancun();
122
699
  case Program::PRAGUE:
123
699
    return EVMVersion::prague();
124
253
  case Program::OSAKA:
125
253
    return EVMVersion::osaka();
126
38.6k
  }
127
38.6k
}
128
129
std::string ProtoConverter::visit(Literal const& _x)
130
137k
{
131
137k
  switch (_x.literal_oneof_case())
132
137k
  {
133
58.5k
  case Literal::kIntval:
134
58.5k
    return std::to_string(_x.intval());
135
25.5k
  case Literal::kHexval:
136
25.5k
    return "0x" + createHex(_x.hexval());
137
10.4k
  case Literal::kStrval:
138
10.4k
    return "\"" + createAlphaNum(_x.strval()) + "\"";
139
7.03k
  case Literal::kBoolval:
140
7.03k
    return _x.boolval() ? "true" : "false";
141
36.2k
  case Literal::LITERAL_ONEOF_NOT_SET:
142
36.2k
    return dictionaryToken();
143
137k
  }
144
137k
}
145
146
void ProtoConverter::consolidateVarDeclsInFunctionDef()
147
69.3k
{
148
69.3k
  m_currentFuncVars.clear();
149
69.3k
  yulAssert(!m_funcVars.empty(), "Proto fuzzer: Invalid operation");
150
151
69.3k
  auto const& scopes = m_funcVars.back();
152
69.3k
  for (auto const& s: scopes)
153
270k
    for (auto const& var: s)
154
319k
      m_currentFuncVars.push_back(&var);
155
69.3k
  yulAssert(!m_funcForLoopInitVars.empty(), "Proto fuzzer: Invalid operation");
156
69.3k
  auto const& forinitscopes = m_funcForLoopInitVars.back();
157
69.3k
  for (auto const& s: forinitscopes)
158
4.90k
    for (auto const& var: s)
159
3.75k
      m_currentFuncVars.push_back(&var);
160
69.3k
}
161
162
void ProtoConverter::consolidateGlobalVarDecls()
163
46.4k
{
164
46.4k
  m_currentGlobalVars.clear();
165
  // Place pointers to all global variables that are in scope
166
  // into a single vector
167
46.4k
  for (auto const& scope: m_globalVars)
168
88.5k
    for (auto const& var: scope)
169
226k
      m_currentGlobalVars.push_back(&var);
170
  // Place pointers to all variables declared in for-init blocks
171
  // that are still live into the same vector
172
46.4k
  for (auto const& init: m_globalForLoopInitVars)
173
4.84k
    for (auto const& var: init)
174
2.90k
      m_currentGlobalVars.push_back(&var);
175
46.4k
}
176
177
bool ProtoConverter::varDeclAvailable()
178
115k
{
179
115k
  if (m_inFunctionDef)
180
69.3k
  {
181
69.3k
    consolidateVarDeclsInFunctionDef();
182
69.3k
    return !m_currentFuncVars.empty();
183
69.3k
  }
184
46.4k
  else
185
46.4k
  {
186
46.4k
    consolidateGlobalVarDecls();
187
46.4k
    return !m_currentGlobalVars.empty();
188
46.4k
  }
189
115k
}
190
191
void ProtoConverter::visit(VarRef const& _x)
192
91.4k
{
193
91.4k
  if (m_inFunctionDef)
194
68.5k
  {
195
    // Ensure that there is at least one variable declaration to reference in function scope.
196
68.5k
    yulAssert(!m_currentFuncVars.empty(), "Proto fuzzer: No variables to reference.");
197
68.5k
    m_output << *m_currentFuncVars[static_cast<size_t>(_x.varnum()) % m_currentFuncVars.size()];
198
68.5k
  }
199
22.8k
  else
200
22.8k
  {
201
    // Ensure that there is at least one variable declaration to reference in nested scopes.
202
22.8k
    yulAssert(!m_currentGlobalVars.empty(), "Proto fuzzer: No global variables to reference.");
203
22.8k
    m_output << *m_currentGlobalVars[static_cast<size_t>(_x.varnum()) % m_currentGlobalVars.size()];
204
22.8k
  }
205
91.4k
}
206
207
void ProtoConverter::visit(Expression const& _x)
208
1.16M
{
209
1.16M
  switch (_x.expr_oneof_case())
210
1.16M
  {
211
90.8k
  case Expression::kVarref:
212
    // If the expression requires a variable reference that we cannot provide
213
    // (because there are no variables in scope), we silently output a literal
214
    // expression from the optimizer dictionary.
215
90.8k
    if (!varDeclAvailable())
216
19.3k
      m_output << dictionaryToken();
217
71.5k
    else
218
71.5k
      visit(_x.varref());
219
90.8k
    break;
220
76.8k
  case Expression::kCons:
221
    // If literal expression describes for-loop condition
222
    // then force it to zero, so we don't generate infinite
223
    // for loops
224
76.8k
    if (m_inForCond)
225
202
      m_output << "0";
226
76.6k
    else
227
76.6k
      m_output << visit(_x.cons());
228
76.8k
    break;
229
108k
  case Expression::kBinop:
230
108k
    visit(_x.binop());
231
108k
    break;
232
52.2k
  case Expression::kUnop:
233
52.2k
    visit(_x.unop());
234
52.2k
    break;
235
22.9k
  case Expression::kTop:
236
22.9k
    visit(_x.top());
237
22.9k
    break;
238
40.6k
  case Expression::kNop:
239
40.6k
    visit(_x.nop());
240
40.6k
    break;
241
28.8k
  case Expression::kFuncExpr:
242
28.8k
    if (auto v = functionExists(NumFunctionReturns::Single); v.has_value())
243
23.9k
    {
244
23.9k
      std::string functionName = v.value();
245
23.9k
      visit(_x.func_expr(), functionName, true);
246
23.9k
    }
247
4.81k
    else
248
4.81k
      m_output << dictionaryToken();
249
28.8k
    break;
250
18.6k
  case Expression::kLowcall:
251
18.6k
    visit(_x.lowcall());
252
18.6k
    break;
253
7.98k
  case Expression::kCreate:
254
    // Create and create2 return address of created contract which
255
    // may lead to state change via sstore of the returned address.
256
7.98k
    if (!m_filterStatefulInstructions)
257
5.77k
      visit(_x.create());
258
2.21k
    else
259
2.21k
      m_output << dictionaryToken();
260
7.98k
    break;
261
20.1k
  case Expression::kUnopdata:
262
    // Filter datasize and dataoffset because these instructions may return
263
    // a value that is a function of optimisation. Therefore, when run on
264
    // an EVM client, the execution traces for unoptimised vs optimised
265
    // programs may differ. This ends up as a false-positive bug report.
266
20.1k
    if (m_isObject && !m_filterStatefulInstructions)
267
1.51k
      visit(_x.unopdata());
268
18.6k
    else
269
18.6k
      m_output << dictionaryToken();
270
20.1k
    break;
271
701k
  case Expression::EXPR_ONEOF_NOT_SET:
272
701k
    m_output << dictionaryToken();
273
701k
    break;
274
1.16M
  }
275
1.16M
}
276
277
void ProtoConverter::visit(BinaryOp const& _x)
278
108k
{
279
108k
  BinaryOp_BOp op = _x.op();
280
281
108k
  if ((op == BinaryOp::SHL || op == BinaryOp::SHR || op == BinaryOp::SAR) &&
282
108k
    !m_evmVersion.hasBitwiseShifting())
283
470
  {
284
470
    m_output << dictionaryToken();
285
470
    return;
286
470
  }
287
288
107k
  switch (op)
289
107k
  {
290
7.57k
  case BinaryOp::ADD:
291
7.57k
    m_output << "add";
292
7.57k
    break;
293
3.44k
  case BinaryOp::SUB:
294
3.44k
    m_output << "sub";
295
3.44k
    break;
296
8.28k
  case BinaryOp::MUL:
297
8.28k
    m_output << "mul";
298
8.28k
    break;
299
15.3k
  case BinaryOp::DIV:
300
15.3k
    m_output << "div";
301
15.3k
    break;
302
5.08k
  case BinaryOp::MOD:
303
5.08k
    m_output << "mod";
304
5.08k
    break;
305
2.88k
  case BinaryOp::XOR:
306
2.88k
    m_output << "xor";
307
2.88k
    break;
308
3.16k
  case BinaryOp::AND:
309
3.16k
    m_output << "and";
310
3.16k
    break;
311
9.53k
  case BinaryOp::OR:
312
9.53k
    m_output << "or";
313
9.53k
    break;
314
2.25k
  case BinaryOp::EQ:
315
2.25k
    m_output << "eq";
316
2.25k
    break;
317
1.97k
  case BinaryOp::LT:
318
1.97k
    m_output << "lt";
319
1.97k
    break;
320
4.78k
  case BinaryOp::GT:
321
4.78k
    m_output << "gt";
322
4.78k
    break;
323
1.87k
  case BinaryOp::SHR:
324
1.87k
    yulAssert(m_evmVersion.hasBitwiseShifting(), "Proto fuzzer: Invalid evm version");
325
1.87k
    m_output << "shr";
326
1.87k
    break;
327
4.89k
  case BinaryOp::SHL:
328
4.89k
    yulAssert(m_evmVersion.hasBitwiseShifting(), "Proto fuzzer: Invalid evm version");
329
4.89k
    m_output << "shl";
330
4.89k
    break;
331
788
  case BinaryOp::SAR:
332
788
    yulAssert(m_evmVersion.hasBitwiseShifting(), "Proto fuzzer: Invalid evm version");
333
788
    m_output << "sar";
334
788
    break;
335
9.99k
  case BinaryOp::SDIV:
336
9.99k
    m_output << "sdiv";
337
9.99k
    break;
338
8.16k
  case BinaryOp::SMOD:
339
8.16k
    m_output << "smod";
340
8.16k
    break;
341
3.71k
  case BinaryOp::EXP:
342
3.71k
    m_output << "exp";
343
3.71k
    break;
344
1.29k
  case BinaryOp::SLT:
345
1.29k
    m_output << "slt";
346
1.29k
    break;
347
1.72k
  case BinaryOp::SGT:
348
1.72k
    m_output << "sgt";
349
1.72k
    break;
350
1.16k
  case BinaryOp::BYTE:
351
1.16k
    m_output << "byte";
352
1.16k
    break;
353
4.75k
  case BinaryOp::SI:
354
4.75k
    m_output << "signextend";
355
4.75k
    break;
356
4.90k
  case BinaryOp::KECCAK:
357
4.90k
    m_output << "keccak256";
358
4.90k
    break;
359
107k
  }
360
107k
  m_output << "(";
361
107k
  if (op == BinaryOp::KECCAK)
362
4.90k
  {
363
4.90k
    m_output << "mod(";
364
4.90k
    visit(_x.left());
365
4.90k
    m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
366
4.90k
    m_output << ",";
367
4.90k
    m_output << "mod(";
368
4.90k
    visit(_x.right());
369
4.90k
    m_output << ", " << std::to_string(s_maxSize) << ")";
370
4.90k
  }
371
102k
  else
372
102k
  {
373
102k
    visit(_x.left());
374
102k
    m_output << ",";
375
102k
    visit(_x.right());
376
102k
  }
377
107k
  m_output << ")";
378
107k
}
379
380
void ProtoConverter::scopeVariables(std::vector<std::string> const& _varNames)
381
25.7k
{
382
  // If we are inside a for-init block, there are two places
383
  // where the visited vardecl may have been defined:
384
  // - directly inside the for-init block
385
  // - inside a block within the for-init block
386
  // In the latter case, we don't scope extend. The flag
387
  // m_forInitScopeExtEnabled (= true) indicates whether we are directly
388
  // inside a for-init block e.g., for { let x } or (= false) inside a
389
  // nested for-init block e.g., for { { let x } }
390
25.7k
  bool forInitScopeExtendVariable = m_inForInitScope && m_forInitScopeExtEnabled;
391
392
  // There are four cases that are tackled here
393
  // Case 1. We are inside a function definition and the variable declaration's
394
  // scope needs to be extended.
395
  // Case 2. We are inside a function definition but scope extension is disabled
396
  // Case 3. We are inside global scope and scope extension is required
397
  // Case 4. We are inside global scope but scope extension is disabled
398
25.7k
  if (m_inFunctionDef)
399
9.11k
  {
400
    // Variables declared directly in for-init block
401
    // are tracked separately because their scope
402
    // extends beyond the block they are defined in
403
    // to the rest of the for-loop statement.
404
    // Case 1
405
9.11k
    if (forInitScopeExtendVariable)
406
377
    {
407
377
      yulAssert(
408
377
        !m_funcForLoopInitVars.empty() && !m_funcForLoopInitVars.back().empty(),
409
377
        "Proto fuzzer: Invalid operation"
410
377
      );
411
377
      for (auto const& varName: _varNames)
412
624
        m_funcForLoopInitVars.back().back().push_back(varName);
413
377
    }
414
    // Case 2
415
8.73k
    else
416
8.73k
    {
417
8.73k
      yulAssert(
418
8.73k
        !m_funcVars.empty() && !m_funcVars.back().empty(),
419
8.73k
        "Proto fuzzer: Invalid operation"
420
8.73k
      );
421
8.73k
      for (auto const& varName: _varNames)
422
17.9k
        m_funcVars.back().back().push_back(varName);
423
8.73k
    }
424
9.11k
  }
425
  // If m_inFunctionDef is false, we are in global scope
426
16.6k
  else
427
16.6k
  {
428
    // Case 3
429
16.6k
    if (forInitScopeExtendVariable)
430
737
    {
431
737
      yulAssert(!m_globalForLoopInitVars.empty(), "Proto fuzzer: Invalid operation");
432
433
737
      for (auto const& varName: _varNames)
434
1.65k
        m_globalForLoopInitVars.back().push_back(varName);
435
737
    }
436
    // Case 4
437
15.9k
    else
438
15.9k
    {
439
15.9k
      yulAssert(!m_globalVars.empty(), "Proto fuzzer: Invalid operation");
440
441
15.9k
      for (auto const& varName: _varNames)
442
34.8k
        m_globalVars.back().push_back(varName);
443
15.9k
    }
444
16.6k
  }
445
25.7k
}
446
447
void ProtoConverter::visit(VarDecl const& _x)
448
11.2k
{
449
11.2k
  std::string varName = newVarName();
450
11.2k
  m_output << "let " << varName << " := ";
451
11.2k
  visit(_x.expr());
452
11.2k
  m_output << "\n";
453
11.2k
  scopeVariables({varName});
454
11.2k
}
455
456
void ProtoConverter::visit(MultiVarDecl const& _x)
457
14.5k
{
458
14.5k
  m_output << "let ";
459
14.5k
  std::vector<std::string> varNames;
460
  // We support up to 4 variables in a single
461
  // declaration statement.
462
14.5k
  unsigned numVars = _x.num_vars() % 3 + 2;
463
14.5k
  std::string delimiter;
464
58.3k
  for (unsigned i = 0; i < numVars; i++)
465
43.8k
  {
466
43.8k
    std::string varName = newVarName();
467
43.8k
    varNames.push_back(varName);
468
43.8k
    m_output << delimiter << varName;
469
43.8k
    if (i == 0)
470
14.5k
      delimiter = ", ";
471
43.8k
  }
472
14.5k
  m_output << "\n";
473
14.5k
  scopeVariables(varNames);
474
14.5k
}
475
476
void ProtoConverter::visit(TypedVarDecl const& _x)
477
0
{
478
0
  std::string varName = newVarName();
479
0
  m_output << "let " << varName;
480
0
  switch (_x.type())
481
0
  {
482
0
  case TypedVarDecl::BOOL:
483
0
    m_output << ": bool := ";
484
0
    visit(_x.expr());
485
0
    m_output << " : bool\n";
486
0
    break;
487
0
  case TypedVarDecl::S8:
488
0
    m_output << ": s8 := ";
489
0
    visit(_x.expr());
490
0
    m_output << " : s8\n";
491
0
    break;
492
0
  case TypedVarDecl::S32:
493
0
    m_output << ": s32 := ";
494
0
    visit(_x.expr());
495
0
    m_output << " : s32\n";
496
0
    break;
497
0
  case TypedVarDecl::S64:
498
0
    m_output << ": s64 := ";
499
0
    visit(_x.expr());
500
0
    m_output << " : s64\n";
501
0
    break;
502
0
  case TypedVarDecl::S128:
503
0
    m_output << ": s128 := ";
504
0
    visit(_x.expr());
505
0
    m_output << " : s128\n";
506
0
    break;
507
0
  case TypedVarDecl::S256:
508
0
    m_output << ": s256 := ";
509
0
    visit(_x.expr());
510
0
    m_output << " : s256\n";
511
0
    break;
512
0
  case TypedVarDecl::U8:
513
0
    m_output << ": u8 := ";
514
0
    visit(_x.expr());
515
0
    m_output << " : u8\n";
516
0
    break;
517
0
  case TypedVarDecl::U32:
518
0
    m_output << ": u32 := ";
519
0
    visit(_x.expr());
520
0
    m_output << " : u32\n";
521
0
    break;
522
0
  case TypedVarDecl::U64:
523
0
    m_output << ": u64 := ";
524
0
    visit(_x.expr());
525
0
    m_output << " : u64\n";
526
0
    break;
527
0
  case TypedVarDecl::U128:
528
0
    m_output << ": u128 := ";
529
0
    visit(_x.expr());
530
0
    m_output << " : u128\n";
531
0
    break;
532
0
  case TypedVarDecl::U256:
533
0
    m_output << ": u256 := ";
534
0
    visit(_x.expr());
535
0
    m_output << " : u256\n";
536
0
    break;
537
0
  }
538
  // If we are inside a for-init block, there are two places
539
  // where the visited vardecl may have been defined:
540
  // - directly inside the for-init block
541
  // - inside a block within the for-init block
542
  // In the latter case, we don't scope extend.
543
0
  if (m_inFunctionDef)
544
0
  {
545
    // Variables declared directly in for-init block
546
    // are tracked separately because their scope
547
    // extends beyond the block they are defined in
548
    // to the rest of the for-loop statement.
549
0
    if (m_inForInitScope && m_forInitScopeExtEnabled)
550
0
    {
551
0
      yulAssert(
552
0
        !m_funcForLoopInitVars.empty() && !m_funcForLoopInitVars.back().empty(),
553
0
        "Proto fuzzer: Invalid operation"
554
0
      );
555
0
      m_funcForLoopInitVars.back().back().push_back(varName);
556
0
    }
557
0
    else
558
0
    {
559
0
      yulAssert(
560
0
        !m_funcVars.empty() && !m_funcVars.back().empty(),
561
0
        "Proto fuzzer: Invalid operation"
562
0
      );
563
0
      m_funcVars.back().back().push_back(varName);
564
0
    }
565
0
  }
566
0
  else
567
0
  {
568
0
    if (m_inForInitScope && m_forInitScopeExtEnabled)
569
0
    {
570
0
      yulAssert(
571
0
        !m_globalForLoopInitVars.empty(),
572
0
        "Proto fuzzer: Invalid operation"
573
0
      );
574
0
      m_globalForLoopInitVars.back().push_back(varName);
575
0
    }
576
0
    else
577
0
    {
578
0
      yulAssert(
579
0
        !m_globalVars.empty(),
580
0
        "Proto fuzzer: Invalid operation"
581
0
      );
582
0
      m_globalVars.back().push_back(varName);
583
0
    }
584
0
  }
585
0
}
586
587
void ProtoConverter::visit(UnaryOp const& _x)
588
52.2k
{
589
52.2k
  UnaryOp_UOp op = _x.op();
590
591
  // Replace calls to extcodehash on unsupported EVMs with a dictionary
592
  // token.
593
52.2k
  if (op == UnaryOp::EXTCODEHASH && !m_evmVersion.hasExtCodeHash())
594
249
  {
595
249
    m_output << dictionaryToken();
596
249
    return;
597
249
  }
598
599
51.9k
  if (op == UnaryOp::TLOAD && !m_evmVersion.supportsTransientStorage())
600
224
  {
601
224
    m_output << dictionaryToken();
602
224
    return;
603
224
  }
604
605
51.7k
  if (op == UnaryOp::BLOBHASH && !m_evmVersion.hasBlobHash())
606
273
  {
607
273
    m_output << dictionaryToken();
608
273
    return;
609
273
  }
610
611
  // The following instructions may lead to change of EVM state and are hence
612
  // excluded to avoid false positives.
613
51.4k
  if (
614
51.4k
    m_filterStatefulInstructions &&
615
51.4k
    (
616
12.6k
      op == UnaryOp::EXTCODEHASH ||
617
12.6k
      op == UnaryOp::EXTCODESIZE ||
618
12.6k
      op == UnaryOp::BALANCE ||
619
12.6k
      op == UnaryOp::BLOCKHASH
620
12.6k
    )
621
51.4k
  )
622
728
  {
623
728
    m_output << dictionaryToken();
624
728
    return;
625
728
  }
626
627
50.7k
  switch (op)
628
50.7k
  {
629
33.4k
  case UnaryOp::NOT:
630
33.4k
    m_output << "not";
631
33.4k
    break;
632
2.59k
  case UnaryOp::MLOAD:
633
2.59k
    m_output << "mload";
634
2.59k
    break;
635
4.93k
  case UnaryOp::SLOAD:
636
4.93k
    m_output << "sload";
637
4.93k
    break;
638
225
  case UnaryOp::TLOAD:
639
225
    m_output << "tload";
640
225
    break;
641
2.59k
  case UnaryOp::ISZERO:
642
2.59k
    m_output << "iszero";
643
2.59k
    break;
644
1.83k
  case UnaryOp::CALLDATALOAD:
645
1.83k
    m_output << "calldataload";
646
1.83k
    break;
647
483
  case UnaryOp::EXTCODESIZE:
648
483
    m_output << "extcodesize";
649
483
    break;
650
920
  case UnaryOp::EXTCODEHASH:
651
920
    m_output << "extcodehash";
652
920
    break;
653
914
  case UnaryOp::BALANCE:
654
914
    m_output << "balance";
655
914
    break;
656
1.77k
  case UnaryOp::BLOCKHASH:
657
1.77k
    m_output << "blockhash";
658
1.77k
    break;
659
1.06k
  case UnaryOp::BLOBHASH:
660
1.06k
    m_output << "blobhash";
661
1.06k
    break;
662
50.7k
  }
663
50.7k
  m_output << "(";
664
50.7k
  if (op == UnaryOp::MLOAD)
665
2.59k
  {
666
2.59k
    m_output << "mod(";
667
2.59k
    visit(_x.operand());
668
2.59k
    m_output << ", " << std::to_string(s_maxMemory - 32) << ")";
669
2.59k
  }
670
48.1k
  else
671
48.1k
    visit(_x.operand());
672
50.7k
  m_output << ")";
673
50.7k
}
674
675
void ProtoConverter::visit(TernaryOp const& _x)
676
22.9k
{
677
22.9k
  switch (_x.op())
678
22.9k
  {
679
14.4k
  case TernaryOp::ADDM:
680
14.4k
    m_output << "addmod";
681
14.4k
    break;
682
8.50k
  case TernaryOp::MULM:
683
8.50k
    m_output << "mulmod";
684
8.50k
    break;
685
22.9k
  }
686
22.9k
  m_output << "(";
687
22.9k
  visit(_x.arg1());
688
22.9k
  m_output << ", ";
689
22.9k
  visit(_x.arg2());
690
22.9k
  m_output << ", ";
691
22.9k
  visit(_x.arg3());
692
22.9k
  m_output << ")";
693
22.9k
}
694
695
void ProtoConverter::visit(NullaryOp const& _x)
696
40.6k
{
697
40.6k
  auto op = _x.op();
698
  // The following instructions may lead to a change in EVM state and are
699
  // excluded to avoid false positive reports.
700
40.6k
  if (
701
40.6k
    m_filterStatefulInstructions &&
702
40.6k
    (
703
12.8k
      op == NullaryOp::GAS ||
704
12.8k
      op == NullaryOp::CODESIZE ||
705
12.8k
      op == NullaryOp::ADDRESS ||
706
12.8k
      op == NullaryOp::TIMESTAMP ||
707
12.8k
      op == NullaryOp::NUMBER ||
708
12.8k
      op == NullaryOp::DIFFICULTY
709
12.8k
    )
710
40.6k
  )
711
1.89k
  {
712
1.89k
    m_output << dictionaryToken();
713
1.89k
    return;
714
1.89k
  }
715
716
38.7k
  switch (op)
717
38.7k
  {
718
5.84k
  case NullaryOp::MSIZE:
719
5.84k
    m_output << "msize()";
720
5.84k
    break;
721
2.07k
  case NullaryOp::GAS:
722
2.07k
    m_output << "gas()";
723
2.07k
    break;
724
2.06k
  case NullaryOp::CALLDATASIZE:
725
2.06k
    m_output << "calldatasize()";
726
2.06k
    break;
727
1.59k
  case NullaryOp::CODESIZE:
728
1.59k
    m_output << "codesize()";
729
1.59k
    break;
730
6.11k
  case NullaryOp::RETURNDATASIZE:
731
    // If evm supports returndatasize, we generate it. Otherwise,
732
    // we output a dictionary token.
733
6.11k
    if (m_evmVersion.supportsReturndata())
734
5.68k
      m_output << "returndatasize()";
735
427
    else
736
427
      m_output << dictionaryToken();
737
6.11k
    break;
738
1.69k
  case NullaryOp::ADDRESS:
739
1.69k
    m_output << "address()";
740
1.69k
    break;
741
4.51k
  case NullaryOp::ORIGIN:
742
4.51k
    m_output << "origin()";
743
4.51k
    break;
744
1.19k
  case NullaryOp::CALLER:
745
1.19k
    m_output << "caller()";
746
1.19k
    break;
747
1.49k
  case NullaryOp::CALLVALUE:
748
1.49k
    m_output << "callvalue()";
749
1.49k
    break;
750
1.28k
  case NullaryOp::GASPRICE:
751
1.28k
    m_output << "gasprice()";
752
1.28k
    break;
753
1.11k
  case NullaryOp::COINBASE:
754
1.11k
    m_output << "coinbase()";
755
1.11k
    break;
756
1.01k
  case NullaryOp::TIMESTAMP:
757
1.01k
    m_output << "timestamp()";
758
1.01k
    break;
759
1.72k
  case NullaryOp::NUMBER:
760
1.72k
    m_output << "number()";
761
1.72k
    break;
762
556
  case NullaryOp::DIFFICULTY:
763
556
    if (m_evmVersion >= EVMVersion::paris())
764
152
      m_output << "prevrandao()";
765
404
    else
766
404
      m_output << "difficulty()";
767
556
    break;
768
1.47k
  case NullaryOp::GASLIMIT:
769
1.47k
    m_output << "gaslimit()";
770
1.47k
    break;
771
2.18k
  case NullaryOp::SELFBALANCE:
772
    // Replace calls to selfbalance() on unsupported EVMs with a dictionary
773
    // token.
774
2.18k
    if (m_evmVersion.hasSelfBalance())
775
1.63k
      m_output << "selfbalance()";
776
549
    else
777
549
      m_output << dictionaryToken();
778
2.18k
    break;
779
1.76k
  case NullaryOp::CHAINID:
780
    // Replace calls to chainid() on unsupported EVMs with a dictionary
781
    // token.
782
1.76k
    if (m_evmVersion.hasChainID())
783
963
      m_output << "chainid()";
784
799
    else
785
799
      m_output << dictionaryToken();
786
1.76k
    break;
787
795
  case NullaryOp::BASEFEE:
788
    // Replace calls to basefee() on unsupported EVMs with a dictionary
789
    // token.
790
795
    if (m_evmVersion.hasBaseFee())
791
296
      m_output << "basefee()";
792
499
    else
793
499
      m_output << dictionaryToken();
794
795
    break;
795
295
  case NullaryOp::BLOBBASEFEE:
796
    // Replace calls to blobbasefee() on unsupported EVMs with a dictionary
797
    // token.
798
295
    if (m_evmVersion.hasBlobBaseFee())
799
52
      m_output << "blobbasefee()";
800
243
    else
801
243
      m_output << dictionaryToken();
802
295
    break;
803
38.7k
  }
804
38.7k
}
805
806
void ProtoConverter::visit(CopyFunc const& _x)
807
8.35k
{
808
8.35k
  CopyFunc_CopyType type = _x.ct();
809
810
  // datacopy() is valid only if we are inside
811
  // a Yul object.
812
8.35k
  if (type == CopyFunc::DATA && !m_isObject)
813
546
    return;
814
815
  // We don't generate code if the copy function is returndatacopy
816
  // and the underlying evm does not support it.
817
7.80k
  if (type == CopyFunc::RETURNDATA && !m_evmVersion.supportsReturndata())
818
138
    return;
819
820
  // Bail out if MCOPY is not supported for fuzzed EVM version
821
7.66k
  if (type == CopyFunc::MEMORY && !m_evmVersion.hasMcopy())
822
129
    return;
823
824
  // Code copy may change state if e.g., some byte of code
825
  // is stored to storage via a sequence of mload and sstore.
826
7.53k
  if (m_filterStatefulInstructions && type == CopyFunc::CODE)
827
65
    return;
828
829
7.47k
  switch (type)
830
7.47k
  {
831
4.07k
  case CopyFunc::CALLDATA:
832
4.07k
    m_output << "calldatacopy";
833
4.07k
    break;
834
1.91k
  case CopyFunc::CODE:
835
1.91k
    m_output << "codecopy";
836
1.91k
    break;
837
827
  case CopyFunc::RETURNDATA:
838
827
    yulAssert(m_evmVersion.supportsReturndata(), "Proto fuzzer: Invalid evm version");
839
827
    m_output << "returndatacopy";
840
827
    break;
841
365
  case CopyFunc::DATA:
842
365
    m_output << "datacopy";
843
365
    break;
844
285
  case CopyFunc::MEMORY:
845
285
    m_output << "mcopy";
846
7.47k
  }
847
7.47k
  m_output << "(";
848
7.47k
  m_output << "mod(";
849
7.47k
  visit(_x.target());
850
7.47k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
851
7.47k
  m_output << ", ";
852
7.47k
  if (type == CopyFunc::MEMORY)
853
285
  {
854
285
    m_output << "mod(";
855
285
    visit(_x.source());
856
285
    m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
857
285
  }
858
7.18k
  else
859
7.18k
    visit(_x.source());
860
7.47k
  m_output << ", ";
861
7.47k
  m_output << "mod(";
862
7.47k
  visit(_x.size());
863
7.47k
  m_output << ", " << std::to_string(s_maxSize) << ")";
864
7.47k
  m_output << ")\n";
865
7.47k
}
866
867
void ProtoConverter::visit(ExtCodeCopy const& _x)
868
10.0k
{
869
10.0k
  m_output << "extcodecopy";
870
10.0k
  m_output << "(";
871
10.0k
  visit(_x.addr());
872
10.0k
  m_output << ", ";
873
10.0k
  m_output << "mod(";
874
10.0k
  visit(_x.target());
875
10.0k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
876
10.0k
  m_output << ", ";
877
10.0k
  visit(_x.source());
878
10.0k
  m_output << ", ";
879
10.0k
  m_output << "mod(";
880
10.0k
  visit(_x.size());
881
10.0k
  m_output << ", " << std::to_string(s_maxSize) << ")";
882
10.0k
  m_output << ")\n";
883
10.0k
}
884
885
void ProtoConverter::visit(LogFunc const& _x)
886
5.35k
{
887
5.35k
  auto visitPosAndSize = [&](LogFunc const& _y) {
888
5.35k
    m_output << "mod(";
889
5.35k
    visit(_y.pos());
890
5.35k
    m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
891
5.35k
    m_output << ", ";
892
5.35k
    m_output << "mod(";
893
5.35k
    visit(_y.size());
894
5.35k
    m_output << ", " << std::to_string(s_maxSize) << ")";
895
5.35k
  };
896
897
5.35k
  switch (_x.num_topics())
898
5.35k
  {
899
2.64k
  case LogFunc::ZERO:
900
2.64k
    m_output << "log0";
901
2.64k
    m_output << "(";
902
2.64k
    visitPosAndSize(_x);
903
2.64k
    m_output << ")\n";
904
2.64k
    break;
905
941
  case LogFunc::ONE:
906
941
    m_output << "log1";
907
941
    m_output << "(";
908
941
    visitPosAndSize(_x);
909
941
    m_output << ", ";
910
941
    visit(_x.t1());
911
941
    m_output << ")\n";
912
941
    break;
913
453
  case LogFunc::TWO:
914
453
    m_output << "log2";
915
453
    m_output << "(";
916
453
    visitPosAndSize(_x);
917
453
    m_output << ", ";
918
453
    visit(_x.t1());
919
453
    m_output << ", ";
920
453
    visit(_x.t2());
921
453
    m_output << ")\n";
922
453
    break;
923
407
  case LogFunc::THREE:
924
407
    m_output << "log3";
925
407
    m_output << "(";
926
407
    visitPosAndSize(_x);
927
407
    m_output << ", ";
928
407
    visit(_x.t1());
929
407
    m_output << ", ";
930
407
    visit(_x.t2());
931
407
    m_output << ", ";
932
407
    visit(_x.t3());
933
407
    m_output << ")\n";
934
407
    break;
935
906
  case LogFunc::FOUR:
936
906
    m_output << "log4";
937
906
    m_output << "(";
938
906
    visitPosAndSize(_x);
939
906
    m_output << ", ";
940
906
    visit(_x.t1());
941
906
    m_output << ", ";
942
906
    visit(_x.t2());
943
906
    m_output << ", ";
944
906
    visit(_x.t3());
945
906
    m_output << ", ";
946
906
    visit(_x.t4());
947
906
    m_output << ")\n";
948
906
    break;
949
5.35k
  }
950
5.35k
}
951
952
void ProtoConverter::visit(AssignmentStatement const& _x)
953
19.8k
{
954
19.8k
  visit(_x.ref_id());
955
19.8k
  m_output << " := ";
956
19.8k
  visit(_x.expr());
957
19.8k
  m_output << "\n";
958
19.8k
}
959
960
void ProtoConverter::visitFunctionInputParams(FunctionCall const& _x, unsigned _numInputParams)
961
114k
{
962
  // We reverse the order of function input visits since it helps keep this switch case concise.
963
114k
  switch (_numInputParams)
964
114k
  {
965
55.6k
  case 4:
966
55.6k
    visit(_x.in_param4());
967
55.6k
    m_output << ", ";
968
55.6k
    [[fallthrough]];
969
66.9k
  case 3:
970
66.9k
    visit(_x.in_param3());
971
66.9k
    m_output << ", ";
972
66.9k
    [[fallthrough]];
973
73.2k
  case 2:
974
73.2k
    visit(_x.in_param2());
975
73.2k
    m_output << ", ";
976
73.2k
    [[fallthrough]];
977
103k
  case 1:
978
103k
    visit(_x.in_param1());
979
103k
    [[fallthrough]];
980
114k
  case 0:
981
114k
    break;
982
0
  default:
983
0
    yulAssert(false, "Proto fuzzer: Function call with too many input parameters.");
984
0
    break;
985
114k
  }
986
114k
}
987
988
void ProtoConverter::convertFunctionCall(
989
  FunctionCall const& _x,
990
  std::string const& _name,
991
  unsigned _numInParams,
992
  bool _newLine
993
)
994
114k
{
995
114k
  m_output << _name << "(";
996
114k
  visitFunctionInputParams(_x, _numInParams);
997
114k
  m_output << ")";
998
114k
  if (_newLine)
999
114k
    m_output << "\n";
1000
114k
}
1001
1002
std::vector<std::string> ProtoConverter::createVarDecls(unsigned _start, unsigned _end, bool _isAssignment)
1003
117k
{
1004
117k
  m_output << "let ";
1005
117k
  std::vector<std::string> varsVec = createVars(_start, _end);
1006
117k
  if (_isAssignment)
1007
117k
    m_output << " := ";
1008
0
  else
1009
0
    m_output << "\n";
1010
117k
  return varsVec;
1011
117k
}
1012
1013
std::optional<std::string> ProtoConverter::functionExists(NumFunctionReturns _numReturns)
1014
28.8k
{
1015
28.8k
  for (auto const& item: m_functionSigMap)
1016
45.3k
    if (_numReturns == NumFunctionReturns::None || _numReturns == NumFunctionReturns::Single)
1017
45.3k
    {
1018
45.3k
      if (item.second.second == static_cast<unsigned>(_numReturns))
1019
23.9k
        return item.first;
1020
45.3k
    }
1021
0
    else
1022
0
    {
1023
0
      if (item.second.second >= static_cast<unsigned>(_numReturns))
1024
0
        return item.first;
1025
0
    }
1026
4.81k
  return std::nullopt;
1027
28.8k
}
1028
1029
void ProtoConverter::visit(FunctionCall const& _x, std::string const& _functionName, bool _expression)
1030
114k
{
1031
114k
  yulAssert(m_functionSigMap.count(_functionName), "Proto fuzzer: Invalid function.");
1032
114k
  auto ret = m_functionSigMap.at(_functionName);
1033
114k
  unsigned numInParams = ret.first;
1034
114k
  unsigned numOutParams = ret.second;
1035
1036
114k
  if (numOutParams == 0)
1037
43.2k
  {
1038
43.2k
    convertFunctionCall(_x, _functionName, numInParams);
1039
43.2k
    return;
1040
43.2k
  }
1041
71.2k
  else
1042
71.2k
  {
1043
71.2k
    yulAssert(numOutParams > 0, "");
1044
71.2k
    std::vector<std::string> varsVec;
1045
71.2k
    if (!_expression)
1046
47.2k
    {
1047
      // Obtain variable name suffix
1048
47.2k
      unsigned startIdx = counter();
1049
47.2k
      varsVec = createVarDecls(
1050
47.2k
        startIdx,
1051
47.2k
        startIdx + numOutParams,
1052
47.2k
        /*isAssignment=*/true
1053
47.2k
      );
1054
47.2k
    }
1055
71.2k
    convertFunctionCall(_x, _functionName, numInParams);
1056
    // Add newly minted vars in the multidecl statement to current scope
1057
71.2k
    if (!_expression)
1058
47.2k
      addVarsToScope(varsVec);
1059
71.2k
  }
1060
114k
}
1061
1062
void ProtoConverter::visit(LowLevelCall const& _x)
1063
18.6k
{
1064
18.6k
  LowLevelCall_Type type = _x.callty();
1065
1066
  // Generate staticcall if it is supported by the underlying evm
1067
18.6k
  if (type == LowLevelCall::STATICCALL && !m_evmVersion.hasStaticCall())
1068
115
  {
1069
    // Since staticcall is supposed to return 0 on success and 1 on
1070
    // failure, we can use counter value to emulate it
1071
115
    m_output << ((counter() % 2) ? "0" : "1");
1072
115
    return;
1073
115
  }
1074
1075
18.4k
  switch (type)
1076
18.4k
  {
1077
14.8k
  case LowLevelCall::CALL:
1078
14.8k
    m_output << "call(";
1079
14.8k
    break;
1080
1.83k
  case LowLevelCall::CALLCODE:
1081
1.83k
    m_output << "callcode(";
1082
1.83k
    break;
1083
1.05k
  case LowLevelCall::DELEGATECALL:
1084
1.05k
    m_output << "delegatecall(";
1085
1.05k
    break;
1086
727
  case LowLevelCall::STATICCALL:
1087
727
    yulAssert(m_evmVersion.hasStaticCall(), "Proto fuzzer: Invalid evm version");
1088
727
    m_output << "staticcall(";
1089
727
    break;
1090
18.4k
  }
1091
18.4k
  visit(_x.gas());
1092
18.4k
  m_output << ", ";
1093
18.4k
  visit(_x.addr());
1094
18.4k
  m_output << ", ";
1095
18.4k
  if (type == LowLevelCall::CALL || type == LowLevelCall::CALLCODE)
1096
16.7k
  {
1097
16.7k
    visit(_x.wei());
1098
16.7k
    m_output << ", ";
1099
16.7k
  }
1100
18.4k
  m_output << "mod(";
1101
18.4k
  visit(_x.in());
1102
18.4k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
1103
18.4k
  m_output << ", ";
1104
18.4k
  m_output << "mod(";
1105
18.4k
  visit(_x.insize());
1106
18.4k
  m_output << ", " << std::to_string(s_maxSize) << ")";
1107
18.4k
  m_output << ", ";
1108
18.4k
  m_output << "mod(";
1109
18.4k
  visit(_x.out());
1110
18.4k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
1111
18.4k
  m_output << ", ";
1112
18.4k
  m_output << "mod(";
1113
18.4k
  visit(_x.outsize());
1114
18.4k
  m_output << ", " << std::to_string(s_maxSize) << ")";
1115
18.4k
  m_output << ")";
1116
18.4k
}
1117
1118
void ProtoConverter::visit(Create const& _x)
1119
5.77k
{
1120
5.77k
  Create_Type type = _x.createty();
1121
1122
  // Replace a call to create2 on unsupported EVMs with a dictionary
1123
  // token.
1124
5.77k
  if (type == Create::CREATE2 && !m_evmVersion.hasCreate2())
1125
216
  {
1126
216
    m_output << dictionaryToken();
1127
216
    return;
1128
216
  }
1129
1130
5.55k
  switch (type)
1131
5.55k
  {
1132
4.02k
  case Create::CREATE:
1133
4.02k
    m_output << "create(";
1134
4.02k
    break;
1135
1.53k
  case Create::CREATE2:
1136
1.53k
    m_output << "create2(";
1137
1.53k
    break;
1138
5.55k
  }
1139
5.55k
  visit(_x.wei());
1140
5.55k
  m_output << ", ";
1141
5.55k
  m_output << "mod(";
1142
5.55k
  visit(_x.position());
1143
5.55k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
1144
5.55k
  m_output << ", ";
1145
5.55k
  m_output << "mod(";
1146
5.55k
  visit(_x.size());
1147
5.55k
  m_output << ", " << std::to_string(s_maxSize) << ")";
1148
5.55k
  if (type == Create::CREATE2)
1149
1.53k
  {
1150
1.53k
    m_output << ", ";
1151
1.53k
    visit(_x.value());
1152
1.53k
  }
1153
5.55k
  m_output << ")";
1154
5.55k
}
1155
1156
void ProtoConverter::visit(IfStmt const& _x)
1157
51.3k
{
1158
51.3k
  m_output << "if ";
1159
51.3k
  visit(_x.cond());
1160
51.3k
  m_output << " ";
1161
51.3k
  visit(_x.if_body());
1162
51.3k
}
1163
1164
void ProtoConverter::visit(StoreFunc const& _x)
1165
71.5k
{
1166
71.5k
  auto storeType = _x.st();
1167
  // Skip statement generation if tstore is not
1168
  // supported in EVM version
1169
71.5k
  if (storeType == StoreFunc::TSTORE && !m_evmVersion.supportsTransientStorage())
1170
4.02k
    return;
1171
1172
67.5k
  switch (storeType)
1173
67.5k
  {
1174
25.5k
  case StoreFunc::MSTORE:
1175
25.5k
    m_output << "mstore(";
1176
25.5k
    break;
1177
28.8k
  case StoreFunc::SSTORE:
1178
28.8k
    m_output << "sstore(";
1179
28.8k
    break;
1180
12.7k
  case StoreFunc::MSTORE8:
1181
12.7k
    m_output << "mstore8(";
1182
12.7k
    break;
1183
397
  case StoreFunc::TSTORE:
1184
397
    m_output << "tstore(";
1185
397
    break;
1186
67.5k
  }
1187
  // Write to memory within bounds, storage is unbounded
1188
67.5k
  if (storeType == StoreFunc::SSTORE || storeType == StoreFunc::TSTORE)
1189
29.2k
    visit(_x.loc());
1190
38.3k
  else if (storeType == StoreFunc::MSTORE8)
1191
12.7k
  {
1192
12.7k
    m_output << "mod(";
1193
12.7k
    visit(_x.loc());
1194
12.7k
    m_output << ", " << std::to_string(s_maxMemory) << ")";
1195
12.7k
  }
1196
25.5k
  else if (storeType == StoreFunc::MSTORE)
1197
25.5k
  {
1198
    // Since we write 32 bytes, ensure it does not exceed
1199
    // upper bound on memory.
1200
25.5k
    m_output << "mod(";
1201
25.5k
    visit(_x.loc());
1202
25.5k
    m_output << ", " << std::to_string(s_maxMemory - 32) << ")";
1203
1204
25.5k
  }
1205
67.5k
  m_output << ", ";
1206
67.5k
  visit(_x.val());
1207
67.5k
  m_output << ")\n";
1208
67.5k
}
1209
1210
void ProtoConverter::visit(ForStmt const& _x)
1211
29.0k
{
1212
29.0k
  if (++m_numForLoops > s_maxForLoops)
1213
21.8k
    return;
1214
7.17k
  bool wasInForBody = m_inForBodyScope;
1215
7.17k
  bool wasInForInit = m_inForInitScope;
1216
7.17k
  bool wasForInitScopeExtEnabled = m_forInitScopeExtEnabled;
1217
7.17k
  m_inForBodyScope = false;
1218
7.17k
  m_inForInitScope = true;
1219
7.17k
  m_forInitScopeExtEnabled = true;
1220
7.17k
  m_inForCond = false;
1221
7.17k
  m_output << "for ";
1222
7.17k
  visit(_x.for_init());
1223
7.17k
  m_inForInitScope = false;
1224
7.17k
  m_forInitScopeExtEnabled = wasForInitScopeExtEnabled;
1225
7.17k
  m_inForCond = true;
1226
7.17k
  visit(_x.for_cond());
1227
7.17k
  m_inForCond = false;
1228
7.17k
  visit(_x.for_post());
1229
7.17k
  m_inForBodyScope = true;
1230
7.17k
  visit(_x.for_body());
1231
7.17k
  m_inForBodyScope = wasInForBody;
1232
7.17k
  m_inForInitScope = wasInForInit;
1233
7.17k
  if (m_inFunctionDef)
1234
2.30k
  {
1235
2.30k
    yulAssert(
1236
2.30k
      !m_funcForLoopInitVars.empty() && !m_funcForLoopInitVars.back().empty(),
1237
2.30k
      "Proto fuzzer: Invalid data structure"
1238
2.30k
    );
1239
    // Remove variables in for-init
1240
2.30k
    m_funcForLoopInitVars.back().pop_back();
1241
2.30k
  }
1242
4.86k
  else
1243
4.86k
  {
1244
4.86k
    yulAssert(!m_globalForLoopInitVars.empty(), "Proto fuzzer: Invalid data structure");
1245
4.86k
    m_globalForLoopInitVars.pop_back();
1246
4.86k
  }
1247
7.17k
}
1248
1249
void ProtoConverter::visit(BoundedForStmt const& _x)
1250
54.7k
{
1251
54.7k
  if (++m_numForLoops > s_maxForLoops)
1252
35.0k
    return;
1253
1254
  // Boilerplate for loop that limits the number of iterations to a maximum of 4.
1255
19.6k
  std::string loopVarName("i_" + std::to_string(m_numNestedForLoops++));
1256
19.6k
  m_output << "for { let " << loopVarName << " := 0 } "
1257
19.6k
         << "lt(" << loopVarName << ", 0x60) "
1258
19.6k
         << "{ " << loopVarName << " := add(" << loopVarName << ", 0x20) } ";
1259
  // Store previous for body scope
1260
19.6k
  bool wasInForBody = m_inForBodyScope;
1261
19.6k
  bool wasInForInit = m_inForInitScope;
1262
19.6k
  m_inForBodyScope = true;
1263
19.6k
  m_inForInitScope = false;
1264
19.6k
  visit(_x.for_body());
1265
  // Restore previous for body scope and init
1266
19.6k
  m_inForBodyScope = wasInForBody;
1267
19.6k
  m_inForInitScope = wasInForInit;
1268
19.6k
}
1269
1270
void ProtoConverter::visit(CaseStmt const& _x)
1271
61.1k
{
1272
61.1k
  std::string literal = visit(_x.case_lit());
1273
  // u256 value of literal
1274
61.1k
  u256 literalVal;
1275
1276
  // Convert string to u256 before looking for duplicate case literals
1277
61.1k
  if (_x.case_lit().has_strval())
1278
2.81k
  {
1279
    // Since string literals returned by the Literal visitor are enclosed within
1280
    // double quotes (like this "\"<string>\""), their size is at least two in the worst case
1281
    // that <string> is empty. Here we assert this invariant.
1282
2.81k
    yulAssert(literal.size() >= 2, "Proto fuzzer: String literal too short");
1283
    // This variable stores the <string> part i.e., literal minus the first and last
1284
    // double quote characters. This is used to compute the keccak256 hash of the
1285
    // string literal. The hashing is done to check whether we are about to create
1286
    // a case statement containing a case literal that has already been used in a
1287
    // previous case statement. If the hash (u256 value) matches a previous hash,
1288
    // then we simply don't create a new case statement.
1289
2.81k
    std::string noDoubleQuoteStr;
1290
2.81k
    if (literal.size() > 2)
1291
1.68k
    {
1292
      // Ensure that all characters in the string literal except the first
1293
      // and the last (double quote characters) are alphanumeric.
1294
1.68k
      yulAssert(
1295
1.68k
        ranges::all_of(
1296
1.68k
          literal.begin() + 1,
1297
1.68k
          literal.end() - 2,
1298
1.68k
          [=](char c) { return isalpha(c) || isdigit(c); }),
1299
1.68k
        "Proto fuzzer: Invalid string literal encountered"
1300
1.68k
      );
1301
1302
      // Make a copy because literal will need to be used later
1303
1.68k
      noDoubleQuoteStr = literal.substr(1, literal.size() - 2);
1304
1.68k
    }
1305
    // Hash the result to check for duplicate case literal strings
1306
2.81k
    literalVal = u256(h256(noDoubleQuoteStr, h256::FromBinary, h256::AlignLeft));
1307
1308
    // Make sure that an empty string literal evaluates to zero. This is to detect creation of
1309
    // duplicate case literals like so
1310
    // switch (x)
1311
    // {
1312
    //    case "": { x := 0 }
1313
    //    case 0: { x:= 1 } // Case statement with duplicate literal is invalid
1314
    // } // This snippet will not be parsed successfully.
1315
2.81k
    if (noDoubleQuoteStr.empty())
1316
2.81k
      yulAssert(literalVal == 0, "Proto fuzzer: Empty string does not evaluate to zero");
1317
2.81k
  }
1318
58.3k
  else if (_x.case_lit().has_boolval())
1319
1.79k
    literalVal = _x.case_lit().boolval() ? u256(1) : u256(0);
1320
56.5k
  else
1321
56.5k
    literalVal = u256(literal);
1322
1323
  // Check if set insertion fails (case literal present) or succeeds (case literal
1324
  // absent).
1325
61.1k
  bool isUnique = m_switchLiteralSetPerScope.top().insert(literalVal).second;
1326
1327
  // It is fine to bail out if we encounter a duplicate case literal because
1328
  // we can be assured that the switch statement is well-formed i.e., contains
1329
  // at least one case statement or a default block.
1330
61.1k
  if (isUnique)
1331
56.4k
  {
1332
56.4k
    m_output << "case " << literal << " ";
1333
56.4k
    visit(_x.case_block());
1334
56.4k
  }
1335
61.1k
}
1336
1337
void ProtoConverter::visit(SwitchStmt const& _x)
1338
89.3k
{
1339
89.3k
  if (_x.case_stmt_size() > 0 || _x.has_default_block())
1340
68.9k
  {
1341
68.9k
    std::set<u256> s;
1342
68.9k
    m_switchLiteralSetPerScope.push(s);
1343
68.9k
    m_output << "switch ";
1344
68.9k
    visit(_x.switch_expr());
1345
68.9k
    m_output << "\n";
1346
1347
68.9k
    for (auto const& caseStmt: _x.case_stmt())
1348
61.1k
      visit(caseStmt);
1349
1350
68.9k
    m_switchLiteralSetPerScope.pop();
1351
1352
68.9k
    if (_x.has_default_block())
1353
65.3k
    {
1354
65.3k
      m_output << "default ";
1355
65.3k
      visit(_x.default_block());
1356
65.3k
    }
1357
68.9k
  }
1358
89.3k
}
1359
1360
void ProtoConverter::visit(StopInvalidStmt const& _x)
1361
2.12k
{
1362
2.12k
  switch (_x.stmt())
1363
2.12k
  {
1364
1.14k
  case StopInvalidStmt::STOP:
1365
1.14k
    m_output << "stop()\n";
1366
1.14k
    break;
1367
975
  case StopInvalidStmt::INVALID:
1368
975
    m_output << "invalid()\n";
1369
975
    break;
1370
2.12k
  }
1371
2.12k
}
1372
1373
void ProtoConverter::visit(RetRevStmt const& _x)
1374
1.90k
{
1375
1.90k
  switch (_x.stmt())
1376
1.90k
  {
1377
1.03k
  case RetRevStmt::RETURN:
1378
1.03k
    m_output << "return";
1379
1.03k
    break;
1380
869
  case RetRevStmt::REVERT:
1381
869
    m_output << "revert";
1382
869
    break;
1383
1.90k
  }
1384
1.90k
  m_output << "(";
1385
1.90k
  m_output << "mod(";
1386
1.90k
  visit(_x.pos());
1387
1.90k
  m_output << ", " << std::to_string(s_maxMemory - s_maxSize) << ")";
1388
1.90k
  m_output << ", ";
1389
1.90k
  m_output << "mod(";
1390
1.90k
  visit(_x.size());
1391
1.90k
  m_output << ", " << std::to_string(s_maxSize) << ")";
1392
1.90k
  m_output << ")\n";
1393
1.90k
}
1394
1395
void ProtoConverter::visit(SelfDestructStmt const& _x)
1396
1.83k
{
1397
1.83k
  m_output << "selfdestruct";
1398
1.83k
  m_output << "(";
1399
1.83k
  visit(_x.addr());
1400
1.83k
  m_output << ")\n";
1401
1.83k
}
1402
1403
void ProtoConverter::visit(TerminatingStmt const& _x)
1404
76.0k
{
1405
76.0k
  switch (_x.term_oneof_case())
1406
76.0k
  {
1407
2.12k
  case TerminatingStmt::kStopInvalid:
1408
2.12k
    visit(_x.stop_invalid());
1409
2.12k
    break;
1410
1.90k
  case TerminatingStmt::kRetRev:
1411
1.90k
    visit(_x.ret_rev());
1412
1.90k
    break;
1413
1.83k
  case TerminatingStmt::kSelfDes:
1414
1.83k
    visit(_x.self_des());
1415
1.83k
    break;
1416
70.1k
  case TerminatingStmt::TERM_ONEOF_NOT_SET:
1417
70.1k
    break;
1418
76.0k
  }
1419
76.0k
}
1420
1421
void ProtoConverter::visit(UnaryOpData const& _x)
1422
1.51k
{
1423
1.51k
  switch (_x.op())
1424
1.51k
  {
1425
796
  case UnaryOpData::SIZE:
1426
796
    m_output << Whiskers(R"(datasize("<id>"))")
1427
796
      ("id", getObjectIdentifier(static_cast<unsigned>(_x.identifier())))
1428
796
      .render();
1429
796
    break;
1430
715
  case UnaryOpData::OFFSET:
1431
715
    m_output << Whiskers(R"(dataoffset("<id>"))")
1432
715
      ("id", getObjectIdentifier(static_cast<unsigned>(_x.identifier())))
1433
715
      .render();
1434
715
    break;
1435
1.51k
  }
1436
1.51k
}
1437
1438
void ProtoConverter::visit(Statement const& _x)
1439
1.14M
{
1440
1.14M
  switch (_x.stmt_oneof_case())
1441
1.14M
  {
1442
11.2k
  case Statement::kDecl:
1443
11.2k
    visit(_x.decl());
1444
11.2k
    break;
1445
24.9k
  case Statement::kAssignment:
1446
    // Create an assignment statement only if there is at least one variable
1447
    // declaration that is in scope.
1448
24.9k
    if (varDeclAvailable())
1449
19.8k
      visit(_x.assignment());
1450
24.9k
    break;
1451
71.7k
  case Statement::kIfstmt:
1452
71.7k
    if (_x.ifstmt().if_body().statements_size() > 0)
1453
51.3k
      visit(_x.ifstmt());
1454
71.7k
    break;
1455
71.5k
  case Statement::kStorageFunc:
1456
71.5k
    visit(_x.storage_func());
1457
71.5k
    break;
1458
8.50k
  case Statement::kBlockstmt:
1459
8.50k
    if (_x.blockstmt().statements_size() > 0)
1460
6.40k
      visit(_x.blockstmt());
1461
8.50k
    break;
1462
64.9k
  case Statement::kForstmt:
1463
64.9k
    if (_x.forstmt().for_body().statements_size() > 0 && !m_filterUnboundedLoops)
1464
29.0k
      visit(_x.forstmt());
1465
64.9k
    break;
1466
75.7k
  case Statement::kBoundedforstmt:
1467
75.7k
    if (_x.boundedforstmt().for_body().statements_size() > 0)
1468
54.7k
      visit(_x.boundedforstmt());
1469
75.7k
    break;
1470
89.3k
  case Statement::kSwitchstmt:
1471
89.3k
    visit(_x.switchstmt());
1472
89.3k
    break;
1473
83.2k
  case Statement::kBreakstmt:
1474
83.2k
    if (m_inForBodyScope)
1475
11.3k
      m_output << "break\n";
1476
83.2k
    break;
1477
76.9k
  case Statement::kContstmt:
1478
76.9k
    if (m_inForBodyScope)
1479
12.3k
      m_output << "continue\n";
1480
76.9k
    break;
1481
6.31k
  case Statement::kLogFunc:
1482
    // Log is a stateful statement since it writes to storage.
1483
6.31k
    if (!m_filterStatefulInstructions)
1484
5.35k
      visit(_x.log_func());
1485
6.31k
    break;
1486
8.35k
  case Statement::kCopyFunc:
1487
8.35k
    visit(_x.copy_func());
1488
8.35k
    break;
1489
11.2k
  case Statement::kExtcodeCopy:
1490
    // Extcodecopy may change state if external code is copied via a
1491
    // sequence of mload/sstore.
1492
11.2k
    if (!m_filterStatefulInstructions)
1493
10.0k
      visit(_x.extcode_copy());
1494
11.2k
    break;
1495
76.0k
  case Statement::kTerminatestmt:
1496
76.0k
    visit(_x.terminatestmt());
1497
76.0k
    break;
1498
115k
  case Statement::kFunctioncall:
1499
115k
    if (!m_functionSigMap.empty())
1500
90.5k
    {
1501
90.5k
      unsigned index = counter() % m_functionSigMap.size();
1502
90.5k
      auto iter = m_functionSigMap.begin();
1503
90.5k
      advance(iter, index);
1504
90.5k
      visit(_x.functioncall(), iter->first);
1505
90.5k
    }
1506
115k
    break;
1507
107k
  case Statement::kFuncdef:
1508
107k
    if (_x.funcdef().block().statements_size() > 0)
1509
102k
      if (!m_inForInitScope)
1510
102k
        visit(_x.funcdef());
1511
107k
    break;
1512
9.18k
  case Statement::kPop:
1513
9.18k
    visit(_x.pop());
1514
9.18k
    break;
1515
80.4k
  case Statement::kLeave:
1516
80.4k
    if (m_inFunctionDef)
1517
33.9k
      visit(_x.leave());
1518
80.4k
    break;
1519
14.5k
  case Statement::kMultidecl:
1520
14.5k
    visit(_x.multidecl());
1521
14.5k
    break;
1522
133k
  case Statement::STMT_ONEOF_NOT_SET:
1523
133k
    break;
1524
1.14M
  }
1525
1.14M
}
1526
1527
void ProtoConverter::openBlockScope()
1528
363k
{
1529
363k
  m_scopeFuncs.emplace_back();
1530
1531
  // Create new block scope inside current function scope
1532
363k
  if (m_inFunctionDef)
1533
200k
  {
1534
200k
    yulAssert(
1535
200k
      !m_funcVars.empty(),
1536
200k
      "Proto fuzzer: Invalid data structure"
1537
200k
    );
1538
200k
    m_funcVars.back().push_back(std::vector<std::string>{});
1539
200k
    if (m_inForInitScope && m_forInitScopeExtEnabled)
1540
2.30k
    {
1541
2.30k
      yulAssert(
1542
2.30k
        !m_funcForLoopInitVars.empty(),
1543
2.30k
        "Proto fuzzer: Invalid data structure"
1544
2.30k
      );
1545
2.30k
      m_funcForLoopInitVars.back().push_back(std::vector<std::string>{});
1546
2.30k
    }
1547
200k
  }
1548
163k
  else
1549
163k
  {
1550
163k
    m_globalVars.emplace_back();
1551
163k
    if (m_inForInitScope && m_forInitScopeExtEnabled)
1552
4.86k
      m_globalForLoopInitVars.emplace_back();
1553
163k
  }
1554
363k
}
1555
1556
void ProtoConverter::openFunctionScope(std::vector<std::string> const& _funcParams)
1557
102k
{
1558
102k
  m_funcVars.push_back(std::vector<std::vector<std::string>>({_funcParams}));
1559
102k
  m_funcForLoopInitVars.push_back(std::vector<std::vector<std::string>>({}));
1560
102k
}
1561
1562
void ProtoConverter::updateFunctionMaps(std::string const& _var)
1563
102k
{
1564
102k
  size_t erased = m_functionSigMap.erase(_var);
1565
1566
102k
  for (auto const& i: m_functionDefMap)
1567
640k
    if (i.second == _var)
1568
102k
    {
1569
102k
      erased += m_functionDefMap.erase(i.first);
1570
102k
      break;
1571
102k
    }
1572
1573
102k
  yulAssert(erased == 2, "Proto fuzzer: Function maps not updated");
1574
102k
}
1575
1576
void ProtoConverter::closeBlockScope()
1577
363k
{
1578
  // Remove functions declared in the block that is going
1579
  // out of scope from the global function map.
1580
363k
  for (auto const& f: m_scopeFuncs.back())
1581
102k
  {
1582
102k
    size_t numFuncsRemoved = m_functions.size();
1583
102k
    m_functions.erase(remove(m_functions.begin(), m_functions.end(), f), m_functions.end());
1584
102k
    numFuncsRemoved -= m_functions.size();
1585
102k
    yulAssert(
1586
102k
      numFuncsRemoved == 1,
1587
102k
      "Proto fuzzer: Nothing or too much went out of scope"
1588
102k
    );
1589
102k
    updateFunctionMaps(f);
1590
102k
  }
1591
  // Pop back the vector of scoped functions.
1592
363k
  if (!m_scopeFuncs.empty())
1593
363k
    m_scopeFuncs.pop_back();
1594
1595
  // If block belongs to function body, then remove
1596
  // local variables in function body that are going out of scope.
1597
363k
  if (m_inFunctionDef)
1598
200k
  {
1599
200k
    yulAssert(!m_funcVars.empty(), "Proto fuzzer: Invalid data structure");
1600
200k
    if (!m_funcVars.back().empty())
1601
200k
      m_funcVars.back().pop_back();
1602
200k
  }
1603
  // Remove variables declared in vanilla block from current
1604
  // global scope.
1605
163k
  else
1606
163k
  {
1607
163k
    yulAssert(!m_globalVars.empty(), "Proto fuzzer: Invalid data structure");
1608
163k
    m_globalVars.pop_back();
1609
163k
  }
1610
363k
}
1611
1612
void ProtoConverter::closeFunctionScope()
1613
102k
{
1614
102k
  yulAssert(!m_funcVars.empty(), "Proto fuzzer: Invalid data structure");
1615
102k
  m_funcVars.pop_back();
1616
102k
  yulAssert(!m_funcForLoopInitVars.empty(), "Proto fuzzer: Invalid data structure");
1617
102k
  m_funcForLoopInitVars.pop_back();
1618
102k
}
1619
1620
void ProtoConverter::addVarsToScope(std::vector<std::string> const& _vars)
1621
117k
{
1622
  // If we are in function definition, add the new vars to current function scope
1623
117k
  if (m_inFunctionDef)
1624
79.2k
  {
1625
    // If we are directly in for-init block, add the newly created vars to the
1626
    // stack of for-init variables.
1627
79.2k
    if (m_inForInitScope && m_forInitScopeExtEnabled)
1628
246
    {
1629
246
      yulAssert(
1630
246
        !m_funcForLoopInitVars.empty() && !m_funcForLoopInitVars.back().empty(),
1631
246
        "Proto fuzzer: Invalid data structure"
1632
246
      );
1633
246
      m_funcForLoopInitVars.back().back().insert(
1634
246
        m_funcForLoopInitVars.back().back().end(),
1635
246
        _vars.begin(),
1636
246
        _vars.end()
1637
246
      );
1638
246
    }
1639
79.0k
    else
1640
79.0k
    {
1641
79.0k
      yulAssert(
1642
79.0k
        !m_funcVars.empty() && !m_funcVars.back().empty(),
1643
79.0k
        "Proto fuzzer: Invalid data structure"
1644
79.0k
      );
1645
79.0k
      m_funcVars.back().back().insert(
1646
79.0k
        m_funcVars.back().back().end(),
1647
79.0k
        _vars.begin(),
1648
79.0k
        _vars.end()
1649
79.0k
      );
1650
79.0k
    }
1651
79.2k
  }
1652
  // If we are in a vanilla block, add the new vars to current global scope
1653
37.7k
  else
1654
37.7k
  {
1655
37.7k
    if (m_inForInitScope && m_forInitScopeExtEnabled)
1656
70
    {
1657
70
      yulAssert(
1658
70
        !m_globalForLoopInitVars.empty(),
1659
70
        "Proto fuzzer: Invalid data structure"
1660
70
      );
1661
70
      m_globalForLoopInitVars.back().insert(
1662
70
        m_globalForLoopInitVars.back().end(),
1663
70
        _vars.begin(),
1664
70
        _vars.end()
1665
70
      );
1666
70
    }
1667
37.7k
    else
1668
37.7k
    {
1669
37.7k
      yulAssert(
1670
37.7k
        !m_globalVars.empty(),
1671
37.7k
        "Proto fuzzer: Invalid data structure"
1672
37.7k
      );
1673
37.7k
      m_globalVars.back().insert(
1674
37.7k
        m_globalVars.back().end(),
1675
37.7k
        _vars.begin(),
1676
37.7k
        _vars.end()
1677
37.7k
      );
1678
37.7k
    }
1679
37.7k
  }
1680
117k
}
1681
1682
void ProtoConverter::visit(Block const& _x)
1683
363k
{
1684
363k
  openBlockScope();
1685
1686
  // Register function declarations in this scope unless this
1687
  // scope belongs to for-init (in which function declarations
1688
  // are forbidden).
1689
363k
  for (auto const& statement: _x.statements())
1690
1.14M
    if (statement.has_funcdef() && statement.funcdef().block().statements_size() > 0 && !m_inForInitScope)
1691
102k
      registerFunction(&statement.funcdef());
1692
1693
363k
  if (_x.statements_size() > 0)
1694
328k
  {
1695
328k
    m_output << "{\n";
1696
328k
    bool wasForInitScopeExtEnabled = m_forInitScopeExtEnabled;
1697
328k
    for (auto const& st: _x.statements())
1698
1.14M
    {
1699
      // If statement is block or introduces one and we are in for-init block
1700
      // then temporarily disable scope extension if it is not already disabled.
1701
1.14M
      if (
1702
1.14M
        (st.has_blockstmt() || st.has_switchstmt() || st.has_ifstmt()) &&
1703
1.14M
        m_inForInitScope &&
1704
1.14M
        m_forInitScopeExtEnabled
1705
1.14M
      )
1706
1.01k
        m_forInitScopeExtEnabled = false;
1707
1.14M
      visit(st);
1708
1.14M
      m_forInitScopeExtEnabled = wasForInitScopeExtEnabled;
1709
1.14M
    }
1710
328k
    m_output << "}\n";
1711
328k
  }
1712
34.8k
  else
1713
34.8k
    m_output << "{}\n";
1714
363k
  closeBlockScope();
1715
363k
}
1716
1717
std::vector<std::string> ProtoConverter::createVars(unsigned _startIdx, unsigned _endIdx)
1718
283k
{
1719
283k
  yulAssert(_endIdx > _startIdx, "Proto fuzzer: Variable indices not in range");
1720
283k
  std::string varsStr = suffixedVariableNameList("x_", _startIdx, _endIdx);
1721
283k
  m_output << varsStr;
1722
283k
  std::vector<std::string> varsVec;
1723
283k
  boost::split(
1724
283k
    varsVec,
1725
283k
    varsStr,
1726
283k
    boost::algorithm::is_any_of(", "),
1727
283k
    boost::algorithm::token_compress_on
1728
283k
  );
1729
1730
283k
  yulAssert(
1731
283k
    varsVec.size() == (_endIdx - _startIdx),
1732
283k
    "Proto fuzzer: Variable count mismatch during function definition"
1733
283k
  );
1734
283k
  m_counter += varsVec.size();
1735
283k
  return varsVec;
1736
283k
}
1737
1738
void ProtoConverter::registerFunction(FunctionDef const* _x)
1739
102k
{
1740
102k
  unsigned numInParams = _x->num_input_params() % s_modInputParams;
1741
102k
  unsigned numOutParams = _x->num_output_params() % s_modOutputParams;
1742
102k
  NumFunctionReturns numReturns;
1743
102k
  if (numOutParams == 0)
1744
17.9k
    numReturns = NumFunctionReturns::None;
1745
84.7k
  else if (numOutParams == 1)
1746
16.0k
    numReturns = NumFunctionReturns::Single;
1747
68.6k
  else
1748
68.6k
    numReturns = NumFunctionReturns::Multiple;
1749
1750
  // Generate function name
1751
102k
  std::string funcName = functionName(numReturns);
1752
1753
  // Register function
1754
102k
  auto ret = m_functionSigMap.emplace(std::make_pair(funcName, std::make_pair(numInParams, numOutParams)));
1755
102k
  yulAssert(ret.second, "Proto fuzzer: Function already exists.");
1756
102k
  m_functions.push_back(funcName);
1757
102k
  m_scopeFuncs.back().push_back(funcName);
1758
102k
  m_functionDefMap.emplace(std::make_pair(_x, funcName));
1759
102k
}
1760
1761
void ProtoConverter::fillFunctionCallInput(unsigned _numInParams)
1762
67.8k
{
1763
282k
  for (unsigned i = 0; i < _numInParams; i++)
1764
214k
  {
1765
    // Throw a 4-sided dice to choose whether to populate function input
1766
    // argument from a pseudo-randomly chosen slot in one of the following
1767
    // locations: calldata, memory, storage, or Yul optimizer dictionary.
1768
214k
    unsigned diceValue = counter() % 4;
1769
    // Pseudo-randomly choose one of the first ten 32-byte
1770
    // aligned slots.
1771
214k
    std::string slot = std::to_string((counter() % 10) * 32);
1772
214k
    switch (diceValue)
1773
214k
    {
1774
87.5k
    case 0:
1775
87.5k
      m_output << "calldataload(" << slot << ")";
1776
87.5k
      break;
1777
19.2k
    case 1:
1778
19.2k
    {
1779
      // Access memory within stipulated bounds
1780
19.2k
      slot = "mod(" + dictionaryToken() + ", " + std::to_string(s_maxMemory - 32) + ")";
1781
19.2k
      m_output << "mload(" << slot << ")";
1782
19.2k
      break;
1783
0
    }
1784
87.9k
    case 2:
1785
87.9k
      m_output << "sload(" << slot << ")";
1786
87.9k
      break;
1787
19.6k
    default:
1788
      // Call to dictionaryToken() automatically picks a token
1789
      // at a pseudo-random location.
1790
19.6k
      m_output << dictionaryToken();
1791
19.6k
      break;
1792
214k
    }
1793
214k
    if (i < _numInParams - 1)
1794
146k
      m_output << ",";
1795
214k
  }
1796
67.8k
}
1797
1798
void ProtoConverter::saveFunctionCallOutput(std::vector<std::string> const& _varsVec)
1799
69.7k
{
1800
69.7k
  constexpr auto numSlots = 10;
1801
69.7k
  constexpr auto slotSize = 32;
1802
1803
69.7k
  for (std::string const& var: _varsVec)
1804
232k
  {
1805
    // Flip a dice to choose whether to save output values
1806
    // in storage or memory.
1807
232k
    unsigned diceThrow = counter() % (m_evmVersion.supportsTransientStorage() ? 3 : 2);
1808
    // Pseudo-randomly choose one of the first ten 32-byte
1809
    // aligned slots.
1810
232k
    std::string slot = std::to_string((counter() % numSlots) * slotSize);
1811
232k
    if (diceThrow == 0)
1812
208k
      m_output << "sstore(" << slot << ", " << var << ")\n";
1813
23.6k
    else if (diceThrow == 1)
1814
21.7k
      m_output << "mstore(" << slot << ", " << var << ")\n";
1815
1.92k
    else
1816
1.92k
    {
1817
1.92k
      yulAssert(
1818
1.92k
        m_evmVersion.supportsTransientStorage(),
1819
1.92k
        "Proto fuzzer: Invalid evm version"
1820
1.92k
      );
1821
1.92k
      m_output << "tstore(" << slot << ", " << var << ")\n";
1822
1.92k
    }
1823
232k
  }
1824
69.7k
}
1825
1826
void ProtoConverter::createFunctionCall(
1827
  std::string const& _funcName,
1828
  unsigned _numInParams,
1829
  unsigned _numOutParams
1830
)
1831
82.5k
{
1832
82.5k
  std::vector<std::string> varsVec{};
1833
82.5k
  if (_numOutParams > 0)
1834
69.7k
  {
1835
69.7k
    unsigned startIdx = counter();
1836
    // Prints the following to output stream "let x_i,...,x_n := "
1837
69.7k
    varsVec = createVarDecls(
1838
69.7k
      startIdx,
1839
69.7k
      startIdx + _numOutParams,
1840
69.7k
      /*isAssignment=*/true
1841
69.7k
    );
1842
69.7k
  }
1843
1844
  // Call the function with the correct number of input parameters
1845
82.5k
  m_output << _funcName << "(";
1846
82.5k
  if (_numInParams > 0)
1847
67.8k
    fillFunctionCallInput(_numInParams);
1848
82.5k
  m_output << ")\n";
1849
1850
82.5k
  if (!varsVec.empty())
1851
69.7k
  {
1852
    // Save values returned by function so that they are reflected
1853
    // in the interpreter trace.
1854
69.7k
    saveFunctionCallOutput(varsVec);
1855
    // Add newly minted vars to current scope
1856
69.7k
    addVarsToScope(varsVec);
1857
69.7k
  }
1858
12.7k
  else
1859
82.5k
    yulAssert(_numOutParams == 0, "Proto fuzzer: Function return value not saved");
1860
82.5k
}
1861
1862
void ProtoConverter::createFunctionDefAndCall(
1863
  FunctionDef const& _x,
1864
  unsigned _numInParams,
1865
  unsigned _numOutParams
1866
)
1867
102k
{
1868
102k
  yulAssert(
1869
102k
    ((_numInParams <= s_modInputParams - 1) && (_numOutParams <= s_modOutputParams - 1)),
1870
102k
    "Proto fuzzer: Too many function I/O parameters requested."
1871
102k
  );
1872
1873
  // Obtain function name
1874
102k
  yulAssert(m_functionDefMap.count(&_x), "Proto fuzzer: Unregistered function");
1875
102k
  std::string funcName = m_functionDefMap.at(&_x);
1876
1877
102k
  std::vector<std::string> varsVec = {};
1878
102k
  m_output << "function " << funcName << "(";
1879
102k
  unsigned startIdx = counter();
1880
102k
  if (_numInParams > 0)
1881
81.9k
    varsVec = createVars(startIdx, startIdx + _numInParams);
1882
102k
  m_output << ")";
1883
1884
102k
  std::vector<std::string> outVarsVec = {};
1885
  // This creates -> x_n+1,...,x_r
1886
102k
  if (_numOutParams > 0)
1887
84.7k
  {
1888
84.7k
    m_output << " -> ";
1889
84.7k
    if (varsVec.empty())
1890
14.5k
    {
1891
14.5k
      yulAssert(_numInParams == 0, "Proto fuzzer: Input parameters not processed correctly");
1892
14.5k
      varsVec = createVars(startIdx, startIdx + _numOutParams);
1893
14.5k
    }
1894
70.1k
    else
1895
70.1k
    {
1896
70.1k
      outVarsVec = createVars(startIdx + _numInParams, startIdx + _numInParams + _numOutParams);
1897
70.1k
      varsVec.insert(varsVec.end(), outVarsVec.begin(), outVarsVec.end());
1898
70.1k
    }
1899
84.7k
  }
1900
102k
  yulAssert(varsVec.size() == _numInParams + _numOutParams, "Proto fuzzer: Function parameters not processed correctly");
1901
1902
102k
  m_output << "\n";
1903
1904
  // If function definition is in for-loop body, update
1905
102k
  bool wasInForBody = m_inForBodyScope;
1906
102k
  m_inForBodyScope = false;
1907
1908
102k
  bool wasInFunctionDef = m_inFunctionDef;
1909
102k
  m_inFunctionDef = true;
1910
1911
  // Create new function scope and add function input and return
1912
  // parameters to it.
1913
102k
  openFunctionScope(varsVec);
1914
  // Visit function body
1915
102k
  visit(_x.block());
1916
102k
  closeFunctionScope();
1917
1918
102k
  m_inForBodyScope = wasInForBody;
1919
102k
  m_inFunctionDef = wasInFunctionDef;
1920
1921
102k
  yulAssert(
1922
102k
    !m_inForInitScope,
1923
102k
    "Proto fuzzer: Trying to create function call inside a for-init block"
1924
102k
  );
1925
102k
  if (_x.force_call())
1926
82.5k
    createFunctionCall(funcName, _numInParams, _numOutParams);
1927
102k
}
1928
1929
void ProtoConverter::visit(FunctionDef const& _x)
1930
102k
{
1931
102k
  unsigned numInParams = _x.num_input_params() % s_modInputParams;
1932
102k
  unsigned numOutParams = _x.num_output_params() % s_modOutputParams;
1933
102k
  createFunctionDefAndCall(_x, numInParams, numOutParams);
1934
102k
}
1935
1936
void ProtoConverter::visit(PopStmt const& _x)
1937
9.18k
{
1938
9.18k
  m_output << "pop(";
1939
9.18k
  visit(_x.expr());
1940
9.18k
  m_output << ")\n";
1941
9.18k
}
1942
1943
void ProtoConverter::visit(LeaveStmt const&)
1944
33.9k
{
1945
33.9k
  m_output << "leave\n";
1946
33.9k
}
1947
1948
std::string ProtoConverter::getObjectIdentifier(unsigned _x)
1949
1.51k
{
1950
1.51k
  unsigned currentId = currentObjectId();
1951
1.51k
  std::string currentObjName = "object" + std::to_string(currentId);
1952
1.51k
  yulAssert(
1953
1.51k
    m_objectScope.count(currentObjName) && !m_objectScope.at(currentObjName).empty(),
1954
1.51k
    "Yul proto fuzzer: Error referencing object"
1955
1.51k
  );
1956
1.51k
  std::vector<std::string> objectIdsInScope = m_objectScope.at(currentObjName);
1957
1.51k
  return objectIdsInScope[_x % objectIdsInScope.size()];
1958
1.51k
}
1959
1960
void ProtoConverter::visit(Code const& _x)
1961
4.44k
{
1962
4.44k
  m_output << "code {\n";
1963
4.44k
  visit(_x.block());
1964
4.44k
  m_output << "}\n";
1965
4.44k
}
1966
1967
void ProtoConverter::visit(Data const& _x)
1968
1.17k
{
1969
  // TODO: Generate random data block identifier
1970
1.17k
  m_output << "data \"" << s_dataIdentifier << "\" hex\"" << createHex(_x.hex()) << "\"\n";
1971
1.17k
}
1972
1973
void ProtoConverter::visit(Object const& _x)
1974
4.44k
{
1975
  // object "object<n>" {
1976
  // ...
1977
  // }
1978
4.44k
  m_output << "object " << newObjectId() << " {\n";
1979
4.44k
  visit(_x.code());
1980
4.44k
  if (_x.has_data())
1981
1.17k
    visit(_x.data());
1982
4.44k
  for (auto const& subObj: _x.sub_obj())
1983
1.60k
    visit(subObj);
1984
4.44k
  m_output << "}\n";
1985
4.44k
}
1986
1987
void ProtoConverter::buildObjectScopeTree(Object const& _x)
1988
4.44k
{
1989
  // Identifies object being visited
1990
4.44k
  std::string objectName = newObjectId(false);
1991
4.44k
  std::vector<std::string> node{objectName};
1992
4.44k
  if (_x.has_data())
1993
1.17k
    node.emplace_back(s_dataIdentifier);
1994
4.44k
  for (auto const& subObj: _x.sub_obj())
1995
1.60k
  {
1996
    // Identifies sub object whose numeric suffix is
1997
    // m_objectId
1998
1.60k
    unsigned subObjectId = m_objectId;
1999
1.60k
    std::string subObjectName = "object" + std::to_string(subObjectId);
2000
1.60k
    node.push_back(subObjectName);
2001
1.60k
    buildObjectScopeTree(subObj);
2002
    // Add sub-object to object's ancestors
2003
1.60k
    yulAssert(m_objectScope.count(subObjectName), "Yul proto fuzzer: Invalid object hierarchy");
2004
1.60k
    for (std::string const& item: m_objectScope.at(subObjectName))
2005
9.15k
      if (item != subObjectName)
2006
7.55k
        node.emplace_back(subObjectName + "." + item);
2007
1.60k
  }
2008
4.44k
  m_objectScope.emplace(objectName, node);
2009
4.44k
}
2010
2011
void ProtoConverter::visit(Program const& _x)
2012
38.6k
{
2013
  // Initialize input size
2014
38.6k
  m_inputSize = static_cast<unsigned>(_x.ByteSizeLong());
2015
2016
  // Record EVM Version
2017
38.6k
  m_evmVersion = evmVersionMapping(_x.ver());
2018
2019
  // Program is either a Yul object or a block of
2020
  // statements.
2021
38.6k
  switch (_x.program_oneof_case())
2022
38.6k
  {
2023
35.7k
  case Program::kBlock:
2024
35.7k
    m_output << "{\n";
2025
35.7k
    m_output << "mstore(memoryguard(0x10000), 1)\n";
2026
35.7k
    m_output << "sstore(mload(calldataload(0)), 1)\n";
2027
35.7k
    visit(_x.block());
2028
35.7k
    m_output << "}\n";
2029
35.7k
    break;
2030
2.84k
  case Program::kObj:
2031
2.84k
    m_isObject = true;
2032
2.84k
    buildObjectScopeTree(_x.obj());
2033
    // Reset object id counter
2034
2.84k
    m_objectId = 0;
2035
2.84k
    visit(_x.obj());
2036
2.84k
    break;
2037
66
  case Program::PROGRAM_ONEOF_NOT_SET:
2038
    // {} is a trivial Yul program
2039
66
    m_output << "{}";
2040
66
    break;
2041
38.6k
  }
2042
38.6k
}
2043
2044
std::string ProtoConverter::programToString(Program const& _input)
2045
38.6k
{
2046
38.6k
  visit(_input);
2047
38.6k
  return m_output.str();
2048
38.6k
}
2049
2050
std::string ProtoConverter::functionTypeToString(NumFunctionReturns _type)
2051
102k
{
2052
102k
  switch (_type)
2053
102k
  {
2054
17.9k
  case NumFunctionReturns::None:
2055
17.9k
    return "n";
2056
16.0k
  case NumFunctionReturns::Single:
2057
16.0k
    return "s";
2058
68.6k
  case NumFunctionReturns::Multiple:
2059
68.6k
    return "m";
2060
102k
  }
2061
102k
}