Coverage Report

Created: 2025-06-24 07:59

/src/solidity/libevmasm/Assembly.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
  This file is part of solidity.
3
4
  solidity is free software: you can redistribute it and/or modify
5
  it under the terms of the GNU General Public License as published by
6
  the Free Software Foundation, either version 3 of the License, or
7
  (at your option) any later version.
8
9
  solidity is distributed in the hope that it will be useful,
10
  but WITHOUT ANY WARRANTY; without even the implied warranty of
11
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
  GNU General Public License for more details.
13
14
  You should have received a copy of the GNU General Public License
15
  along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16
*/
17
// SPDX-License-Identifier: GPL-3.0
18
/** @file Assembly.cpp
19
 * @author Gav Wood <i@gavwood.com>
20
 * @date 2014
21
 */
22
23
#include <libevmasm/Assembly.h>
24
25
#include <libevmasm/CommonSubexpressionEliminator.h>
26
#include <libevmasm/ControlFlowGraph.h>
27
#include <libevmasm/PeepholeOptimiser.h>
28
#include <libevmasm/Inliner.h>
29
#include <libevmasm/JumpdestRemover.h>
30
#include <libevmasm/BlockDeduplicator.h>
31
#include <libevmasm/ConstantOptimiser.h>
32
33
#include <liblangutil/CharStream.h>
34
#include <liblangutil/Exceptions.h>
35
36
#include <libsolutil/JSON.h>
37
#include <libsolutil/StringUtils.h>
38
39
#include <fmt/format.h>
40
41
#include <range/v3/algorithm/any_of.hpp>
42
#include <range/v3/view/drop_exactly.hpp>
43
#include <range/v3/view/enumerate.hpp>
44
#include <range/v3/view/map.hpp>
45
46
#include <fstream>
47
#include <limits>
48
#include <iterator>
49
#include <stack>
50
51
using namespace solidity;
52
using namespace solidity::evmasm;
53
using namespace solidity::langutil;
54
using namespace solidity::util;
55
56
namespace
57
{
58
59
/// Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode,
60
/// this class can be instantiated in that scope. It will record the current bytecode size (before addition)
61
/// and, at destruction time, record the new bytecode size. This information is then added to an external
62
/// instruction locations vector.
63
/// If the instruction decomposes into multiple individual evm instructions, `emit` can be
64
/// called for all but the last one (which will be emitted by the destructor).
65
class InstructionLocationEmitter
66
{
67
public:
68
  InstructionLocationEmitter(
69
    std::vector<LinkerObject::InstructionLocation>& _instructionLocations,
70
    bytes const& _bytecode,
71
    size_t const _assemblyItemIndex
72
  ):
73
    m_instructionLocations(_instructionLocations),
74
    m_bytecode(_bytecode),
75
    m_assemblyItemIndex(_assemblyItemIndex),
76
    m_instructionLocationStart(_bytecode.size())
77
19.1M
  {}
78
79
  ~InstructionLocationEmitter()
80
19.1M
  {
81
19.1M
    emit();
82
19.1M
  }
83
84
  void emit()
85
19.1M
  {
86
19.1M
    auto const end = m_bytecode.size();
87
19.1M
    m_instructionLocations.push_back(LinkerObject::InstructionLocation{
88
19.1M
      .start = m_instructionLocationStart,
89
19.1M
      .end = end,
90
19.1M
      .assemblyItemIndex = m_assemblyItemIndex
91
19.1M
    });
92
19.1M
    m_instructionLocationStart = end;
93
19.1M
  }
94
95
private:
96
  std::vector<LinkerObject::InstructionLocation>& m_instructionLocations;
97
  bytes const& m_bytecode;
98
  size_t const m_assemblyItemIndex{};
99
  size_t m_instructionLocationStart{};
100
};
101
102
}
103
104
std::map<std::string, std::shared_ptr<std::string const>> Assembly::s_sharedSourceNames;
105
106
/// Appends @a _i to the items of the current code section and returns a reference to the
/// stored item. The stack deposit counter is updated by the item's deposit, the item inherits
/// the current source location if it has no valid location of its own, and its modifier depth
/// is set to the current modifier depth.
/// Throws AssemblyException if the deposit counter is already negative on entry.
AssemblyItem const& Assembly::append(AssemblyItem _i)
{
  assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow.");
  // Read the deposit before the item is moved into the section.
  m_deposit += static_cast<int>(_i.deposit());
  solAssert(m_currentCodeSection < m_codeSections.size());

  AssemblyItem& stored = m_codeSections.at(m_currentCodeSection).items.emplace_back(std::move(_i));
  if (!stored.location().isValid() && m_currentSourceLocation.isValid())
    stored.setLocation(m_currentSourceLocation);
  stored.m_modifierDepth = m_currentModifierDepth;
  return stored;
}
118
119
unsigned Assembly::codeSize(unsigned subTagSize) const
120
86.6k
{
121
104k
  for (unsigned tagSize = subTagSize; true; ++tagSize)
122
104k
  {
123
104k
    size_t ret = 1;
124
104k
    for (auto const& i: m_data)
125
18.9k
      ret += i.second.size();
126
127
104k
    for (auto const& codeSection: m_codeSections)
128
104k
      for (AssemblyItem const& i: codeSection.items)
129
38.9M
        ret += i.bytesRequired(tagSize, m_evmVersion, Precision::Precise);
130
104k
    if (numberEncodingSize(ret) <= tagSize)
131
86.6k
      return static_cast<unsigned>(ret);
132
104k
  }
133
86.6k
}
134
135
/// Fills the (single, still empty) code section of this assembly with the items described by
/// the JSON array @a _code. @a _sourceList is forwarded to createAssemblyItemFromJSON to
/// resolve source indices.
/// A `tag` item is expected to be followed by a `JUMPDEST` entry, which is consumed without
/// being stored as a separate item. A `tag` as the very last entry is accepted.
/// Throws AssemblyImportException if @a _code is not an array, if a `JUMPDEST` appears without
/// a preceding tag, or if a tag is followed by something other than `JUMPDEST`.
void Assembly::importAssemblyItemsFromJSON(Json const& _code, std::vector<std::string> const& _sourceList)
{
  // Assembly constructor creates first code section with proper type and empty `items`
  solAssert(m_codeSections.size() == 1);
  solAssert(m_codeSections[0].items.empty());
  // TODO: Add support for EOF and more than one code sections.
  solUnimplementedAssert(!m_eofVersion.has_value(), "Assembly import for EOF is not yet implemented.");
  solRequire(_code.is_array(), AssemblyImportException, "Supplied JSON is not an array.");
  for (auto jsonItemIter = std::begin(_code); jsonItemIter != std::end(_code); ++jsonItemIter)
  {
    AssemblyItem const& newItem = m_codeSections[0].items.emplace_back(createAssemblyItemFromJSON(*jsonItemIter, _sourceList));
    if (newItem == Instruction::JUMPDEST)
      solThrow(AssemblyImportException, "JUMPDEST instruction without a tag");
    else if (newItem.type() == AssemblyItemType::Tag)
    {
      // Step onto the entry following the tag; the loop header then skips over it.
      ++jsonItemIter;
      // The tag was the last entry: stop here, otherwise the loop header would
      // increment the iterator past the end of the array.
      if (jsonItemIter == std::end(_code))
        break;
      if (createAssemblyItemFromJSON(*jsonItemIter, _sourceList) != Instruction::JUMPDEST)
        solThrow(AssemblyImportException, "JUMPDEST expected after tag.");
    }
  }
}
156
157
/// Creates one AssemblyItem from its JSON object representation.
/// Recognised members are `name` (mandatory, non-empty string) and the optional `begin`, `end`,
/// `source`, `value`, `modifierDepth` and `jumpType`. `source` is an index into @a _sourceList
/// (-1 or absent means "no source").
/// Side effects: tag ids raise `m_usedTags`, and library / immutable names are registered in
/// `m_libraries` / `m_immutables` under their keccak256 hash.
/// Throws AssemblyImportException on unknown members, wrongly typed members, a negative
/// `modifierDepth`, an out-of-range source index, an unknown opcode name, a missing or
/// superfluous `value`, or a `jumpType` on anything but JUMP/JUMPI.
AssemblyItem Assembly::createAssemblyItemFromJSON(Json const& _json, std::vector<std::string> const& _sourceList)
{
  solRequire(_json.is_object(), AssemblyImportException, "Supplied JSON is not an object.");
  static std::set<std::string> const validMembers{"name", "begin", "end", "source", "value", "modifierDepth", "jumpType"};
  for (auto const& [member, _]: _json.items())
    solRequire(
      validMembers.count(member),
      AssemblyImportException,
      fmt::format(
        "Unknown member '{}'. Valid members are: {}.",
        member,
        solidity::util::joinHumanReadable(validMembers, ", ")
      )
    );
  solRequire(isOfType<std::string>(_json["name"]), AssemblyImportException, "Member 'name' missing or not of type string.");
  solRequire(isOfTypeIfExists<int>(_json, "begin"), AssemblyImportException, "Optional member 'begin' not of type int.");
  solRequire(isOfTypeIfExists<int>(_json, "end"), AssemblyImportException, "Optional member 'end' not of type int.");
  solRequire(isOfTypeIfExists<int>(_json, "source"), AssemblyImportException, "Optional member 'source' not of type int.");
  solRequire(isOfTypeIfExists<std::string>(_json, "value"), AssemblyImportException, "Optional member 'value' not of type string.");
  solRequire(isOfTypeIfExists<int>(_json, "modifierDepth"), AssemblyImportException, "Optional member 'modifierDepth' not of type int.");
  solRequire(isOfTypeIfExists<std::string>(_json, "jumpType"), AssemblyImportException, "Optional member 'jumpType' not of type string.");

  std::string name = get<std::string>(_json["name"]);
  solRequire(!name.empty(), AssemblyImportException, "Member 'name' is empty.");

  SourceLocation location;
  if (_json.contains("begin"))
    location.start = get<int>(_json["begin"]);
  if (_json.contains("end"))
    location.end = get<int>(_json["end"]);
  int srcIndex = getOrDefault<int>(_json, "source", -1);
  // Reject negative depths explicitly: casting them to size_t would silently turn them
  // into a huge positive value.
  int const parsedModifierDepth = getOrDefault<int>(_json, "modifierDepth", 0);
  solRequire(parsedModifierDepth >= 0, AssemblyImportException, "Optional member 'modifierDepth' must not be negative.");
  size_t modifierDepth = static_cast<size_t>(parsedModifierDepth);
  std::string value = getOrDefault<std::string>(_json, "value", "");
  std::string jumpType = getOrDefault<std::string>(_json, "jumpType", "");

  // Keeps m_usedTags strictly above every tag id seen so far and passes the id through.
  auto updateUsedTags = [&](u256 const& data)
  {
    m_usedTags = std::max(m_usedTags, static_cast<unsigned>(data) + 1);
    return data;
  };

  // Registers the immutable name under its hash; the same hash must map to the same name.
  auto storeImmutableHash = [&](std::string const& _immutableName) -> h256
  {
    h256 hash(util::keccak256(_immutableName));
    solAssert(m_immutables.count(hash) == 0 || m_immutables[hash] == _immutableName);
    m_immutables[hash] = _immutableName;
    return hash;
  };

  // Registers the library name under its hash; the same hash must map to the same name.
  auto storeLibraryHash = [&](std::string const& _libraryName) -> h256
  {
    h256 hash(util::keccak256(_libraryName));
    solAssert(m_libraries.count(hash) == 0 || m_libraries[hash] == _libraryName);
    m_libraries[hash] = _libraryName;
    return hash;
  };

  auto requireValueDefinedForInstruction = [&](std::string const& _name, std::string const& _value)
  {
    solRequire(
      !_value.empty(),
      AssemblyImportException,
      "Member 'value' is missing for instruction '" + _name + "', but the instruction needs a value."
    );
  };

  auto requireValueUndefinedForInstruction = [&](std::string const& _name, std::string const& _value)
  {
    solRequire(
      _value.empty(),
      AssemblyImportException,
      "Member 'value' defined for instruction '" + _name + "', but the instruction does not need a value."
    );
  };

  solRequire(srcIndex >= -1 && srcIndex < static_cast<int>(_sourceList.size()), AssemblyImportException, "Source index out of bounds.");
  if (srcIndex != -1)
    location.sourceName = sharedSourceName(_sourceList[static_cast<size_t>(srcIndex)]);

  AssemblyItem result(0);

  if (c_instructions.count(name))
  {
    // Plain EVM instruction: never carries a value, and only JUMP/JUMPI may carry a jump type.
    AssemblyItem item{c_instructions.at(name), langutil::DebugData::create(location)};
    if (!jumpType.empty())
    {
      if (item.instruction() == Instruction::JUMP || item.instruction() == Instruction::JUMPI)
      {
        std::optional<AssemblyItem::JumpType> parsedJumpType = AssemblyItem::parseJumpType(jumpType);
        if (!parsedJumpType.has_value())
          solThrow(AssemblyImportException, "Invalid jump type.");
        item.setJumpType(parsedJumpType.value());
      }
      else
        solThrow(
          AssemblyImportException,
          "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')"
        );
    }
    requireValueUndefinedForInstruction(name, value);
    result = item;
  }
  else
  {
    // Pseudo-instructions of the assembly (pushes of tags, subs, data, ...).
    solRequire(
      jumpType.empty(),
      AssemblyImportException,
      "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')"
    );
    if (name == "PUSH")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::Push, u256("0x" + value)};
    }
    else if (name == "PUSH [ErrorTag]")
    {
      requireValueUndefinedForInstruction(name, value);
      result = {AssemblyItemType::PushTag, 0};
    }
    else if (name == "PUSH [tag]")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushTag, updateUsedTags(u256(value))};
    }
    else if (name == "PUSH [$]")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushSub, u256("0x" + value)};
    }
    else if (name == "PUSH #[$]")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushSubSize, u256("0x" + value)};
    }
    else if (name == "PUSHSIZE")
    {
      requireValueUndefinedForInstruction(name, value);
      result = {AssemblyItemType::PushProgramSize, 0};
    }
    else if (name == "PUSHLIB")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushLibraryAddress, storeLibraryHash(value)};
    }
    else if (name == "PUSHDEPLOYADDRESS")
    {
      requireValueUndefinedForInstruction(name, value);
      result = {AssemblyItemType::PushDeployTimeAddress, 0};
    }
    else if (name == "PUSHIMMUTABLE")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushImmutable, storeImmutableHash(value)};
    }
    else if (name == "ASSIGNIMMUTABLE")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::AssignImmutable, storeImmutableHash(value)};
    }
    else if (name == "tag")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::Tag, updateUsedTags(u256(value))};
    }
    else if (name == "PUSH data")
    {
      requireValueDefinedForInstruction(name, value);
      result = {AssemblyItemType::PushData, u256("0x" + value)};
    }
    else if (name == "VERBATIM")
    {
      requireValueDefinedForInstruction(name, value);
      AssemblyItem item(fromHex(value), 0, 0);
      result = item;
    }
    else
      solThrow(AssemblyImportException, "Invalid opcode (" + name + ")");
  }
  result.setLocation(location);
  result.m_modifierDepth = modifierDepth;
  return result;
}
339
340
namespace
341
{
342
343
std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location)
344
0
{
345
0
  if (!_location.hasText() || _sourceCodes.empty())
346
0
    return {};
347
348
0
  auto it = _sourceCodes.find(*_location.sourceName);
349
0
  if (it == _sourceCodes.end())
350
0
    return {};
351
352
0
  return CharStream::singleLineSnippet(it->second, _location);
353
0
}
354
355
class Functionalizer
356
{
357
public:
358
  Functionalizer (std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes, Assembly const& _assembly):
359
    m_out(_out), m_prefix(_prefix), m_sourceCodes(_sourceCodes), m_assembly(_assembly)
360
0
  {}
361
362
  void feed(AssemblyItem const& _item, DebugInfoSelection const& _debugInfoSelection)
363
0
  {
364
0
    if (_item.location().isValid() && _item.location() != m_location)
365
0
    {
366
0
      flush();
367
0
      m_location = _item.location();
368
0
      printLocation(_debugInfoSelection);
369
0
    }
370
371
0
    std::string expression = _item.toAssemblyText(m_assembly);
372
373
0
    if (!(
374
0
      _item.canBeFunctional() &&
375
0
      _item.returnValues() <= 1 &&
376
0
      _item.arguments() <= m_pending.size()
377
0
    ))
378
0
    {
379
0
      flush();
380
0
      m_out << m_prefix << (_item.type() == Tag ? "" : "  ") << expression << std::endl;
381
0
      return;
382
0
    }
383
0
    if (_item.arguments() > 0)
384
0
    {
385
0
      expression += "(";
386
0
      for (size_t i = 0; i < _item.arguments(); ++i)
387
0
      {
388
0
        expression += m_pending.back();
389
0
        m_pending.pop_back();
390
0
        if (i + 1 < _item.arguments())
391
0
          expression += ", ";
392
0
      }
393
0
      expression += ")";
394
0
    }
395
396
0
    m_pending.push_back(expression);
397
0
    if (_item.returnValues() != 1)
398
0
      flush();
399
0
  }
400
401
  void flush()
402
0
  {
403
0
    for (std::string const& expression: m_pending)
404
0
      m_out << m_prefix << "  " << expression << std::endl;
405
0
    m_pending.clear();
406
0
  }
407
408
  void printLocation(DebugInfoSelection const& _debugInfoSelection)
409
0
  {
410
0
    if (!m_location.isValid() || (!_debugInfoSelection.location && !_debugInfoSelection.snippet))
411
0
      return;
412
413
0
    m_out << m_prefix << "    /*";
414
415
0
    if (_debugInfoSelection.location)
416
0
    {
417
0
      if (m_location.sourceName)
418
0
        m_out << " " + escapeAndQuoteString(*m_location.sourceName);
419
0
      if (m_location.hasText())
420
0
        m_out << ":" << std::to_string(m_location.start) + ":" + std::to_string(m_location.end);
421
0
    }
422
423
0
    if (_debugInfoSelection.snippet)
424
0
    {
425
0
      if (_debugInfoSelection.location)
426
0
        m_out << "  ";
427
428
0
      m_out << locationFromSources(m_sourceCodes, m_location);
429
0
    }
430
431
0
    m_out << " */" << std::endl;
432
0
  }
433
434
private:
435
  strings m_pending;
436
  SourceLocation m_location;
437
438
  std::ostream& m_out;
439
  std::string const& m_prefix;
440
  StringMap const& m_sourceCodes;
441
  Assembly const& m_assembly;
442
};
443
444
}
445
446
void Assembly::assemblyStream(
447
  std::ostream& _out,
448
  DebugInfoSelection const& _debugInfoSelection,
449
  std::string const& _prefix,
450
  StringMap const& _sourceCodes
451
) const
452
0
{
453
0
  Functionalizer f(_out, _prefix, _sourceCodes, *this);
454
455
0
  for (auto const& i: m_codeSections.front().items)
456
0
    f.feed(i, _debugInfoSelection);
457
0
  f.flush();
458
459
0
  for (size_t i = 1; i < m_codeSections.size(); ++i)
460
0
  {
461
0
    _out << std::endl << _prefix << "code_section_" << i << ": assembly {\n";
462
0
    Functionalizer codeSectionF(_out, _prefix + "    ", _sourceCodes, *this);
463
0
    for (auto const& item: m_codeSections[i].items)
464
0
      codeSectionF.feed(item, _debugInfoSelection);
465
0
    codeSectionF.flush();
466
0
    _out << _prefix << "}" << std::endl;
467
0
  }
468
469
0
  if (!m_data.empty() || !m_subs.empty())
470
0
  {
471
0
    _out << _prefix << "stop" << std::endl;
472
0
    for (auto const& i: m_data)
473
0
      if (u256(i.first) >= m_subs.size())
474
0
        _out << _prefix << "data_" << toHex(u256(i.first)) << " " << util::toHex(i.second) << std::endl;
475
476
0
    for (size_t i = 0; i < m_subs.size(); ++i)
477
0
    {
478
0
      _out << std::endl << _prefix << "sub_" << i << ": assembly {\n";
479
0
      m_subs[i]->assemblyStream(_out, _debugInfoSelection, _prefix + "    ", _sourceCodes);
480
0
      _out << _prefix << "}" << std::endl;
481
0
    }
482
0
  }
483
484
0
  if (m_auxiliaryData.size() > 0)
485
0
    _out << std::endl << _prefix << "auxdata: 0x" << util::toHex(m_auxiliaryData) << std::endl;
486
0
}
487
488
std::string Assembly::assemblyString(
489
  DebugInfoSelection const& _debugInfoSelection,
490
  StringMap const& _sourceCodes
491
) const
492
0
{
493
0
  std::ostringstream tmp;
494
0
  assemblyStream(tmp, _debugInfoSelection, "", _sourceCodes);
495
0
  return (_debugInfoSelection.ethdebug ? "/// ethdebug: enabled\n" : "") + tmp.str();
496
0
}
497
498
/// Exports this assembly as JSON.
/// `.code` holds one object per item of the single code section (each tag is followed by an
/// extra `JUMPDEST` object), `.data` holds the data entries and the recursively exported
/// sub-assemblies, `.auxdata` the auxiliary data and, if @a _includeSourceList is set,
/// `sourceList` holds the source names ordered by their index in @a _sourceIndices.
/// Items whose source name is not found in @a _sourceIndices get the source index -1.
Json Assembly::assemblyJSON(std::map<std::string, unsigned> const& _sourceIndices, bool _includeSourceList) const
{
  // Index of the item's source, or -1 if it has no source name or the name is unknown.
  auto const sourceIndexOf = [&](AssemblyItem const& _item) -> int
  {
    if (!_item.location().sourceName)
      return -1;
    auto const found = _sourceIndices.find(*_item.location().sourceName);
    if (found == _sourceIndices.end())
      return -1;
    return static_cast<int>(found->second);
  };

  Json root;
  root[".code"] = Json::array();
  Json& code = root[".code"];
  // TODO: support EOF
  solUnimplementedAssert(!m_eofVersion.has_value(), "Assembly output for EOF is not yet implemented.");
  solAssert(m_codeSections.size() == 1);
  for (AssemblyItem const& item: m_codeSections.front().items)
  {
    int const sourceIndex = sourceIndexOf(item);
    bool const hasModifierDepth = item.m_modifierDepth != 0;

    auto [name, data] = item.nameAndData(m_evmVersion);
    Json jsonItem;
    jsonItem["name"] = name;
    jsonItem["begin"] = item.location().start;
    jsonItem["end"] = item.location().end;
    if (hasModifierDepth)
      jsonItem["modifierDepth"] = static_cast<int>(item.m_modifierDepth);
    std::string const jumpType = item.getJumpTypeAsString();
    if (!jumpType.empty())
      jsonItem["jumpType"] = jumpType;
    // Libraries and immutables are exported by name instead of by hash.
    if (name == "PUSHLIB")
      data = m_libraries.at(h256(data));
    else if (name == "PUSHIMMUTABLE" || name == "ASSIGNIMMUTABLE")
      data = m_immutables.at(h256(data));
    if (!data.empty())
      jsonItem["value"] = data;
    jsonItem["source"] = sourceIndex;
    code.emplace_back(std::move(jsonItem));

    if (item.type() != AssemblyItemType::Tag)
      continue;

    // Every tag is followed by an explicit JUMPDEST entry at the same location.
    Json jumpdest;
    jumpdest["name"] = "JUMPDEST";
    jumpdest["begin"] = item.location().start;
    jumpdest["end"] = item.location().end;
    jumpdest["source"] = sourceIndex;
    if (hasModifierDepth)
      jumpdest["modifierDepth"] = static_cast<int>(item.m_modifierDepth);
    code.emplace_back(std::move(jumpdest));
  }

  if (_includeSourceList)
  {
    root["sourceList"] = Json::array();
    Json& jsonSourceList = root["sourceList"];
    unsigned highestIndex = 0;
    for (auto const& [sourceName, sourceIndex]: _sourceIndices)
    {
      highestIndex = std::max(sourceIndex, highestIndex);
      jsonSourceList[sourceIndex] = sourceName;
    }
    solAssert(highestIndex + 1 >= _sourceIndices.size());
    // A gap in the indices would leave a null entry in the array.
    solRequire(
      _sourceIndices.size() == 0 || _sourceIndices.size() == highestIndex + 1,
      AssemblyImportException,
      "The 'sourceList' array contains invalid 'null' item."
    );
  }

  if (!m_data.empty() || !m_subs.empty())
  {
    root[".data"] = Json::object();
    Json& dataSection = root[".data"];
    for (auto const& entry: m_data)
      if (u256(entry.first) >= m_subs.size())
        dataSection[util::toHex(toBigEndian(u256(entry.first)), util::HexPrefix::DontAdd, util::HexCase::Upper)] = util::toHex(entry.second);

    for (size_t subIndex = 0; subIndex < m_subs.size(); ++subIndex)
    {
      std::stringstream hexKey;
      hexKey << std::hex << subIndex;
      dataSection[hexKey.str()] = m_subs[subIndex]->assemblyJSON(_sourceIndices, /*_includeSourceList = */false);
    }
  }

  if (!m_auxiliaryData.empty())
    root[".auxdata"] = util::toHex(m_auxiliaryData);

  return root;
}
586
587
std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSON(
588
  Json const& _json,
589
  std::vector<std::string> const& _sourceList,
590
  size_t _level,
591
  std::optional<uint8_t> _eofVersion
592
)
593
0
{
594
0
  solRequire(_json.is_object(), AssemblyImportException, "Supplied JSON is not an object.");
595
0
  static std::set<std::string> const validMembers{".code", ".data", ".auxdata", "sourceList"};
596
0
  for (auto const& [attribute, _]: _json.items())
597
0
    solRequire(validMembers.count(attribute), AssemblyImportException, "Unknown attribute '" + attribute + "'.");
598
599
0
  if (_level == 0)
600
0
  {
601
0
    if (_json.contains("sourceList"))
602
0
    {
603
0
      solRequire(_json["sourceList"].is_array(), AssemblyImportException, "Optional member 'sourceList' is not an array.");
604
0
      for (Json const& sourceName: _json["sourceList"])
605
0
      {
606
0
        solRequire(!sourceName.is_null(), AssemblyImportException, "The 'sourceList' array contains invalid 'null' item.");
607
0
        solRequire(
608
0
          sourceName.is_string(),
609
0
          AssemblyImportException,
610
0
          "The 'sourceList' array contains an item that is not a string."
611
0
        );
612
0
      }
613
0
    }
614
0
  }
615
0
  else
616
0
    solRequire(
617
0
      !_json.contains("sourceList"),
618
0
      AssemblyImportException,
619
0
      "Member 'sourceList' may only be present in the root JSON object."
620
0
    );
621
622
0
  auto result = std::make_shared<Assembly>(EVMVersion{}, _level == 0 /* _creation */, _eofVersion, "" /* _name */);
623
0
  std::vector<std::string> parsedSourceList;
624
0
  if (_json.contains("sourceList"))
625
0
  {
626
0
    solAssert(_level == 0);
627
0
    solAssert(_sourceList.empty());
628
0
    for (Json const& sourceName: _json["sourceList"])
629
0
    {
630
0
      solRequire(
631
0
        std::find(parsedSourceList.begin(), parsedSourceList.end(), sourceName.get<std::string>()) == parsedSourceList.end(),
632
0
        AssemblyImportException,
633
0
        "Items in 'sourceList' array are not unique."
634
0
      );
635
0
      parsedSourceList.emplace_back(sourceName.get<std::string>());
636
0
    }
637
0
  }
638
639
0
  solRequire(_json.contains(".code"), AssemblyImportException, "Member '.code' is missing.");
640
0
  solRequire(_json[".code"].is_array(), AssemblyImportException, "Member '.code' is not an array.");
641
0
  for (Json const& codeItem: _json[".code"])
642
0
    solRequire(codeItem.is_object(), AssemblyImportException, "The '.code' array contains an item that is not an object.");
643
644
0
  result->importAssemblyItemsFromJSON(_json[".code"], _level == 0 ? parsedSourceList : _sourceList);
645
646
0
  if (_json.contains(".auxdata"))
647
0
  {
648
0
    solRequire(_json[".auxdata"].is_string(), AssemblyImportException, "Optional member '.auxdata' is not a string.");
649
0
    result->m_auxiliaryData = fromHex(_json[".auxdata"].get<std::string>());
650
0
    solRequire(!result->m_auxiliaryData.empty(), AssemblyImportException, "Optional member '.auxdata' is not a valid hexadecimal string.");
651
0
  }
652
653
0
  if (_json.contains(".data"))
654
0
  {
655
0
    solRequire(_json[".data"].is_object(), AssemblyImportException, "Optional member '.data' is not an object.");
656
0
    Json const& data = _json[".data"];
657
0
    std::map<size_t, std::shared_ptr<Assembly>> subAssemblies;
658
0
    for (auto const& [key, value] : data.items())
659
0
    {
660
0
      if (value.is_string())
661
0
      {
662
0
        solRequire(
663
0
          value.get<std::string>().empty() || !fromHex(value.get<std::string>()).empty(),
664
0
          AssemblyImportException,
665
0
          "The value for key '" + key + "' inside '.data' is not a valid hexadecimal string."
666
0
        );
667
0
        result->m_data[h256(fromHex(key))] = fromHex(value.get<std::string>());
668
0
      }
669
0
      else if (value.is_object())
670
0
      {
671
0
        size_t index{};
672
0
        try
673
0
        {
674
          // Using signed variant because stoul() still accepts negative numbers and
675
          // just lets them wrap around.
676
0
          int parsedDataItemID = std::stoi(key, nullptr, 16);
677
0
          solRequire(parsedDataItemID >= 0, AssemblyImportException, "The key '" + key + "' inside '.data' is out of the supported integer range.");
678
0
          index = static_cast<size_t>(parsedDataItemID);
679
0
        }
680
0
        catch (std::invalid_argument const&)
681
0
        {
682
0
          solThrow(AssemblyImportException, "The key '" + key + "' inside '.data' is not an integer.");
683
0
        }
684
0
        catch (std::out_of_range const&)
685
0
        {
686
0
          solThrow(AssemblyImportException, "The key '" + key + "' inside '.data' is out of the supported integer range.");
687
0
        }
688
689
0
        auto [subAssembly, emptySourceList] = Assembly::fromJSON(value, _level == 0 ? parsedSourceList : _sourceList, _level + 1, _eofVersion);
690
0
        solAssert(subAssembly);
691
0
        solAssert(emptySourceList.empty());
692
0
        solAssert(subAssemblies.count(index) == 0);
693
0
        subAssemblies[index] = subAssembly;
694
0
      }
695
0
      else
696
0
        solThrow(AssemblyImportException, "The value of key '" + key + "' inside '.data' is neither a hex string nor an object.");
697
0
    }
698
699
0
    if (!subAssemblies.empty())
700
0
      solRequire(
701
0
        ranges::max(subAssemblies | ranges::views::keys) == subAssemblies.size() - 1,
702
0
        AssemblyImportException,
703
0
        fmt::format(
704
0
          "Invalid subassembly indices in '.data'. Not all numbers between 0 and {} are present.",
705
0
          subAssemblies.size() - 1
706
0
        )
707
0
      );
708
709
0
    result->m_subs = subAssemblies | ranges::views::values | ranges::to<std::vector>;
710
0
  }
711
712
0
  if (_level == 0)
713
0
    result->encodeAllPossibleSubPathsInAssemblyTree();
714
715
0
  return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{});
716
0
}
717
718
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
719
0
{
720
0
  _assembliesOnPath.push_back(this);
721
0
  for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back())
722
0
  {
723
0
    for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot)
724
0
      _assembliesOnPath[distanceFromRoot]->encodeSubPath(
725
0
        _pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector>
726
0
      );
727
728
0
    m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
729
0
  }
730
0
}
731
732
std::shared_ptr<std::string const> Assembly::sharedSourceName(std::string const& _name) const
733
0
{
734
0
  if (s_sharedSourceNames.find(_name) == s_sharedSourceNames.end())
735
0
    s_sharedSourceNames[_name] = std::make_shared<std::string>(_name);
736
737
0
  return s_sharedSourceNames[_name];
738
0
}
739
740
/// Returns the tag item registered under @a _name. On first use a new tag is created and
/// stored together with @a _sourceID, @a _params and @a _returns.
/// Throws AssemblyException if @a _name is empty or if the name is already registered with
/// different parameter count, return count or source ID.
AssemblyItem Assembly::namedTag(std::string const& _name, size_t _params, size_t _returns, std::optional<uint64_t> _sourceID)
{
  assertThrow(!_name.empty(), AssemblyException, "Empty named tag.");

  auto const known = m_namedTags.find(_name);
  if (known == m_namedTags.end())
    m_namedTags[_name] = {static_cast<size_t>(newTag().data()), _sourceID, _params, _returns};
  else
  {
    auto const& info = known->second;
    assertThrow(info.params == _params, AssemblyException, "");
    assertThrow(info.returns == _returns, AssemblyException, "");
    assertThrow(info.sourceID == _sourceID, AssemblyException, "");
  }
  return AssemblyItem{Tag, m_namedTags.at(_name).id};
}
753
754
/// Creates the item that transfers control to the code section @a _functionID: a
/// jump-to-function item if the section is marked non-returning, a function-call item
/// otherwise. The section must already be declared and must not be section 0.
AssemblyItem Assembly::newFunctionCall(uint16_t _functionID) const
{
  solAssert(_functionID < m_codeSections.size(), "Call to undeclared function.");
  solAssert(_functionID > 0, "Cannot call section 0");

  auto const& target = m_codeSections.at(_functionID);
  if (!target.nonReturning)
    return AssemblyItem::functionCall(_functionID, target.inputs, target.outputs);
  return AssemblyItem::jumpToFunction(_functionID, target.inputs, target.outputs);
}
764
765
/// Creates a function-return item. May only be used while a function (a code section other
/// than section 0) is the current code section.
AssemblyItem Assembly::newFunctionReturn() const
{
  solAssert(m_currentCodeSection != 0, "Appending function return without begin function.");
  AssemblyItem functionReturnItem = AssemblyItem::functionReturn();
  return functionReturnItem;
}
770
771
uint16_t Assembly::createFunction(uint8_t _args, uint8_t _rets, bool _nonReturning)
772
0
{
773
0
  size_t functionID = m_codeSections.size();
774
0
  solRequire(functionID < 1024, AssemblyException, "Too many functions for EOF");
775
0
  solAssert(m_currentCodeSection == 0, "Functions need to be declared from the main block.");
776
0
  solRequire(_rets <= 127, AssemblyException, "Too many function returns.");
777
0
  solRequire(_args <= 127, AssemblyException, "Too many function inputs.");
778
0
  m_codeSections.emplace_back(CodeSection{_args, _rets, _nonReturning, {}});
779
0
  return static_cast<uint16_t>(functionID);
780
0
}
781
782
void Assembly::beginFunction(uint16_t _functionID)
783
0
{
784
0
  solAssert(m_currentCodeSection == 0, "Attempted to begin a function before ending the last one.");
785
0
  solAssert(_functionID != 0, "Attempt to begin a function with id 0");
786
0
  solAssert(_functionID < m_codeSections.size(), "Attempt to begin an undeclared function.");
787
0
  auto& section = m_codeSections.at(_functionID);
788
0
  solAssert(section.items.empty(), "Function already defined.");
789
0
  m_currentCodeSection = _functionID;
790
0
}
791
void Assembly::endFunction()
792
0
{
793
0
  solAssert(m_currentCodeSection != 0, "End function without begin function.");
794
0
  m_currentCodeSection = 0;
795
0
}
796
797
/// Creates a PushLibraryAddress item for the library named @a _identifier.
/// The keccak256 hash of the identifier is used as the item's data, and the
/// hash -> identifier mapping is recorded in m_libraries.
AssemblyItem Assembly::newPushLibraryAddress(std::string const& _identifier)
{
  h256 identifierHash = util::keccak256(_identifier);
  m_libraries[identifierHash] = _identifier;

  AssemblyItem pushItem{PushLibraryAddress, identifierHash};
  return pushItem;
}
803
804
/// Creates a PushImmutable item for the immutable named @a _identifier.
/// The keccak256 hash of the identifier is used as the item's data, and the
/// hash -> identifier mapping is recorded in m_immutables.
AssemblyItem Assembly::newPushImmutable(std::string const& _identifier)
{
  h256 identifierHash = util::keccak256(_identifier);
  m_immutables[identifierHash] = _identifier;

  AssemblyItem pushItem{PushImmutable, identifierHash};
  return pushItem;
}
810
811
/// Creates an AssignImmutable item for the immutable named @a _identifier.
/// The keccak256 hash of the identifier is used as the item's data, and the
/// hash -> identifier mapping is recorded in m_immutables.
AssemblyItem Assembly::newImmutableAssignment(std::string const& _identifier)
{
  h256 identifierHash = util::keccak256(_identifier);
  m_immutables[identifierHash] = _identifier;

  AssemblyItem assignItem{AssignImmutable, identifierHash};
  return assignItem;
}
817
818
/// Creates an AuxDataLoadN item carrying @a _offset as its data.
AssemblyItem Assembly::newAuxDataLoadN(size_t _offset) const
{
  AssemblyItem loadItem{AuxDataLoadN, _offset};
  return loadItem;
}
822
823
/// Creates a swapN item for the given @a _depth; forwards to AssemblyItem::swapN.
AssemblyItem Assembly::newSwapN(size_t _depth) const
{
  AssemblyItem swapItem = AssemblyItem::swapN(_depth);
  return swapItem;
}
827
828
/// Creates a dupN item for the given @a _depth; forwards to AssemblyItem::dupN.
AssemblyItem Assembly::newDupN(size_t _depth) const
{
  AssemblyItem dupItem = AssemblyItem::dupN(_depth);
  return dupItem;
}
832
833
/// Optimises this assembly (and, through optimiseInternal, its sub-assemblies)
/// according to @a _settings.
/// @returns a reference to this assembly to allow call chaining
Assembly& Assembly::optimise(OptimiserSettings const& _settings)
{
  // At the outermost level there is no enclosing assembly that could reference our tags.
  std::set<size_t> const noTagsReferencedFromOutside;
  optimiseInternal(_settings, noTagsReferencedFromOutside);
  return *this;
}
838
839
/// Optimises all sub-assemblies and then this assembly, repeating the enabled
/// optimisation steps until none of them reports further progress.
/// @param _settings selects which optimiser steps run (inliner, jumpdest remover, peephole,
///        deduplicator, CSE, constant optimiser)
/// @param _tagsReferencedFromOutside tags of this assembly that are referenced by the enclosing
///        assembly; taken by value because it is updated locally when blocks are deduplicated
/// @returns the tag replacements performed by the block deduplicator; the result is stored in
///          m_tagReplacements and returned directly on subsequent calls
std::map<u256, u256> const& Assembly::optimiseInternal(
840
  OptimiserSettings const& _settings,
841
  std::set<size_t> _tagsReferencedFromOutside
842
)
843
80.2k
{
844
80.2k
  // m_tagReplacements is only set at the very end of this function, so a value here
  // means the assembly has been optimised before: return the stored result.
  if (m_tagReplacements)
845
180
    return *m_tagReplacements;
846
847
  // Run optimisation for sub-assemblies.
848
  // TODO: verify and double-check this for EOF.
849
101k
  for (size_t subId = 0; subId < m_subs.size(); ++subId)
850
21.1k
  {
851
21.1k
    // NOTE(review): `settings` is an unmodified copy of `_settings`; the copy looks unnecessary.
    OptimiserSettings settings = _settings;
852
21.1k
    Assembly& sub = *m_subs[subId];
853
21.1k
    // Collect the tags of this sub-assembly that any of our code sections refers to,
    // they are handed to the sub as its "referenced from outside" set.
    std::set<size_t> referencedTags;
854
21.1k
    for (auto& codeSection: m_codeSections)
855
21.1k
      referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId);
856
21.1k
    std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal(
857
21.1k
      settings,
858
21.1k
      referencedTags
859
21.1k
    );
860
    // Apply the replacements (can be empty).
861
21.1k
    for (auto& codeSection: m_codeSections)
862
21.1k
      BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId);
863
21.1k
  }
864
865
80.0k
  std::map<u256, u256> tagReplacements;
866
  // Iterate until no new optimisation possibilities are found.
  // `count` is reset at the start of each round and incremented by every step that
  // changed something; the loop ends after a round in which it stays 0.
867
285k
  for (unsigned count = 1; count > 0;)
868
205k
  {
869
205k
    count = 0;
870
871
    // TODO: verify this for EOF.
    // The inliner only runs for non-EOF assemblies, which must have exactly one code section.
    // Its result does not contribute to `count`.
872
205k
    if (_settings.runInliner && !m_eofVersion.has_value())
873
106k
    {
874
106k
      solAssert(m_codeSections.size() == 1);
875
106k
      Inliner{
876
106k
        m_codeSections.front().items,
877
106k
        _tagsReferencedFromOutside,
878
106k
        _settings.expectedExecutionsPerDeployment,
879
106k
        isCreation(),
880
106k
        m_evmVersion
881
106k
      }.optimise();
882
106k
    }
883
    // TODO: verify this for EOF.
884
205k
    if (_settings.runJumpdestRemover && !m_eofVersion.has_value())
885
205k
    {
886
205k
      for (auto& codeSection: m_codeSections)
887
205k
      {
888
205k
        JumpdestRemover jumpdestOpt{codeSection.items};
889
205k
        if (jumpdestOpt.optimise(_tagsReferencedFromOutside))
890
91.1k
          count++;
891
205k
      }
892
205k
    }
893
894
205k
    if (_settings.runPeephole)
895
205k
    {
896
205k
      for (auto& codeSection: m_codeSections)
897
205k
      {
898
205k
        PeepholeOptimiser peepOpt{codeSection.items, m_evmVersion};
899
352k
        // Re-run the peephole optimiser as long as it keeps reporting changes.
        // The bound on `count` guards against an optimiser that never settles.
        while (peepOpt.optimise())
900
146k
        {
901
146k
          count++;
902
146k
          assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck.");
903
146k
        }
904
205k
      }
905
205k
    }
906
907
    // This only modifies PushTags, we have to run again to actually remove code.
908
    // TODO: implement for EOF.
909
205k
    if (_settings.runDeduplicate && !m_eofVersion.has_value())
910
106k
      for (auto& section: m_codeSections)
911
106k
      {
912
106k
        BlockDeduplicator deduplicator{section.items};
913
106k
        if (deduplicator.deduplicate())
914
7.62k
        {
915
7.62k
          for (auto const& replacement: deduplicator.replacedTags())
916
90.7k
          {
917
90.7k
            // Both tag ids are narrowed to size_t below, so they have to fit.
            assertThrow(
918
90.7k
              replacement.first <= std::numeric_limits<size_t>::max() && replacement.second <= std::numeric_limits<size_t>::max(),
919
90.7k
              OptimizerException,
920
90.7k
              "Invalid tag replacement."
921
90.7k
            );
922
90.7k
            assertThrow(
923
90.7k
              !tagReplacements.count(replacement.first),
924
90.7k
              OptimizerException,
925
90.7k
              "Replacement already known."
926
90.7k
            );
927
90.7k
            tagReplacements[replacement.first] = replacement.second;
928
90.7k
            // If a replaced tag was referenced from outside, the reference now
            // targets the tag that replaces it.
            if (_tagsReferencedFromOutside.erase(static_cast<size_t>(replacement.first)))
929
0
              _tagsReferencedFromOutside.insert(static_cast<size_t>(replacement.second));
930
90.7k
          }
931
7.62k
          count++;
932
7.62k
        }
933
106k
      }
934
935
    // TODO: investigate for EOF
936
205k
    if (_settings.runCSE && !m_eofVersion.has_value())
937
106k
    {
938
      // Control flow graph optimization has been here before but is disabled because it
939
      // assumes we only jump to tags that are pushed. This is not the case anymore with
940
      // function types that can be stored in storage.
941
106k
      AssemblyItems optimisedItems;
942
943
106k
      solAssert(m_codeSections.size() == 1);
944
106k
      auto& items = m_codeSections.front().items;
945
17.9M
      // True if the code contains an MSIZE instruction or verbatim bytecode;
      // the flag is forwarded to the eliminator's feedItems().
      bool usesMSize = ranges::any_of(items, [](AssemblyItem const& _i) {
946
17.9M
        return _i == AssemblyItem{Instruction::MSIZE} || _i.type() == VerbatimBytecode;
947
17.9M
      });
948
949
106k
      auto iter = items.begin();
950
3.26M
      // Process the items chunk by chunk: feedItems() consumes items starting at `iter`
      // and returns the position where the next chunk starts.
      while (iter != items.end())
951
3.15M
      {
952
3.15M
        KnownState emptyState;
953
3.15M
        CommonSubexpressionEliminator eliminator{emptyState};
954
3.15M
        auto orig = iter;
955
3.15M
        iter = eliminator.feedItems(iter, items.end(), usesMSize);
956
3.15M
        bool shouldReplace = false;
957
3.15M
        AssemblyItems optimisedChunk;
958
3.15M
        try
959
3.15M
        {
960
3.15M
          optimisedChunk = eliminator.getOptimizedItems();
961
3.15M
          // Only use the optimised chunk if it has fewer items than the original one.
          shouldReplace = (optimisedChunk.size() < static_cast<size_t>(iter - orig));
962
3.15M
        }
963
3.15M
        // Both exceptions are deliberately ignored: shouldReplace stays false
        // and the original chunk is kept.
        catch (StackTooDeepException const&)
964
3.15M
        {
965
          // This might happen if the opcode reconstruction is not as efficient
966
          // as the hand-crafted code.
967
1.72k
        }
968
3.15M
        catch (ItemNotAvailableException const&)
969
3.15M
        {
970
          // This might happen if e.g. associativity and commutativity rules
971
          // reorganise the expression tree, but not all leaves are available.
972
0
        }
973
974
3.15M
        if (shouldReplace)
975
135k
        {
976
135k
          count++;
977
135k
          optimisedItems += optimisedChunk;
978
135k
        }
979
3.01M
        else
980
3.01M
          copy(orig, iter, back_inserter(optimisedItems));
981
3.15M
      }
982
106k
      // Only install the new item list if it is shorter than the current one overall.
      if (optimisedItems.size() < items.size())
983
37.3k
      {
984
37.3k
        items = std::move(optimisedItems);
985
37.3k
        count++;
986
37.3k
      }
987
106k
    }
988
205k
  }
989
990
  // TODO: investigate for EOF
  // The constant optimiser runs once, after the iteration above has finished.
  // For creation code the expected number of executions passed on is 1.
991
80.0k
  if (_settings.runConstantOptimiser && !m_eofVersion.has_value())
992
39.3k
    ConstantOptimisationMethod::optimiseConstants(
993
39.3k
      isCreation(),
994
39.3k
      isCreation() ? 1 : _settings.expectedExecutionsPerDeployment,
995
39.3k
      m_evmVersion,
996
39.3k
      *this
997
39.3k
    );
998
999
80.0k
  // Store the result so that later calls return early (see the check at the top).
  m_tagReplacements = std::move(tagReplacements);
1000
80.0k
  return *m_tagReplacements;
1001
80.0k
}
1002
1003
namespace
1004
{
1005
template<typename ValueT>
1006
void setBigEndian(bytes& _dest, size_t _offset, size_t _size, ValueT _value)
1007
2.94M
{
1008
2.94M
  assertThrow(numberEncodingSize(_value) <= _size, AssemblyException, "");
1009
2.94M
  toBigEndian(_value, bytesRef(_dest.data() + _offset, _size));
1010
2.94M
}
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::setBigEndian<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long, unsigned long)
Assembly.cpp:void (anonymous namespace)::setBigEndian<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>)
Line
Count
Source
1007
2.94M
{
1008
2.94M
  assertThrow(numberEncodingSize(_value) <= _size, AssemblyException, "");
1009
2.94M
  toBigEndian(_value, bytesRef(_dest.data() + _offset, _size));
1010
2.94M
}
1011
1012
template<typename ValueT>
1013
void appendBigEndian(bytes& _dest, size_t _size, ValueT _value)
1014
2.94M
{
1015
2.94M
  _dest.resize(_dest.size() + _size);
1016
2.94M
  setBigEndian(_dest, _dest.size() - _size, _size, _value);
1017
2.94M
}
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndian<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, unsigned long)
Assembly.cpp:void (anonymous namespace)::appendBigEndian<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>)
Line
Count
Source
1014
2.94M
{
1015
2.94M
  _dest.resize(_dest.size() + _size);
1016
2.94M
  setBigEndian(_dest, _dest.size() - _size, _size, _value);
1017
2.94M
}
1018
1019
template<typename ValueT>
1020
void setBigEndianUint16(bytes& _dest, size_t _offset, ValueT _value)
1021
0
{
1022
0
  setBigEndian(_dest, _offset, 2, _value);
1023
0
}
1024
1025
template<typename ValueT>
1026
void appendBigEndianUint16(bytes& _dest, ValueT _value)
1027
0
{
1028
0
  static_assert(!std::numeric_limits<ValueT>::is_signed, "only unsigned types or bigint supported");
1029
0
  assertThrow(_value <= 0xFFFF, AssemblyException, "");
1030
0
  appendBigEndian(_dest, 2, static_cast<size_t>(_value));
1031
0
}
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned long>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long)
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned int>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned int)
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<unsigned short>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned short)
Unexecuted instantiation: Assembly.cpp:void (anonymous namespace)::appendBigEndianUint16<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0> >(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<256ul, 256ul, (boost::multiprecision::cpp_integer_type)0, (boost::multiprecision::cpp_int_check_type)0, void>, (boost::multiprecision::expression_template_option)0>)
1032
1033
// Calculates maximum stack height for given code section. According to EIP5450 https://eips.ethereum.org/EIPS/eip-5450
// Worklist-based traversal over the items of the section: for every item the largest stack
// height seen before its execution is recorded, and a successor is (re-)queued whenever it is
// reached for the first time or with a larger height than recorded so far.
// @param _section code section to analyse, its item list must not be empty
// @returns the largest stack height encountered, which is at least the number of section inputs
// Throws AssemblyException if a computed height does not fit into uint16_t.
1034
uint16_t calculateMaxStackHeight(Assembly::CodeSection const& _section)
1035
0
{
1036
0
  // Marker for items that have not been reached yet.
  static auto constexpr UNVISITED = std::numeric_limits<size_t>::max();
1037
1038
0
  AssemblyItems const& items = _section.items;
1039
0
  solAssert(!items.empty());
1040
0
  uint16_t overallMaxHeight = _section.inputs;
1041
0
  // Indices of items whose successors still have to be (re-)evaluated.
  std::stack<size_t> worklist;
1042
0
  std::vector<size_t> maxStackHeights(items.size(), UNVISITED);
1043
1044
  // Init first item stack height to number of inputs to the code section
1045
  // maxStackHeights stores stack height for an item before the item execution
1046
0
  maxStackHeights[0] = _section.inputs;
1047
  // Push first item index to the worklist
1048
0
  worklist.push(0u);
1049
0
  while (!worklist.empty())
1050
0
  {
1051
0
    size_t idx = worklist.top();
1052
0
    worklist.pop();
1053
0
    AssemblyItem const& item = items[idx];
1054
0
    // NOTE(review): the result of deposit() is stored in an unsigned size_t. If deposit() can
    // be negative, the additions below presumably rely on unsigned wrap-around - TODO confirm.
    size_t stackHeightChange = item.deposit();
1055
0
    size_t currentMaxHeight = maxStackHeights[idx];
1056
0
    solAssert(currentMaxHeight != UNVISITED);
1057
1058
0
    std::vector<size_t> successors;
1059
1060
    // Add next instruction to successors for non-control-flow-changing instructions
    // (i.e. everything except terminating instructions, RelativeJump, RetF and JumpF).
1061
0
    if (
1062
0
      !(item.hasInstruction() && SemanticInformation::terminatesControlFlow(item.instruction())) &&
1063
0
      item.type() != RelativeJump &&
1064
0
      item.type() != RetF &&
1065
0
      item.type() != JumpF
1066
0
    )
1067
0
    {
1068
0
      // Falling through requires a following item to exist.
      solAssert(idx < items.size() - 1, "No terminating instruction.");
1069
0
      successors.emplace_back(idx + 1);
1070
0
    }
1071
1072
    // Add jumps destinations to successors
1073
    // TODO: Remember to add RJUMPV when it is supported.
1074
0
    if (item.type() == RelativeJump || item.type() == ConditionalRelativeJump)
1075
0
    {
1076
0
      // NOTE(review): linear search over all items for every jump encountered.
      auto const tagIt = std::find(items.begin(), items.end(), item.tag());
1077
0
      solAssert(tagIt != items.end(), "Tag not found.");
1078
0
      successors.emplace_back(static_cast<size_t>(std::distance(items.begin(), tagIt)));
1079
      // TODO: This assert fails until the code is not topologically sorted. Uncomment when sorting introduced.
1080
      // If backward jump the successor must be already visited.
1081
      // solAssert(idx <= successors.back() || maxStackHeights[successors.back()] != UNVISITED);
1082
0
    }
1083
1084
0
    // The height after executing the item must still be representable as uint16_t,
    // which makes the narrowing cast below safe.
    solRequire(
1085
0
      currentMaxHeight + stackHeightChange <= std::numeric_limits<uint16_t>::max(),
1086
0
      AssemblyException,
1087
0
      "Stack overflow in EOF function."
1088
0
    );
1089
0
    overallMaxHeight = std::max(overallMaxHeight, static_cast<uint16_t>(currentMaxHeight + stackHeightChange));
1090
0
    // From here on currentMaxHeight is the height after the item, i.e. the height
    // with which its successors are entered.
    currentMaxHeight += stackHeightChange;
1091
1092
    // Set stack height for all instruction successors
1093
0
    for (size_t successor: successors)
1094
0
    {
1095
0
      solAssert(successor < maxStackHeights.size());
1096
      // Set stack height for newly visited
1097
0
      if (maxStackHeights[successor] == UNVISITED)
1098
0
      {
1099
0
        maxStackHeights[successor] = currentMaxHeight;
1100
0
        worklist.push(successor);
1101
0
      }
1102
0
      else
1103
0
      {
1104
0
        // NOTE(review): repeats the assertion made at the top of this loop body.
        solAssert(successor < maxStackHeights.size());
1105
        // For backward jump successor stack height must be equal
1106
0
        if (successor < idx)
1107
0
          solAssert(maxStackHeights[successor] == currentMaxHeight, "Stack height mismatch.");
1108
1109
        // If successor stack height is smaller update it and recalculate
1110
0
        if (currentMaxHeight > maxStackHeights[successor])
1111
0
        {
1112
0
          maxStackHeights[successor] = currentMaxHeight;
1113
0
          worklist.push(successor);
1114
0
        }
1115
0
      }
1116
0
    }
1117
0
  }
1118
0
  return overallMaxHeight;
1119
0
}
1120
}
1121
1122
std::tuple<bytes, std::vector<size_t>, size_t> Assembly::createEOFHeader(std::set<ContainerID> const& _referencedSubIds) const
1123
0
{
1124
0
  bytes retBytecode;
1125
0
  std::vector<size_t> codeSectionSizePositions;
1126
0
  size_t dataSectionSizePosition;
1127
1128
0
  retBytecode.push_back(0xef);
1129
0
  retBytecode.push_back(0x00);
1130
0
  retBytecode.push_back(0x01);                                        // version 1
1131
1132
0
  retBytecode.push_back(0x01);                                        // kind=type
1133
0
  appendBigEndianUint16(retBytecode, m_codeSections.size() * 4u);     // length of type section
1134
1135
0
  retBytecode.push_back(0x02);                                        // kind=code
1136
0
  appendBigEndianUint16(retBytecode, m_codeSections.size());          // placeholder for number of code sections
1137
1138
0
  for (auto const& codeSection: m_codeSections)
1139
0
  {
1140
0
    (void) codeSection;
1141
0
    codeSectionSizePositions.emplace_back(retBytecode.size());
1142
0
    appendBigEndianUint16(retBytecode, 0u);                         // placeholder for length of code
1143
0
  }
1144
1145
0
  if (!_referencedSubIds.empty())
1146
0
  {
1147
0
    retBytecode.push_back(0x03);
1148
0
    appendBigEndianUint16(retBytecode, _referencedSubIds.size());
1149
1150
0
    for (auto subId: _referencedSubIds)
1151
0
      appendBigEndianUint16(retBytecode, m_subs[subId]->assemble().bytecode.size());
1152
0
  }
1153
1154
0
  retBytecode.push_back(0x04);                                        // kind=data
1155
0
  dataSectionSizePosition = retBytecode.size();
1156
0
  appendBigEndianUint16(retBytecode, 0u);                             // length of data
1157
1158
0
  retBytecode.push_back(0x00);                                        // terminator
1159
1160
0
  for (auto const& codeSection: m_codeSections)
1161
0
  {
1162
0
    retBytecode.push_back(codeSection.inputs);
1163
    // According to EOF spec function output num equals 0x80 means non-returning function
1164
0
    retBytecode.push_back(codeSection.nonReturning ? 0x80 : codeSection.outputs);
1165
0
    appendBigEndianUint16(retBytecode, calculateMaxStackHeight(codeSection));
1166
0
  }
1167
1168
0
  return {retBytecode, codeSectionSizePositions, dataSectionSizePosition};
1169
0
}
1170
1171
/// Assembles this object into bytecode, dispatching to the legacy or the EOF assembler.
/// A previously assembled (non-empty) result is returned without assembling again.
/// Throws AssemblyException if the assembly is marked invalid, if link references exist
/// although no bytecode has been produced yet, or if an EOF version other than 1 is set.
LinkerObject const& Assembly::assemble() const
{
  solRequire(!m_invalid, AssemblyException, "Attempted to assemble invalid Assembly object.");

  // Return the already assembled object, if present.
  bool const alreadyAssembled = !m_assembledObject.bytecode.empty();
  if (alreadyAssembled)
    return m_assembledObject;

  // Otherwise ensure the object is actually clear.
  solRequire(m_assembledObject.linkReferences.empty(), AssemblyException, "Unexpected link references.");

  if (m_eofVersion.has_value())
  {
    solRequire(m_eofVersion == 1, AssemblyException, "Invalid EOF version.");
    return assembleEOF();
  }
  return assembleLegacy();
}
1189
1190
/// Encodes a plain operation item as the single opcode byte of its instruction.
[[nodiscard]] bytes Assembly::assembleOperation(AssemblyItem const& _item) const
{
  // solidity::evmasm::Instructions underlying type is uint8_t
  // TODO: Change to std::to_underlying since C++23
  uint8_t const opcode = static_cast<uint8_t>(_item.instruction());
  return bytes(1, opcode);
}
1196
1197
/// Encodes a push item as a push opcode followed by the big-endian encoding of the item's data.
/// The immediate is as long as numberEncodingSize() reports for the data. A size of zero is
/// only kept if the EVM version has PUSH0, otherwise one byte is used.
[[nodiscard]] bytes Assembly::assemblePush(AssemblyItem const& _item) const
{
  unsigned immediateSize = numberEncodingSize(_item.data());
  bool const zeroSizedPushAvailable = m_evmVersion.hasPush0();
  if (immediateSize == 0 && !zeroSizedPushAvailable)
    immediateSize = 1;

  // solidity::evmasm::Instructions underlying type is uint8_t
  // TODO: Change to std::to_underlying since C++23
  bytes encoded(1, static_cast<uint8_t>(pushInstruction(immediateSize)));
  if (immediateSize > 0)
    appendBigEndian(encoded, immediateSize, _item.data());

  return encoded;
}
1212
1213
/// Encodes a library address push as PUSH20 followed by 20 zero bytes.
/// @param _item item whose data is the key of the library name in m_libraries
/// @param _pos offset at which the returned bytes will be placed in the bytecode
/// @returns the encoded bytes together with a link reference made of the offset of the
///          20-byte placeholder (@a _pos + 1) and the library name
[[nodiscard]] std::pair<bytes, Assembly::LinkRef> Assembly::assemblePushLibraryAddress(AssemblyItem const& _item, size_t _pos) const
{
  // solidity::evmasm::Instructions underlying type is uint8_t
  // TODO: Change to std::to_underlying since C++23
  bytes pushWithPlaceholder(1, static_cast<uint8_t>(Instruction::PUSH20));
  // Zero-filled slot for the address.
  pushWithPlaceholder.resize(pushWithPlaceholder.size() + 20);

  // The placeholder starts right after the opcode byte.
  size_t const placeholderOffset = _pos + 1;
  return {
    pushWithPlaceholder,
    {placeholderOffset, m_libraries.at(_item.data())}
  };
}
1222
1223
/// Returns the verbatim data of the item unchanged.
[[nodiscard]] bytes Assembly::assembleVerbatimBytecode(AssemblyItem const& item) const
{
  bytes verbatim = item.verbatimData();
  return verbatim;
}
1227
1228
/// Encodes a deploy-time address push as PUSH20 followed by 20 zero bytes.
[[nodiscard]] bytes Assembly::assemblePushDeployTimeAddress() const
{
  // solidity::evmasm::Instructions underlying type is uint8_t
  // TODO: Change to std::to_underlying since C++23
  bytes pushWithPlaceholder(1, static_cast<uint8_t>(Instruction::PUSH20));
  // Zero-filled slot for the address.
  pushWithPlaceholder.resize(pushWithPlaceholder.size() + 20);
  return pushWithPlaceholder;
}
1234
1235
/// Records the bytecode position of a tag and returns the bytes to emit for it.
/// @param _item the tag item; its data is the (non-zero, non-foreign) tag id
/// @param _pos position in the bytecode at which the tag is located
/// @param _addJumpDest whether a JUMPDEST opcode is emitted for the tag
/// @returns a single JUMPDEST byte if @a _addJumpDest is set, otherwise no bytes
/// Throws AssemblyException if the tag id is zero, foreign, out of range or already has a
/// position, or if @a _pos is too large.
[[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const
{
  solRequire(_item.data() != 0, AssemblyException, "Invalid tag position.");
  solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag.");
  solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large.");
  size_t tagId = static_cast<size_t>(_item.data());
  // The tag id indexes m_tagPositionsInBytecode below; reject ids the table has no slot for
  // instead of reading and writing out of bounds.
  solRequire(tagId < m_tagPositionsInBytecode.size(), AssemblyException, "Tag id out of range.");
  // A slot holding size_t's maximum value has no position assigned yet.
  solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position.");
  m_tagPositionsInBytecode[tagId] = _pos;

  // solidity::evmasm::Instructions underlying type is uint8_t
  // TODO: Change to std::to_underlying since C++23
  return _addJumpDest ? bytes(1, static_cast<uint8_t>(Instruction::JUMPDEST)) : bytes();
}
1248
1249
LinkerObject const& Assembly::assembleLegacy() const
1250
86.6k
{
1251
86.6k
  solAssert(!m_eofVersion.has_value());
1252
86.6k
  solAssert(!m_invalid);
1253
  // Return the already assembled object, if present.
1254
86.6k
  if (!m_assembledObject.bytecode.empty())
1255
0
    return m_assembledObject;
1256
  // Otherwise ensure the object is actually clear.
1257
86.6k
  solAssert(m_assembledObject.linkReferences.empty());
1258
1259
86.6k
  LinkerObject& ret = m_assembledObject;
1260
1261
86.6k
  size_t subTagSize = 1;
1262
86.6k
  std::map<u256, LinkerObject::ImmutableRefs> immutableReferencesBySub;
1263
86.6k
  for (auto const& sub: m_subs)
1264
22.0k
  {
1265
22.0k
    auto const& linkerObject = sub->assemble();
1266
22.0k
    if (!linkerObject.immutableReferences.empty())
1267
176
    {
1268
176
      assertThrow(
1269
176
        immutableReferencesBySub.empty(),
1270
176
        AssemblyException,
1271
176
        "More than one sub-assembly references immutables."
1272
176
      );
1273
176
      immutableReferencesBySub = linkerObject.immutableReferences;
1274
176
    }
1275
22.0k
    for (size_t tagPos: sub->m_tagPositionsInBytecode)
1276
1.54M
      if (tagPos != std::numeric_limits<size_t>::max() && numberEncodingSize(tagPos) > subTagSize)
1277
7.60k
        subTagSize = numberEncodingSize(tagPos);
1278
22.0k
  }
1279
1280
86.6k
  bool setsImmutables = false;
1281
86.6k
  bool pushesImmutables = false;
1282
1283
86.6k
  assertThrow(m_codeSections.size() == 1, AssemblyException, "Expected exactly one code section in non-EOF code.");
1284
86.6k
  AssemblyItems const& items = m_codeSections.front().items;
1285
1286
86.6k
  for (auto const& item: items)
1287
19.1M
    if (item.type() == AssignImmutable)
1288
450
    {
1289
450
      item.setImmutableOccurrences(immutableReferencesBySub[item.data()].second.size());
1290
450
      setsImmutables = true;
1291
450
    }
1292
19.1M
    else if (item.type() == PushImmutable)
1293
311
      pushesImmutables = true;
1294
86.6k
  if (setsImmutables || pushesImmutables)
1295
86.6k
    assertThrow(
1296
86.6k
      setsImmutables != pushesImmutables,
1297
86.6k
      AssemblyException,
1298
86.6k
      "Cannot push and assign immutables in the same assembly subroutine."
1299
86.6k
    );
1300
1301
86.6k
  unsigned bytesRequiredForCode = codeSize(static_cast<unsigned>(subTagSize));
1302
86.6k
  m_tagPositionsInBytecode = std::vector<size_t>(m_usedTags, std::numeric_limits<size_t>::max());
1303
86.6k
  unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode);
1304
  // Adjust bytesPerTag for references to sub assemblies.
1305
86.6k
  for (AssemblyItem const& item: items)
1306
19.1M
    if (item.type() == PushTag)
1307
2.34M
    {
1308
2.34M
      auto [subId, tagId] = item.splitForeignPushTag();
1309
2.34M
      if (subId == std::numeric_limits<size_t>::max())
1310
2.34M
        continue;
1311
89
      assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id");
1312
89
      auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId);
1313
89
      assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
1314
89
      bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition));
1315
89
    }
1316
1317
86.6k
  unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast<unsigned>(m_auxiliaryData.size());
1318
86.6k
  for (auto const& sub: m_subs)
1319
22.0k
    bytesRequiredIncludingData += static_cast<unsigned>(sub->assemble().bytecode.size());
1320
1321
86.6k
  unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData);
1322
86.6k
  ret.bytecode.reserve(bytesRequiredIncludingData);
1323
1324
86.6k
  TagRefs tagRefs;
1325
86.6k
  DataRefs dataRefs;
1326
86.6k
  SubAssemblyRefs subRefs;
1327
86.6k
  ProgramSizeRefs sizeRefs;
1328
86.6k
  uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag));
1329
86.6k
  uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
1330
1331
86.6k
  LinkerObject::CodeSectionLocation codeSectionLocation;
1332
86.6k
  codeSectionLocation.instructionLocations.reserve(items.size());
1333
86.6k
  codeSectionLocation.start = 0;
1334
86.6k
  for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate)
1335
19.1M
  {
1336
    // collect instruction locations via side effects
1337
19.1M
    InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex);
1338
    // store position of the invalid jump destination
1339
19.1M
    if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
1340
84.3k
      m_tagPositionsInBytecode[0] = ret.bytecode.size();
1341
1342
19.1M
    switch (item.type())
1343
19.1M
    {
1344
11.2M
    case Operation:
1345
11.2M
      ret.bytecode += assembleOperation(item);
1346
11.2M
      break;
1347
3.71M
    case Push:
1348
3.71M
      ret.bytecode += assemblePush(item);
1349
3.71M
      break;
1350
2.34M
    case PushTag:
1351
2.34M
      ret.bytecode.push_back(tagPush);
1352
2.34M
      tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1353
2.34M
      ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1354
2.34M
      break;
1355
15.6k
    case PushData:
1356
15.6k
      ret.bytecode.push_back(dataRefPush);
1357
15.6k
      dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1358
15.6k
      ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1359
15.6k
      break;
1360
18.4k
    case PushSub:
1361
18.4k
      assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1362
18.4k
      ret.bytecode.push_back(dataRefPush);
1363
18.4k
      subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1364
18.4k
      ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1365
18.4k
      break;
1366
19.7k
    case PushSubSize:
1367
19.7k
    {
1368
19.7k
      assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1369
19.7k
      auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1370
19.7k
      item.setPushedValue(u256(s));
1371
19.7k
      unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1372
19.7k
      ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1373
19.7k
      ret.bytecode.resize(ret.bytecode.size() + b);
1374
19.7k
      bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1375
19.7k
      toBigEndian(s, byr);
1376
19.7k
      break;
1377
19.7k
    }
1378
903
    case PushProgramSize:
1379
903
      ret.bytecode.push_back(dataRefPush);
1380
903
      sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1381
903
      ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1382
903
      break;
1383
1.76k
    case PushLibraryAddress:
1384
1.76k
    {
1385
1.76k
      auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1386
1.76k
      ret.bytecode += bytecode;
1387
1.76k
      ret.linkReferences.insert(linkRef);
1388
1.76k
      break;
1389
19.7k
    }
1390
311
    case PushImmutable:
1391
311
      ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1392
      // Maps keccak back to the "identifier" std::string of that immutable.
1393
311
      ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1394
      // Record the bytecode offset of the PUSH32 argument.
1395
311
      ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1396
      // Advance bytecode by 32 bytes (default initialized).
1397
311
      ret.bytecode.resize(ret.bytecode.size() + 32);
1398
311
      break;
1399
1.17k
    case VerbatimBytecode:
1400
1.17k
      ret.bytecode += assembleVerbatimBytecode(item);
1401
1.17k
      break;
1402
450
    case AssignImmutable:
1403
450
    {
1404
      // Expect 2 elements on stack (source, dest_base)
1405
450
      auto const& offsets = immutableReferencesBySub[item.data()].second;
1406
682
      for (size_t i = 0; i < offsets.size(); ++i)
1407
232
      {
1408
232
        if (i != offsets.size() - 1)
1409
13
        {
1410
13
          ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1411
          // This item type decomposes into multiple evm instructions, so we manually call emit()
1412
13
          instructionLocationEmitter.emit();
1413
13
          ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1414
13
          instructionLocationEmitter.emit();
1415
13
        }
1416
        // TODO: should we make use of the constant optimizer methods for pushing the offsets?
1417
232
        bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1418
232
        ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1419
232
        ret.bytecode += offsetBytes;
1420
232
        instructionLocationEmitter.emit();
1421
232
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD));
1422
232
        instructionLocationEmitter.emit();
1423
232
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE));
1424
        // No emit needed here, it's taken care of by the destructor of instructionLocationEmitter.
1425
232
      }
1426
450
      if (offsets.empty())
1427
231
      {
1428
231
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1429
231
        instructionLocationEmitter.emit();
1430
231
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1431
        // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
1432
231
      }
1433
450
      immutableReferencesBySub.erase(item.data());
1434
450
      break;
1435
19.7k
    }
1436
420
    case PushDeployTimeAddress:
1437
420
      ret.bytecode += assemblePushDeployTimeAddress();
1438
420
      break;
1439
1.76M
    case Tag:
1440
1.76M
      ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1441
1.76M
      break;
1442
0
    default:
1443
0
      solAssert(false, "Unexpected opcode while assembling.");
1444
19.1M
    }
1445
19.1M
  }
1446
1447
86.6k
  codeSectionLocation.end = ret.bytecode.size();
1448
1449
86.6k
  ret.codeSectionLocations.emplace_back(std::move(codeSectionLocation));
1450
1451
86.6k
  if (!immutableReferencesBySub.empty())
1452
0
    throw
1453
0
      langutil::Error(
1454
0
        1284_error,
1455
0
        langutil::Error::Type::CodeGenerationError,
1456
0
        "Some immutables were read from but never assigned, possibly because of optimization."
1457
0
      );
1458
1459
86.6k
  if (!m_subs.empty() || !m_data.empty() || !m_auxiliaryData.empty())
1460
    // Append an INVALID here to help tests find miscompilation.
1461
42.8k
    ret.bytecode.push_back(static_cast<uint8_t>(Instruction::INVALID));
1462
1463
86.6k
  std::map<LinkerObject, size_t> subAssemblyOffsets;
1464
86.6k
  for (auto const& [subIdPath, bytecodeOffset]: subRefs)
1465
18.4k
  {
1466
18.4k
    LinkerObject subObject = subAssemblyById(subIdPath)->assemble();
1467
18.4k
    bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef);
1468
1469
    // In order for de-duplication to kick in, not only must the bytecode be identical, but
1470
    // link and immutables references as well.
1471
18.4k
    if (size_t* subAssemblyOffset = util::valueOrNullptr(subAssemblyOffsets, subObject))
1472
1.34k
      toBigEndian(*subAssemblyOffset, r);
1473
17.1k
    else
1474
17.1k
    {
1475
17.1k
      toBigEndian(ret.bytecode.size(), r);
1476
17.1k
      subAssemblyOffsets[subObject] = ret.bytecode.size();
1477
17.1k
      ret.bytecode += subObject.bytecode;
1478
17.1k
    }
1479
18.4k
    for (auto const& ref: subObject.linkReferences)
1480
2.80k
      ret.linkReferences[ref.first + subAssemblyOffsets[subObject]] = ref.second;
1481
18.4k
  }
1482
86.6k
  for (auto const& i: tagRefs)
1483
2.34M
  {
1484
2.34M
    size_t subId;
1485
2.34M
    size_t tagId;
1486
2.34M
    std::tie(subId, tagId) = i.second;
1487
2.34M
    assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id");
1488
2.34M
    std::vector<size_t> const& tagPositions =
1489
2.34M
      subId == std::numeric_limits<size_t>::max() ?
1490
2.34M
      m_tagPositionsInBytecode :
1491
2.34M
      m_subs[subId]->m_tagPositionsInBytecode;
1492
2.34M
    assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag.");
1493
2.34M
    size_t pos = tagPositions[tagId];
1494
2.34M
    assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
1495
2.34M
    assertThrow(numberEncodingSize(pos) <= bytesPerTag, AssemblyException, "Tag too large for reserved space.");
1496
2.34M
    bytesRef r(ret.bytecode.data() + i.first, bytesPerTag);
1497
2.34M
    toBigEndian(pos, r);
1498
2.34M
  }
1499
86.6k
  for (auto const& [name, tagInfo]: m_namedTags)
1500
412k
  {
1501
412k
    size_t position = m_tagPositionsInBytecode.at(tagInfo.id);
1502
412k
    std::optional<size_t> tagIndex;
1503
412k
    for (auto&& [index, item]: items | ranges::views::enumerate)
1504
1.99G
      if (item.type() == Tag && static_cast<size_t>(item.data()) == tagInfo.id)
1505
362k
      {
1506
362k
        tagIndex = index;
1507
362k
        break;
1508
362k
      }
1509
412k
    ret.functionDebugData[name] = {
1510
412k
      position == std::numeric_limits<size_t>::max() ? std::nullopt : std::optional<size_t>{position},
1511
412k
      tagIndex,
1512
412k
      tagInfo.sourceID,
1513
412k
      tagInfo.params,
1514
412k
      tagInfo.returns
1515
412k
    };
1516
412k
  }
1517
1518
86.6k
  for (auto const& dataItem: m_data)
1519
11.0k
  {
1520
11.0k
    auto references = dataRefs.equal_range(dataItem.first);
1521
11.0k
    if (references.first == references.second)
1522
5.09k
      continue;
1523
21.6k
    for (auto ref = references.first; ref != references.second; ++ref)
1524
15.6k
    {
1525
15.6k
      bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef);
1526
15.6k
      toBigEndian(ret.bytecode.size(), r);
1527
15.6k
    }
1528
5.95k
    ret.bytecode += dataItem.second;
1529
5.95k
  }
1530
1531
86.6k
  ret.bytecode += m_auxiliaryData;
1532
1533
86.6k
  for (unsigned pos: sizeRefs)
1534
903
  {
1535
903
    bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef);
1536
903
    toBigEndian(ret.bytecode.size(), r);
1537
903
  }
1538
86.6k
  return ret;
1539
86.6k
}
1540
1541
std::map<ContainerID, ContainerID> Assembly::findReferencedContainers() const
1542
0
{
1543
0
  std::set<ContainerID> referencedSubcontainersIds;
1544
0
  solAssert(m_subs.size() <= 0x100); // According to EOF spec
1545
1546
0
  for (auto&& codeSection: m_codeSections)
1547
0
    for (AssemblyItem const& item: codeSection.items)
1548
0
      if (item.type() == EOFCreate || item.type() == ReturnContract)
1549
0
      {
1550
0
        solAssert(item.data() <= m_subs.size(), "Invalid subcontainer index.");
1551
0
        auto const containerId = static_cast<ContainerID>(item.data());
1552
0
        referencedSubcontainersIds.insert(containerId);
1553
0
      }
1554
1555
0
  std::map<ContainerID, ContainerID> replacements;
1556
0
  uint8_t nUnreferenced = 0;
1557
0
  for (size_t i = 0; i < m_subs.size(); ++i)
1558
0
  {
1559
0
    solAssert(i <= std::numeric_limits<ContainerID>::max());
1560
0
    if (referencedSubcontainersIds.count(static_cast<ContainerID>(i)) > 0)
1561
0
      replacements[static_cast<ContainerID>(i)] = static_cast<ContainerID>(i - nUnreferenced);
1562
0
    else
1563
0
      nUnreferenced++;
1564
0
  }
1565
1566
0
  return replacements;
1567
0
}
1568
1569
std::optional<uint16_t> Assembly::findMaxAuxDataLoadNOffset() const
1570
0
{
1571
0
  std::optional<unsigned> maxOffset = std::nullopt;
1572
0
  for (auto&& codeSection: m_codeSections)
1573
0
    for (AssemblyItem const& item: codeSection.items)
1574
0
      if (item.type() == AuxDataLoadN)
1575
0
      {
1576
0
        solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid auxdataloadn index value.");
1577
0
        auto const offset = static_cast<unsigned>(item.data());
1578
0
        if (!maxOffset.has_value() || offset > maxOffset.value())
1579
0
          maxOffset = offset;
1580
1581
0
      }
1582
1583
0
  return maxOffset;
1584
0
}
1585
1586
/// Assembles this object as an EOF version 1 container. The result is written into
/// m_assembledObject and a reference to it is returned.
/// The bytecode is produced in this order: header, code sections, referenced subassemblies,
/// data items and finally the auxiliary data.
/// Throws a CodeGenerationError if a code section, a relative jump distance or the data section
/// does not fit into the two-byte fields used for them.
LinkerObject const& Assembly::assembleEOF() const
{
  solAssert(m_eofVersion.has_value() && m_eofVersion == 1);
  LinkerObject& ret = m_assembledObject;

  // Maps original subassembly indices to the indices used in the emitted container.
  auto const subIdsReplacements = findReferencedContainers();
  auto const referencedSubIds = keys(subIdsReplacements);

  solAssert(!m_codeSections.empty(), "Expected at least one code section.");
  solAssert(
    m_codeSections.front().inputs == 0 && m_codeSections.front().outputs == 0 && m_codeSections.front().nonReturning,
    "Expected the first code section to have zero inputs and be non-returning."
  );

  auto const maxAuxDataLoadNOffset = findMaxAuxDataLoadNOffset();

  // Start with the EOF1 header; the size fields are patched once the real sizes are known.
  auto [headerBytecode, codeSectionSizePositions, dataSectionSizePosition] = createEOFHeader(referencedSubIds);
  ret.bytecode = headerBytecode;

  m_tagPositionsInBytecode = std::vector<size_t>(m_usedTags, std::numeric_limits<size_t>::max());
  // Bytecode position of a DATALOADN immediate -> offset requested by the item.
  std::map<size_t, uint16_t> dataSectionRef;
  // Bytecode position of a relative jump immediate -> id of the tag it targets.
  std::map<size_t, size_t> tagRef;

  for (auto&& [sectionIndex, section]: m_codeSections | ranges::views::enumerate)
  {
    auto const sectionStart = ret.bytecode.size();

    std::vector<LinkerObject::InstructionLocation> instructionLocations;
    instructionLocations.reserve(section.items.size());

    solAssert(!section.items.empty(), "Empty code section.");

    for (auto const& [itemIndex, item]: section.items | ranges::views::enumerate)
    {
      // Declared only for its side effects: it records the instruction location for this item.
      InstructionLocationEmitter locationEmitter {instructionLocations, ret.bytecode, itemIndex};

      // Tag 0 (the invalid jump destination) gets the position of the first non-tag item.
      if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
        m_tagPositionsInBytecode[0] = ret.bytecode.size();

      switch (item.type())
      {
      case Operation:
      {
        // Instructions with immediates have dedicated item types and must not arrive as plain operations.
        Instruction const op = item.instruction();
        solAssert(
          op != Instruction::DATALOADN &&
          op != Instruction::RETURNCONTRACT &&
          op != Instruction::EOFCREATE &&
          op != Instruction::RJUMP &&
          op != Instruction::RJUMPI &&
          op != Instruction::CALLF &&
          op != Instruction::JUMPF &&
          op != Instruction::RETF &&
          op != Instruction::DUPN &&
          op != Instruction::SWAPN
        );
        solAssert(op < Instruction::PUSH0 || op > Instruction::PUSH32);
        ret.bytecode += assembleOperation(item);
        break;
      }
      case Push:
        ret.bytecode += assemblePush(item);
        break;
      case PushLibraryAddress:
      {
        auto const [libraryPushBytecode, libraryLinkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
        ret.bytecode += libraryPushBytecode;
        ret.linkReferences.insert(libraryLinkRef);
        break;
      }
      case RelativeJump:
      case ConditionalRelativeJump:
      {
        ret.bytecode.push_back(static_cast<uint8_t>(item.instruction()));
        // Remember where the offset goes and emit a placeholder; it is filled in after all sections.
        tagRef[ret.bytecode.size()] = item.relativeJumpTagID();
        appendBigEndianUint16(ret.bytecode, 0u);
        break;
      }
      case EOFCreate:
      case ReturnContract:
      {
        Instruction const opcode =
          item.type() == EOFCreate ?
          Instruction::EOFCREATE :
          Instruction::RETURNCONTRACT;
        ret.bytecode.push_back(static_cast<uint8_t>(opcode));
        solAssert(item.data() <= std::numeric_limits<ContainerID>::max());
        auto const originalContainerID = static_cast<ContainerID>(item.data());
        solAssert(subIdsReplacements.count(originalContainerID) == 1);
        // The immediate is the index after unreferenced subassemblies have been skipped.
        ret.bytecode.push_back(subIdsReplacements.at(originalContainerID));
        break;
      }
      case VerbatimBytecode:
        ret.bytecode += assembleVerbatimBytecode(item);
        break;
      case PushDeployTimeAddress:
        ret.bytecode += assemblePushDeployTimeAddress();
        break;
      case Tag:
        ret.bytecode += assembleTag(item, ret.bytecode.size(), false);
        break;
      case AuxDataLoadN:
      {
        // findMaxAuxDataLoadNOffset has already checked that the value fits into two bytes.
        solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid auxdataloadn position.");
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DATALOADN));
        dataSectionRef[ret.bytecode.size()] = static_cast<uint16_t>(item.data());
        appendBigEndianUint16(ret.bytecode, item.data());
        break;
      }
      case CallF:
      case JumpF:
      {
        ret.bytecode.push_back(static_cast<uint8_t>(item.instruction()));
        solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid callf/jumpf index value.");
        size_t const targetSection = static_cast<uint16_t>(item.data());
        solAssert(targetSection < m_codeSections.size());
        solAssert(item.functionSignature().argsNum <= 127);
        solAssert(item.functionSignature().retsNum <= 127);
        solAssert(m_codeSections[targetSection].inputs == item.functionSignature().argsNum);
        solAssert(m_codeSections[targetSection].outputs == item.functionSignature().retsNum);
        // Only JumpF may target a non-returning section.
        solAssert(item.type() == JumpF || !m_codeSections[targetSection].nonReturning);
        appendBigEndianUint16(ret.bytecode, item.data());
        break;
      }
      case RetF:
        ret.bytecode.push_back(static_cast<uint8_t>(Instruction::RETF));
        break;
      case SwapN:
      case DupN:
        ret.bytecode.push_back(static_cast<uint8_t>(item.instruction()));
        solAssert(item.data() >= 1 && item.data() <= 256);
        // The one-byte immediate stores the depth minus one.
        ret.bytecode.push_back(static_cast<uint8_t>(item.data() - 1));
        break;
      default:
        solAssert(false, "Unexpected opcode while assembling.");
      }
    }

    auto const sectionSize = ret.bytecode.size() - sectionStart;
    if (sectionSize > std::numeric_limits<uint16_t>::max())
    {
      // TODO: Include source location. Note that origin locations we have in debug data are
      // not usable for error reporting when compiling pure Yul because they point at the optimized source.
      throw Error(
        2202_error,
        Error::Type::CodeGenerationError,
        "Code section too large for EOF."
      );
    }
    setBigEndianUint16(ret.bytecode, codeSectionSizePositions[sectionIndex], sectionSize);

    ret.codeSectionLocations.push_back(LinkerObject::CodeSectionLocation{
      .start = sectionStart,
      .end = ret.bytecode.size(),
      .instructionLocations = std::move(instructionLocations)
    });
  }

  // All tag positions are known now, so the relative jump placeholders can be filled in.
  for (auto const& [immediatePos, targetTag]: tagRef)
  {
    solAssert(targetTag < m_tagPositionsInBytecode.size(), "Reference to non-existing tag.");
    size_t const targetPos = m_tagPositionsInBytecode[targetTag];
    solAssert(targetPos != std::numeric_limits<size_t>::max(), "Reference to tag without position.");

    // The offset is measured from the end of the two-byte immediate.
    ptrdiff_t const relativeJumpOffset = static_cast<ptrdiff_t>(targetPos) - (static_cast<ptrdiff_t>(immediatePos) + 2);
    // This cannot happen in practice because we'll run into section size limit first.
    if (relativeJumpOffset < -0x8000 || relativeJumpOffset > 0x7FFF)
    {
      // TODO: Include source location. Note that origin locations we have in debug data are
      // not usable for error reporting when compiling pure Yul because they point at the optimized source.
      throw Error(
        2703_error,
        Error::Type::CodeGenerationError,
        "Relative jump too far"
      );
    }
    solAssert(relativeJumpOffset < -2 || 0 <= relativeJumpOffset, "Relative jump offset into immediate argument.");
    setBigEndianUint16(ret.bytecode, immediatePos, static_cast<size_t>(static_cast<uint16_t>(relativeJumpOffset)));
  }

  for (auto subId: referencedSubIds)
  {
    size_t const subStart = ret.bytecode.size();
    auto const& subObject = m_subs[subId]->assemble();
    // The subassembly bytecode is appended to the bytecode of this object ...
    ret.bytecode += subObject.bytecode;
    // ... and its link references are taken over, shifted by the position of the
    // subassembly inside this object.
    for (auto const& [positionInSub, linkRef]: subObject.linkReferences)
      ret.linkReferences[subStart + positionInSub] = linkRef;
  }

  // TODO: Fill functionDebugData for EOF. It probably should be handled for new code section in the loop above.
  solRequire(m_namedTags.empty(), AssemblyException, "Named tags must be empty in EOF context.");

  auto const dataStart = ret.bytecode.size();

  for (auto const& entry: m_data)
    ret.bytecode += entry.second;

  ret.bytecode += m_auxiliaryData;

  auto const preDeployDataSectionSize = ret.bytecode.size() - dataStart;
  // DATALOADN loads 32 bytes from EOF data section zero padded if reading out of data bounds.
  // In our case we do not allow DATALOADN with offsets which reads out of data bounds.
  auto const staticAuxDataSize = maxAuxDataLoadNOffset.has_value() ? (*maxAuxDataLoadNOffset + 32u) : 0u;
  auto const preDeployAndStaticAuxDataSize = preDeployDataSectionSize + staticAuxDataSize;

  if (preDeployAndStaticAuxDataSize > std::numeric_limits<uint16_t>::max())
  {
    throw Error(
      3965_error,
      Error::Type::CodeGenerationError,
      "The highest accessed data offset exceeds the maximum possible size of the static auxdata section."
    );
  }

  // DATALOADN immediates were emitted relative to the start of the static auxdata. If data was
  // placed in front of it, they have to be shifted by the size of that data.
  if (preDeployDataSectionSize > 0)
  {
    for (auto [immediatePos, auxDataOffset]: dataSectionRef)
    {
      // auxDataOffset + preDeployDataSectionSize is already known to fit into two bytes because
      // auxDataOffset < staticAuxDataSize.
      setBigEndianUint16(ret.bytecode, immediatePos, auxDataOffset + preDeployDataSectionSize);
    }
  }

  setBigEndianUint16(ret.bytecode, dataSectionSizePosition, preDeployAndStaticAuxDataSize);

  return ret;
}
1815
1816
std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const
1817
38.2k
{
1818
38.2k
  if (_subObjectId < m_subs.size())
1819
38.2k
    return {_subObjectId};
1820
1821
0
  auto subIdPathIt = find_if(
1822
0
    m_subPaths.begin(),
1823
0
    m_subPaths.end(),
1824
0
    [_subObjectId](auto const& subId) { return subId.second == _subObjectId; }
1825
0
  );
1826
1827
0
  assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, "");
1828
0
  return subIdPathIt->first;
1829
0
}
1830
1831
size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath)
1832
12.0k
{
1833
12.0k
  assertThrow(!_subPath.empty(), AssemblyException, "");
1834
12.0k
  if (_subPath.size() == 1)
1835
12.0k
  {
1836
12.0k
    assertThrow(_subPath[0] < m_subs.size(), AssemblyException, "");
1837
12.0k
    return _subPath[0];
1838
12.0k
  }
1839
1840
0
  if (m_subPaths.find(_subPath) == m_subPaths.end())
1841
0
  {
1842
0
    size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size();
1843
0
    assertThrow(objectId >= m_subs.size(), AssemblyException, "");
1844
0
    m_subPaths[_subPath] = objectId;
1845
0
  }
1846
1847
0
  return m_subPaths[_subPath];
1848
0
}
1849
1850
/// Resolves a sub object id to the subassembly it refers to by decoding the id into a path and
/// following that path through the nested m_subs vectors, starting at this assembly.
/// Throws AssemblyException if a null subassembly is encountered on the way or if the walk ends
/// at this assembly itself.
Assembly const* Assembly::subAssemblyById(size_t _subId) const
{
  Assembly const* resolved = this;
  for (size_t const pathElement: decodeSubPath(_subId))
  {
    resolved = resolved->m_subs.at(pathElement).get();
    assertThrow(resolved != nullptr, AssemblyException, "");
  }

  assertThrow(resolved != this, AssemblyException, "");
  return resolved;
}
1863
1864
/// Copies the assembly-level optimiser switches and the expected number of executions per
/// deployment from the frontend optimiser settings into an Assembly::OptimiserSettings value.
Assembly::OptimiserSettings Assembly::OptimiserSettings::translateSettings(frontend::OptimiserSettings const& _settings)
{
  // Every field is listed in the initializer on purpose, so that a change to the set of
  // fields gets noticed at this place.
  OptimiserSettings translated{false, false, false, false, false, false, 0};

  translated.runInliner = _settings.runInliner;
  translated.runJumpdestRemover = _settings.runJumpdestRemover;
  translated.runPeephole = _settings.runPeephole;
  translated.runDeduplicate = _settings.runDeduplicate;
  translated.runCSE = _settings.runCSE;
  translated.runConstantOptimiser = _settings.runConstantOptimiser;
  translated.expectedExecutionsPerDeployment = _settings.expectedExecutionsPerDeployment;

  return translated;
}