package fusefilter
import (
"encoding/binary"
"fmt"
"math"
"os"
"path/filepath"
"unsafe"
"github.com/FastFilter/xorfilter"
"github.com/c2h5oh/datasize"
"github.com/edsrzf/mmap-go"
"github.com/erigontech/erigon/common/dbg"
mm "github.com/erigontech/erigon/common/mmap"
)
type Features uint32
const (
IsLittleEndianFeature Features = 0b1
)
// headerSize is the fixed byte layout of the fusefilter on-disk header:
//
// 1B version + 3B features + 4B SegmentCount + 4B SegmentCountLength +
// 8B Seed + 4B SegmentLength + 4B SegmentLengthMask + 8B fingerprints-len.
const headerSize = 1 + 3 + 4 + 4 + 8 + 4 + 4 + 8
type Reader struct {
inner *xorfilter.BinaryFuse[uint8]
keepInMem bool // keep it in mem insted of mmap
fileName string
f *os.File
m mmap.MMap
features Features
version uint8
}
var (
MadvWillNeedByDefault = dbg.EnvBool("FUSE_MADV_WILLNEED", false)
MadvNormalByDefault = dbg.EnvBool("FUSE_MADV_NORMAL", false)
)
func NewReader(filePath string) (_ *Reader, err error) {
f, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
_ = f.Close() //nolint
}
}()
st, err := f.Stat()
if err != nil {
return nil, err
}
sz := int(st.Size())
m, err := mmap.MapRegion(f, sz, mmap.RDONLY, 0, 0)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
_ = m.Unmap() //nolint
}
}()
_, fileName := filepath.Split(filePath)
r, _, err := NewReaderOnBytes(m, fileName)
if err != nil {
return nil, err
}
r.f = f
r.m = m
r.fileName = fileName
return r, nil
}
func parseHeaderFeatures(header []byte, fName string) (version uint8, features Features, err error) {
version = header[0]
features = Features(binary.BigEndian.Uint32(header[:4]) & 0x00FFFFFF)
if (features&IsLittleEndianFeature != 0) != IsLittleEndian {
return 0, 0, fmt.Errorf("file %s is not compatible with your machine (different Endianness), but you can run `erigon snapshots index`", fName)
}
return version, features, nil
}
func NewReaderOnBytes(m []byte, fName string) (*Reader, int, error) {
if len(m) < headerSize {
return nil, 0, fmt.Errorf("fusefilter %s: too small for header (%d < %d)", fName, len(m), headerSize)
}
filter := &xorfilter.BinaryFuse[uint8]{}
header, data := m[:headerSize], m[headerSize:]
v, features, err := parseHeaderFeatures(header, fName)
if err != nil {
return nil, 0, err
}
filter.SegmentCount = binary.BigEndian.Uint32(header[4:])
filter.SegmentCountLength = binary.BigEndian.Uint32(header[4+4:])
filter.Seed = binary.BigEndian.Uint64(header[4+4+4:])
filter.SegmentLength = binary.BigEndian.Uint32(header[4+4+4+8:])
filter.SegmentLengthMask = binary.BigEndian.Uint32(header[4+4+4+8+4:])
fingerprintsLen64 := binary.BigEndian.Uint64(header[4+4+4+8+4+4:])
if fingerprintsLen64 > math.MaxInt || uint64(len(data)) < fingerprintsLen64 {
return nil, 0, fmt.Errorf("fusefilter %s: fingerprints length %d exceeds available bytes %d", fName, fingerprintsLen64, len(data))
}
fingerprintsLen := int(fingerprintsLen64)
if err := validateFilterGeometry(filter, fingerprintsLen, fName); err != nil {
return nil, 0, err
}
filter.Fingerprints = data[:fingerprintsLen]
total := headerSize + fingerprintsLen
return &Reader{inner: filter, version: v, features: features, m: m[:total]}, total, nil
}
// validateFilterGeometry rejects on-disk header values that would let
// xorfilter.Contains panic with an out-of-bounds Fingerprints index. The fuse
// filter encodes h_i = uint32(Mul64(hash, SegmentCountLength).hi) + i*SegmentLength
// for i in {0,1,2}; the largest index it can produce is SegmentCountLength-1 +
// 2*SegmentLength, so we need len(Fingerprints) >= (SegmentCount+2)*SegmentLength.
func validateFilterGeometry(filter *xorfilter.BinaryFuse[uint8], fingerprintsLen int, fName string) error {
if filter.SegmentLength == 0 || filter.SegmentCount == 0 {
return fmt.Errorf("fusefilter %s: zero SegmentLength=%d SegmentCount=%d", fName, filter.SegmentLength, filter.SegmentCount)
}
// SegmentLength is a power of two by construction (calculateSegmentLength
// returns 1<<k). The mask must equal SegmentLength-1; otherwise hi-bit
// shifts in getHashFromHash compute indices beyond SegmentLengthMask+1,
// breaking the implicit bound used below.
if filter.SegmentLength&(filter.SegmentLength-1) != 0 {
return fmt.Errorf("fusefilter %s: SegmentLength=%d is not a power of two", fName, filter.SegmentLength)
}
if filter.SegmentLengthMask != filter.SegmentLength-1 {
return fmt.Errorf("fusefilter %s: SegmentLengthMask=%d != SegmentLength-1=%d", fName, filter.SegmentLengthMask, filter.SegmentLength-1)
}
wantSCL := uint64(filter.SegmentCount) * uint64(filter.SegmentLength)
if wantSCL > math.MaxUint32 {
return fmt.Errorf("fusefilter %s: SegmentCount*SegmentLength=%d overflows uint32", fName, wantSCL)
}
if uint64(filter.SegmentCountLength) != wantSCL {
return fmt.Errorf("fusefilter %s: SegmentCountLength=%d != SegmentCount*SegmentLength=%d", fName, filter.SegmentCountLength, wantSCL)
}
wantFP := (uint64(filter.SegmentCount) + 2) * uint64(filter.SegmentLength)
if wantFP > math.MaxInt {
return fmt.Errorf("fusefilter %s: required fingerprints length %d overflows int", fName, wantFP)
}
if uint64(fingerprintsLen) < wantFP {
return fmt.Errorf("fusefilter %s: fingerprints length %d < required %d for SegmentCount=%d SegmentLength=%d", fName, fingerprintsLen, wantFP, filter.SegmentCount, filter.SegmentLength)
}
return nil
}
func (r *Reader) ForceInMem() datasize.ByteSize {
if r.m == nil || r.inner == nil {
return 0
}
cpy := make([]byte, len(r.inner.Fingerprints)) //don't use bytes.Clone - to see ram owner on heap profiler
copy(cpy, r.inner.Fingerprints)
r.inner.Fingerprints = cpy
r.keepInMem = true
return datasize.ByteSize(len(r.inner.Fingerprints))
}
func (r *Reader) MadvWillNeed() {
if r == nil || r.f == nil || r.m == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseWillNeed(r.m); err != nil {
panic(err)
}
}
func (r *Reader) MadvNormal() {
if r == nil || r.f == nil || r.m == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseNormal(r.m); err != nil {
panic(err)
}
}
func (r *Reader) MadvRandom() {
if r == nil || r.f == nil || r.m == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseRandom(r.m); err != nil {
panic(err)
}
}
func (r *Reader) FileName() string { return r.fileName }
func (r *Reader) ContainsHash(v uint64) bool { return r.inner.Contains(v) }
func (r *Reader) Close() {
if r == nil || r.f == nil {
return
}
_ = r.m.Unmap() //nolint
_ = r.f.Close() //nolint
r.f = nil
}
// ReaderSharded reads a sharded fusefilter (outer header version=1).
// Only the shard matching keyHash >> 56 is checked on lookup.
// shards is a value array (not pointer array) so ContainsHash needs only one pointer dereference.
type ReaderSharded struct {
m mmap.MMap // outer slice spanning header + all shard blobs, used for madvise
f *os.File // non-nil only when opened via NewReaderSharded(filePath)
fileName string
keepInMem bool // ForceInMem replaced m with an anonymous heap copy; skip madvise/munmap
shards [256]Reader
}
func NewReaderSharded(filePath string) (_ *ReaderSharded, err error) {
f, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
_ = f.Close() //nolint
}
}()
st, err := f.Stat()
if err != nil {
return nil, err
}
sz := int(st.Size())
m, err := mmap.MapRegion(f, sz, mmap.RDONLY, 0, 0)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
_ = m.Unmap() //nolint
}
}()
_, fileName := filepath.Split(filePath)
r, _, err := NewReaderShardedOnBytes(m, fileName)
if err != nil {
return nil, err
}
r.f = f
r.m = m
r.fileName = fileName
return r, nil
}
func (r *ReaderSharded) FileName() string { return r.fileName }
func (r *ReaderSharded) Close() {
if r == nil || r.f == nil {
return
}
if !r.keepInMem {
_ = r.m.Unmap() //nolint
}
_ = r.f.Close() //nolint
r.f = nil
}
func NewReaderShardedOnBytes(m []byte, fName string) (*ReaderSharded, int, error) {
const headerSize = 4
if len(m) < headerSize {
return nil, 0, fmt.Errorf("fusefilter sharded %s: too small (%d bytes)", fName, len(m))
}
v, _, err := parseHeaderFeatures(m[:4], fName)
if err != nil {
return nil, 0, err
}
if v != 1 {
return nil, 0, fmt.Errorf("fusefilter sharded %s: unsupported version %d", fName, v)
}
r := &ReaderSharded{}
offset := headerSize
for i := range 256 {
if offset+8 > len(m) {
return nil, 0, fmt.Errorf("fusefilter sharded %s: truncated at shard %d", fName, i)
}
sz64 := binary.BigEndian.Uint64(m[offset:])
offset += 8
if sz64 == 0 {
continue
}
if sz64 > math.MaxInt || sz64 > uint64(len(m)-offset) {
return nil, 0, fmt.Errorf("fusefilter sharded %s: shard %d blob overflows (offset=%d sz=%d total=%d)", fName, i, offset, sz64, len(m))
}
sz := int(sz64)
if sz < filterBlobHeaderSize {
return nil, 0, fmt.Errorf("fusefilter sharded %s: shard %d size %d < header %d", fName, i, sz, filterBlobHeaderSize)
}
shard, consumed, err := NewReaderOnBytes(m[offset:offset+sz], fName)
if err != nil {
return nil, 0, fmt.Errorf("shard %d of %s: %w", i, fName, err)
}
if consumed != sz {
return nil, 0, fmt.Errorf("fusefilter sharded %s: shard %d consumed %d != declared size %d", fName, i, consumed, sz)
}
r.shards[i] = *shard
offset += sz
}
r.m = m[:offset]
return r, offset, nil
}
func (r *ReaderSharded) ContainsHash(v uint64) bool {
s := &r.shards[v>>56]
if s.inner == nil {
return false
}
return s.ContainsHash(v)
}
// ForceInMem clones each shard's fingerprints into anonymous heap memory.
func (r *ReaderSharded) ForceInMem() datasize.ByteSize {
var res datasize.ByteSize
for i := range r.shards {
res += r.shards[i].ForceInMem()
}
return res
}
// MadvWillNeed hints to the OS that all shard blobs will be accessed.
// One madvise on the outer mmap slice covers all shards in a single syscall,
// instead of 256 madvise calls on adjacent sub-slices of the same VMA.
func (r *ReaderSharded) MadvWillNeed() {
if r == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseWillNeed(r.m); err != nil {
panic(err)
}
}
func (r *ReaderSharded) MadvNormal() {
if r == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseNormal(r.m); err != nil {
panic(err)
}
}
func (r *ReaderSharded) MadvRandom() {
if r == nil || len(r.m) == 0 || r.keepInMem {
return
}
if err := mm.MadviseRandom(r.m); err != nil {
panic(err)
}
}
var IsLittleEndian = isLittleEndian()
func isLittleEndian() bool {
var x uint16 = 0x0102
xb := *(*[2]byte)(unsafe.Pointer(&x))
return (xb[0] == 0x02)
}
package fusefilter
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"math"
"os"
"path/filepath"
"slices"
"unsafe"
"github.com/erigontech/erigon/common/dir"
"github.com/FastFilter/xorfilter"
"github.com/edsrzf/mmap-go"
)
const bufSize = 4096
// WriterOffHeap does write all keys to temporary mmap file - and using it as a source for `fusefilter` building
type WriterOffHeap struct {
count int
buf [bufSize]uint64
features Features
tmpFile *os.File
tmpFilePath string
}
func initFeatures() Features {
var f Features
if IsLittleEndian {
f |= IsLittleEndianFeature
}
return f
}
func NewWriterOffHeap(filePath string) (*WriterOffHeap, error) {
f, err := dir.CreateTempWithExtension(filePath, "existence.tmp")
if err != nil {
return nil, err
}
return &WriterOffHeap{tmpFile: f, features: initFeatures(), tmpFilePath: f.Name()}, nil
}
func (w *WriterOffHeap) Close() {
if w.tmpFile != nil {
w.tmpFile.Close()
dir.RemoveFile(w.tmpFilePath)
w.tmpFile = nil
}
}
func (w *WriterOffHeap) build() (*xorfilter.BinaryFuse[uint8], error) {
defer func() {
if w.tmpFile != nil {
w.tmpFile.Close()
w.tmpFile = nil
}
dir.RemoveFile(w.tmpFilePath)
}()
if w.count%bufSize != 0 {
if _, err := w.tmpFile.Write(castToBytes(w.buf[:w.count%bufSize])); err != nil {
return nil, err
}
}
st, err := w.tmpFile.Stat()
if err != nil {
return nil, err
}
sz := int(st.Size())
m, err := mmap.MapRegion(w.tmpFile, sz, mmap.RDONLY, 0, 0)
if err != nil {
return nil, fmt.Errorf("%s %w", w.tmpFilePath, err)
}
defer m.Unmap()
keysHashes := castToArrU64(m[:sz])
filter, err := xorfilter.NewBinaryFuse[uint8](keysHashes)
if err != nil {
return nil, fmt.Errorf("%s %w", w.tmpFilePath, err)
}
return filter, nil
}
// filterBlobHeaderSize is the fixed header size of a serialised BinaryFuse[uint8] blob.
const filterBlobHeaderSize = 1 + 3 + 4 + 4 + 8 + 4 + 4 + 8
// writeFilter serialises a built BinaryFuse filter as a version-0 fusefilter blob.
func writeFilter(features Features, filter *xorfilter.BinaryFuse[uint8], fw io.Writer) (int, error) {
if filter.SegmentCount > math.MaxUint32/2 {
return 0, fmt.Errorf("SegmentCount=%d cannot be greater than u32/2", filter.SegmentCount)
}
const version uint8 = 0
var header [headerSize]byte
binary.BigEndian.PutUint32(header[:], uint32(features)) //nolint:gocritic
header[0] = version
binary.BigEndian.PutUint32(header[4:], filter.SegmentCount)
binary.BigEndian.PutUint32(header[4+4:], filter.SegmentCountLength)
binary.BigEndian.PutUint64(header[4+4+4:], filter.Seed)
binary.BigEndian.PutUint32(header[4+4+4+8:], filter.SegmentLength)
binary.BigEndian.PutUint32(header[4+4+4+8+4:], filter.SegmentLengthMask)
binary.BigEndian.PutUint64(header[4+4+4+8+4+4:], uint64(len(filter.Fingerprints)))
if _, err := fw.Write(header[:]); err != nil { //nolint:gocritic
return 0, err
}
if _, err := fw.Write(filter.Fingerprints); err != nil {
return 0, err
}
return headerSize + len(filter.Fingerprints), nil
}
func (w *WriterOffHeap) AddHash(k uint64) error {
w.buf[w.count%bufSize] = k
w.count++
if w.count%bufSize == 0 {
_, err := w.tmpFile.Write(castToBytes(w.buf[:]))
if err != nil {
return err
}
}
return nil
}
func (w *WriterOffHeap) BuildTo(to io.Writer) (int, error) {
filter, err := w.build()
if err != nil {
return 0, fmt.Errorf("%s %w", w.tmpFilePath, err)
}
return writeFilter(w.features, filter, to)
}
type Writer struct {
filePath string
fileName string
noFsync bool
data *WriterOffHeap
}
func NewWriter(filePath string) (*Writer, error) {
_, fileName := filepath.Split(filePath)
w, err := NewWriterOffHeap(filePath)
if err != nil {
return nil, err
}
return &Writer{
filePath: filePath,
fileName: fileName,
data: w,
}, nil
}
func (w *Writer) DisableFsync() { w.noFsync = true }
func (w *Writer) FileName() string { return w.fileName }
func (w *Writer) AddHash(k uint64) error { return w.data.AddHash(k) }
func (w *Writer) Build() error {
f, err := dir.CreateTemp(w.filePath)
if err != nil {
return fmt.Errorf("%s %w", w.filePath, err)
}
defer dir.RemoveFile(f.Name())
defer f.Close()
fw := bufio.NewWriter(f)
defer fw.Flush()
if _, err = w.data.BuildTo(fw); err != nil {
return fmt.Errorf("%s %w", w.filePath, err)
}
if err = fw.Flush(); err != nil {
return err
}
if !w.noFsync {
if err = f.Sync(); err != nil {
return err
}
}
if err = f.Close(); err != nil {
return err
}
if err = os.Rename(f.Name(), w.filePath); err != nil {
return err
}
return nil
}
func (w *Writer) Close() {
if w.data != nil {
w.data.Close()
w.data = nil
}
}
// WriterSharded shards keys into 256 sub-filters by first byte of keyHash (keyHash >> 56).
// Produces fusefilter file version 1. Lookup checks only the one relevant shard.
// Embeds WriterOffHeap for the shared page-buffer + temp-file machinery.
type WriterSharded struct {
WriterOffHeap
filePath string
noFsync bool
}
func NewWriterSharded(filePath string) (*WriterSharded, error) {
f, err := dir.CreateTempWithExtension(filePath, "existence-sharded.tmp")
if err != nil {
return nil, err
}
return &WriterSharded{
WriterOffHeap: WriterOffHeap{tmpFile: f, tmpFilePath: f.Name(), features: initFeatures()},
filePath: filePath,
}, nil
}
func (w *WriterSharded) DisableFsync() { w.noFsync = true }
// Build writes the sharded fusefilter to w.filePath using the same atomic
// temp-then-rename pattern as Writer.Build.
func (w *WriterSharded) Build() error {
f, err := dir.CreateTemp(w.filePath)
if err != nil {
return fmt.Errorf("%s %w", w.filePath, err)
}
defer dir.RemoveFile(f.Name())
defer f.Close()
fw := bufio.NewWriter(f)
if _, err = w.BuildTo(fw); err != nil {
return fmt.Errorf("%s %w", w.filePath, err)
}
if err = fw.Flush(); err != nil {
return err
}
if !w.noFsync {
if err = f.Sync(); err != nil {
return err
}
}
if err = f.Close(); err != nil {
return err
}
return os.Rename(f.Name(), w.filePath)
}
// BuildTo writes sharded fusefilter (file version 1) to fw.
// Format: [4 bytes header] then 256 × [8 bytes size | blob] pairs (size==0 means absent).
// Fully streaming: no intermediate files, one shard's fingerprints in RAM at a time.
func (w *WriterSharded) BuildTo(fw io.Writer) (int, error) {
defer func() {
if w.tmpFile != nil {
w.tmpFile.Close()
w.tmpFile = nil
}
dir.RemoveFile(w.tmpFilePath)
}()
if rem := w.count % bufSize; rem != 0 {
if _, err := w.tmpFile.Write(castToBytes(w.buf[:rem])); err != nil {
return 0, err
}
}
st, err := w.tmpFile.Stat()
if err != nil {
return 0, err
}
sz := int(st.Size())
if sz == 0 {
return 0, fmt.Errorf("WriterSharded: no keys added")
}
m, err := mmap.MapRegion(w.tmpFile, sz, mmap.RDWR, 0, 0)
if err != nil {
return 0, fmt.Errorf("%s %w", w.tmpFilePath, err)
}
defer m.Unmap()
all := castToArrU64(m[:sz])
slices.Sort(all) // ascending sort groups hashes by top byte = shard index
// Find the largest shard so MakeBinaryFuseBuilder preallocates buffers that
// fit every shard. Without this, sizing the builder to the average shard
// (len(all)/256) leaves xorfilter's reuseBuffer to grow buffers on the first
// above-average shard via `*buf = append((*buf)[:0], make([]T, size)...)`,
// which holds the old AND new buffers live until GC — doubling peak heap
// briefly. This prescan is one sequential walk over the just-sorted (and
// thus cache-warm) mmap, so it's essentially free.
var maxShardSize int
{
i := 0
for shard := range 256 {
start := i
for i < len(all) && all[i]>>56 == uint64(shard) {
i++
}
if sz := i - start; sz > maxShardSize {
maxShardSize = sz
}
}
}
const version1 uint8 = 1
var header [4]byte
binary.BigEndian.PutUint32(header[:], uint32(w.features))
header[0] = version1
if _, err := fw.Write(header[:]); err != nil {
return 0, err
}
total := 4
builder := xorfilter.MakeBinaryFuseBuilder[uint8](maxShardSize)
var sizeBuf [8]byte
i := 0
for shard := range 256 {
start := i
for i < len(all) && all[i]>>56 == uint64(shard) {
i++
}
if start == i {
binary.BigEndian.PutUint64(sizeBuf[:], 0)
if _, err := fw.Write(sizeBuf[:]); err != nil {
return 0, err
}
total += 8
continue
}
filter, err := xorfilter.BuildBinaryFuse[uint8](&builder, all[start:i])
if err != nil {
return 0, fmt.Errorf("shard %d: %w", shard, err)
}
blobSize := filterSize(&filter)
binary.BigEndian.PutUint64(sizeBuf[:], uint64(blobSize))
if _, err := fw.Write(sizeBuf[:]); err != nil {
return 0, err
}
n, err := writeFilter(w.features, &filter, fw)
if err != nil {
return 0, fmt.Errorf("shard %d: %w", shard, err)
}
total += 8 + n
}
return total, nil
}
func filterSize(f *xorfilter.BinaryFuse[uint8]) int {
return filterBlobHeaderSize + len(f.Fingerprints)
}
// castToBytes converts []uint64 to []byte without copying data
func castToBytes(in []uint64) []byte {
if len(in) == 0 {
return nil
}
return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*8) // each uint64 is 8 bytes
}
// castToArrU64 converts []byte to []uint64 without copying data on little endian machines
func castToArrU64(b []byte) []uint64 {
if len(b) == 0 {
return nil
}
if len(b)%8 != 0 {
panic("byte slice length must be a multiple of 8")
}
return unsafe.Slice((*uint64)(unsafe.Pointer(&b[0])), len(b)/8)
}
// Copyright 2021 The Erigon Authors
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package eliasfano16
import (
"encoding/binary"
"io"
"math"
"math/bits"
"unsafe"
"github.com/c2h5oh/datasize"
"github.com/erigontech/erigon/db/recsplit/efcommon"
"github.com/erigontech/erigon/common/bitutil"
)
// EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html
// P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974.
// Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf
const (
log2q uint64 = 8
q uint64 = 1 << log2q
qMask = q - 1
superQ uint64 = 1 << 14
superQMask = superQ - 1
qPerSuperQ = superQ / q // 64
superQSize = 1 + qPerSuperQ/4 // 1 + 64/4 = 17
)
// EliasFano can be used to encode one monotone sequence
type EliasFano struct {
data []uint64
lowerBits []uint64
upperBits []uint64
jump []uint64
lowerBitsMask uint64
count uint64
u uint64
l uint64
maxOffset uint64
minDelta uint64
i uint64
delta uint64
wordsUpperBits int
}
func NewEliasFano(count uint64, maxOffset, minDelta uint64) *EliasFano {
//fmt.Printf("count=%d,maxOffset=%d,minDelta=%d\n", count, maxOffset, minDelta)
ef := &EliasFano{
count: count - 1,
maxOffset: maxOffset,
minDelta: minDelta,
}
ef.u = maxOffset - ef.count*ef.minDelta + 1
ef.wordsUpperBits = ef.deriveFields()
return ef
}
func (ef *EliasFano) AddOffset(offset uint64) {
//fmt.Printf("0x%x,\n", offset)
if ef.l != 0 {
setBits(ef.lowerBits, ef.i*ef.l, (offset-ef.delta)&ef.lowerBitsMask)
}
//pos := ((offset - ef.delta) >> ef.l) + ef.i
set(ef.upperBits, ((offset-ef.delta)>>ef.l)+ef.i)
//fmt.Printf("add:%x, pos=%x, set=%x, res=%x\n", offset, pos, pos/64, uint64(1)<<(pos%64))
ef.i++
ef.delta += ef.minDelta
}
func (ef *EliasFano) jumpSizeWords() int {
size := ((ef.count + 1) / superQ) * superQSize // Whole blocks
if (ef.count+1)%superQ != 0 {
size += 1 + (((ef.count+1)%superQ+q-1)/q+3)/4 // Partial block
}
return int(size)
}
func (ef *EliasFano) deriveFields() int {
if ef.u/(ef.count+1) == 0 {
ef.l = 0
} else {
ef.l = 63 ^ uint64(bits.LeadingZeros64(ef.u/(ef.count+1))) // pos of first non-zero bit
//fmt.Printf("lllllllll: %d, %d\n", 63^uint64(bits.LeadingZeros64(24/7)), msb(ef.u/(ef.count+1)))
}
ef.lowerBitsMask = (uint64(1) << ef.l) - 1
wordsLowerBits := int(((ef.count+1)*ef.l+63)/64 + 1)
wordsUpperBits := int((ef.count + 1 + (ef.u >> ef.l) + 63) / 64)
jumpWords := ef.jumpSizeWords()
totalWords := wordsLowerBits + wordsUpperBits + jumpWords
if ef.data == nil {
ef.data = make([]uint64, totalWords)
} else {
ef.data = ef.data[:totalWords]
}
ef.lowerBits = ef.data[:wordsLowerBits]
ef.upperBits = ef.data[wordsLowerBits : wordsLowerBits+wordsUpperBits]
ef.jump = ef.data[wordsLowerBits+wordsUpperBits:]
return wordsUpperBits
}
const jumpOffsetOverflowMsg = "eliasfano16: superQ-block span exceeds the 16-bit jump offset; sequence too sparse for the compact variant, use eliasfano32"
// Build constructs the Elias-Fano jump table; it panics if the sequence is too
// sparse for this variant's 16-bit jump offsets (encode such data with eliasfano32).
func (ef *EliasFano) Build() {
if !ef.build() {
panic(jumpOffsetOverflowMsg)
}
}
// build fills the jump table, returning false (with the table left partial, so
// the result must be discarded) when a jump offset exceeds 16 bits, so callers
// can reject out-of-range sequences instead of panicking.
func (ef *EliasFano) build() bool {
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*superQSize] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
offset := i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
if offset >= (1 << 16) { // must fit the 16-bit jump slot
return false
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * superQSize
jumpInsideSuperQ := (c % superQ) / q
idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
return true
}
func (ef *EliasFano) get(i uint64) (val, window uint64, sel int, currWord, lower, delta uint64) {
lower = i * ef.l
idx64 := lower / 64
shift := lower % 64
lower = ef.lowerBits[idx64] >> shift
if shift > 0 {
lower |= ef.lowerBits[idx64+1] << (64 - shift)
}
jumpSuperQ := (i / superQ) * superQSize
jumpInsideSuperQ := (i % superQ) / q
idx64 = jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
shift = 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
jump := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift
currWord = jump / 64
window = ef.upperBits[currWord] & (uint64(0xffffffffffffffff) << (jump % 64))
d := int(i & qMask)
for bitCount := bits.OnesCount64(window); bitCount <= d; bitCount = bits.OnesCount64(window) {
currWord++
window = ef.upperBits[currWord]
d -= bitCount
}
sel = bitutil.Select64(window, d)
delta = i * ef.minDelta
val = ((currWord*64+uint64(sel)-i)<<ef.l | (lower & ef.lowerBitsMask)) + delta
return
}
func (ef *EliasFano) Get(i uint64) uint64 {
val, _, _, _, _, _ := ef.get(i)
return val
}
func (ef *EliasFano) Get2(i uint64) (val, valNext uint64) {
var window uint64
var sel int
var currWord uint64
var lower uint64
var delta uint64
val, window, sel, currWord, lower, delta = ef.get(i)
window &= (uint64(0xffffffffffffffff) << sel) << 1
for window == 0 {
currWord++
window = ef.upperBits[currWord]
}
lower >>= ef.l
valNext = ((currWord*64+uint64(bits.TrailingZeros64(window))-i-1)<<ef.l | (lower & ef.lowerBitsMask)) + delta + ef.minDelta
return
}
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (ef *EliasFano) Write(w io.Writer) error {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], ef.count)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.u)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.minDelta)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
p := (*[maxDataSize]byte)(unsafe.Pointer(&ef.data[0]))
b := (*p)[:]
if _, e := w.Write(b[:len(ef.data)*8]); e != nil {
return e
}
return nil
}
// Read inputs the state of golomb rice encoding from a reader s
func ReadEliasFano(r []byte) (*EliasFano, int) {
ef := &EliasFano{}
ef.count = binary.BigEndian.Uint64(r[:8])
ef.u = binary.BigEndian.Uint64(r[8:16])
ef.minDelta = binary.BigEndian.Uint64(r[16:24])
p := (*[maxDataSize / 8]uint64)(unsafe.Pointer(&r[24]))
ef.data = p[:]
ef.deriveFields()
return ef, 24 + 8*len(ef.data)
}
const maxDataSize = 0xFFFFFFFFFFFF
// DoubleEliasFano can be used to encode two monotone sequences
// it is called "double" because the lower bits array contains two sequences interleaved
type DoubleEliasFano struct {
data []uint64
lowerBits []uint64
upperBitsPosition []uint64
upperBitsCumKeys []uint64
jump []uint64
lowerBitsMaskCumKeys uint64
lowerBitsMaskPosition uint64
numBuckets uint64
uCumKeys uint64
uPosition uint64
lPosition uint64
lCumKeys uint64
cumKeysMinDelta uint64
posMinDelta uint64
}
func (ef *DoubleEliasFano) Size() datasize.ByteSize { return datasize.ByteSize(len(ef.data) * 8) }
func (ef *DoubleEliasFano) deriveFields() (int, int) {
r := efcommon.DeriveDoubleEFFields(ef.numBuckets, ef.uCumKeys, ef.uPosition, ef.data, ef.jumpSizeWords())
ef.lPosition = r.LPosition
ef.lCumKeys = r.LCumKeys
ef.lowerBitsMaskCumKeys = r.LowerBitsMaskCumKeys
ef.lowerBitsMaskPosition = r.LowerBitsMaskPosition
ef.data = r.Data
ef.lowerBits = r.LowerBits
ef.upperBitsCumKeys = r.UpperBitsCumKeys
ef.upperBitsPosition = r.UpperBitsPosition
ef.jump = r.Jump
return r.WordsCumKeys, r.WordsPosition
}
// Build constructs the double Elias-Fano jump table; it panics if either
// sequence is too sparse for this variant's 16-bit jump offsets (use eliasfano32).
func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
if !ef.build(cumKeys, position) {
panic(jumpOffsetOverflowMsg)
}
}
// build mirrors (*EliasFano).build: it returns false (jump table left partial)
// on a jump offset exceeding 16 bits instead of panicking.
func (ef *DoubleEliasFano) build(cumKeys []uint64, position []uint64) bool {
if len(cumKeys) != len(position) {
panic("len(cumKeys) != len(position)")
}
ef.numBuckets = uint64(len(cumKeys) - 1)
ef.posMinDelta = math.MaxUint64
ef.cumKeysMinDelta = math.MaxUint64
for i := uint64(1); i <= ef.numBuckets; i++ {
if cumKeys[i] < cumKeys[i-1] {
panic("cumKeys[i] <= cumKeys[i-1]")
}
nkeysDelta := cumKeys[i] - cumKeys[i-1]
if nkeysDelta < ef.cumKeysMinDelta {
ef.cumKeysMinDelta = nkeysDelta
}
if position[i] < position[i-1] {
panic("position[i] < position[i-1]")
}
bucketBits := position[i] - position[i-1]
if bucketBits < ef.posMinDelta {
ef.posMinDelta = bucketBits
}
}
//fmt.Printf("cumKeysMinDelta = %d, posMinDelta = %d\n", ef.cumKeysMinDelta, ef.posMinDelta)
ef.uPosition = position[ef.numBuckets] - ef.numBuckets*ef.posMinDelta + 1
ef.uCumKeys = cumKeys[ef.numBuckets] - ef.numBuckets*ef.cumKeysMinDelta + 1 // Largest possible encoding of the cumKeys
wordsCumKeys, wordsPosition := ef.deriveFields()
for i, cumDelta, bitDelta := uint64(0), uint64(0), uint64(0); i <= ef.numBuckets; i, cumDelta, bitDelta = i+1, cumDelta+ef.cumKeysMinDelta, bitDelta+ef.posMinDelta {
if ef.lCumKeys != 0 {
//fmt.Printf("i=%d, set_bits cum for %d = %b\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys)
setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition), (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys)
//fmt.Printf("loweBits %b\n", ef.lowerBits)
}
set(ef.upperBitsCumKeys, ((cumKeys[i]-cumDelta)>>ef.lCumKeys)+i)
//fmt.Printf("i=%d, set cum for %d = %d\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)>>ef.lCumKeys+i)
if ef.lPosition != 0 {
//fmt.Printf("i=%d, set_bits pos for %d = %b\n", i, position[i]-bitDelta, (position[i]-bitDelta)&ef.lowerBitsMaskPosition)
setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition)+ef.lCumKeys, (position[i]-bitDelta)&ef.lowerBitsMaskPosition)
//fmt.Printf("lowerBits %b\n", ef.lowerBits)
}
set(ef.upperBitsPosition, ((position[i]-bitDelta)>>ef.lPosition)+i)
//fmt.Printf("i=%d, set pos for %d = %d\n", i, position[i]-bitDelta, (position[i]-bitDelta)>>ef.lPosition+i)
}
//fmt.Printf("loweBits %b\n", ef.lowerBits)
//fmt.Printf("upperBitsCumKeys %b\n", ef.upperBitsCumKeys)
//fmt.Printf("upperBitsPosition %b\n", ef.upperBitsPosition)
// i iterates over the 64-bit words in the wordCumKeys vector
// c iterates over bits in the wordCumKeys
// lastSuperQ is the largest multiple of 2^14 (4096) which is no larger than c
// c/superQ is the index of the current 4096 block of bits
// superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
offset := i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
if offset >= (1 << 16) { // must fit the 16-bit jump slot
return false
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2 * (c % superQ) / q
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
}
if (c & qMask) == 0 {
offset := i*64 + b - lastSuperQ
if offset >= (1 << 16) { // must fit the 16-bit jump slot
return false
}
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
return true
}
// setBits stores a value at bit position start.
// All callers write in monotonic order, so target bits are guaranteed zero
// and we can use |= instead of clear-and-set. The lowerBits slice always
// has +1 padding word, making the unconditional second write safe.
// When shift+width <= 64, value>>(64-shift) == 0, so the write is a no-op.
func setBits(bits []uint64, start uint64, value uint64) {
idx64, shift := start>>6, int(start&63)
_ = bits[idx64+1] // BCE hint: proves both accesses are in-bounds
bits[idx64] |= value << shift
bits[idx64+1] |= value >> (64 - shift)
}
func set(bits []uint64, pos uint64) {
//bits[pos>>6] |= uint64(1) << (pos & 63)
bits[pos/64] |= uint64(1) << (pos % 64)
}
func (ef *DoubleEliasFano) jumpSizeWords() int {
size := ((ef.numBuckets + 1) / superQ) * superQSize * 2 // Whole blocks
if (ef.numBuckets+1)%superQ != 0 {
size += (1 + (((ef.numBuckets+1)%superQ+q-1)/q+3)/4) * 2 // Partial block
}
return int(size)
}
// Data returns binary representation of double Ellias-Fano index that has been built
func (ef *DoubleEliasFano) Data() []uint64 {
return ef.data
}
func (ef *DoubleEliasFano) get2(i uint64) (cumKeys, position uint64,
windowCumKeys uint64, selectCumKeys int, currWordCumKeys, lower, cumDelta uint64) {
posLower := i * (ef.lCumKeys + ef.lPosition)
idx64, shift := posLower/64, posLower%64
lower = ef.lowerBits[idx64] >> shift
if shift > 0 {
lower |= ef.lowerBits[idx64+1] << (64 - shift)
}
//fmt.Printf("i = %d, posLower = %d, lower = %b\n", i, posLower, lower)
jumpSuperQ := (i / superQ) * superQSize * 2
jumpInsideSuperQ := (i % superQ) / q
idx16 := 4*(jumpSuperQ+2) + 2*jumpInsideSuperQ
idx64 = idx16 / 4
shift = 16 * (idx16 % 4)
mask := uint64(0xffff) << shift
jumpCumKeys := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift
idx16++
idx64 = idx16 / 4
shift = 16 * (idx16 % 4)
mask = uint64(0xffff) << shift
jumpPosition := ef.jump[jumpSuperQ+1] + (ef.jump[idx64]&mask)>>shift
//fmt.Printf("i = %d, jumpCumKeys = %d, jumpPosition = %d\n", i, jumpCumKeys, jumpPosition)
currWordCumKeys = jumpCumKeys / 64
currWordPosition := jumpPosition / 64
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys] & (uint64(0xffffffffffffffff) << (jumpCumKeys % 64))
windowPosition := ef.upperBitsPosition[currWordPosition] & (uint64(0xffffffffffffffff) << (jumpPosition % 64))
deltaCumKeys := int(i & qMask)
deltaPosition := int(i & qMask)
for bitCount := bits.OnesCount64(windowCumKeys); bitCount <= deltaCumKeys; bitCount = bits.OnesCount64(windowCumKeys) {
//fmt.Printf("i = %d, bitCount cum = %d\n", i, bitCount)
currWordCumKeys++
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys]
deltaCumKeys -= bitCount
}
for bitCount := bits.OnesCount64(windowPosition); bitCount <= deltaPosition; bitCount = bits.OnesCount64(windowPosition) {
//fmt.Printf("i = %d, bitCount pos = %d\n", i, bitCount)
currWordPosition++
windowPosition = ef.upperBitsPosition[currWordPosition]
deltaPosition -= bitCount
}
selectCumKeys = bitutil.Select64(windowCumKeys, deltaCumKeys)
//fmt.Printf("i = %d, select cum in %b for %d = %d\n", i, windowCumKeys, deltaCumKeys, selectCumKeys)
cumDelta = i * ef.cumKeysMinDelta
cumKeys = ((currWordCumKeys*64+uint64(selectCumKeys)-i)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta
lower >>= ef.lCumKeys
//fmt.Printf("i = %d, lower = %b\n", i, lower)
selectPosition := bitutil.Select64(windowPosition, deltaPosition)
//fmt.Printf("i = %d, select pos in %b for %d = %d\n", i, windowPosition, deltaPosition, selectPosition)
bitDelta := i * ef.posMinDelta
position = ((currWordPosition*64+uint64(selectPosition)-i)<<ef.lPosition | (lower & ef.lowerBitsMaskPosition)) + bitDelta
return
}
func (ef *DoubleEliasFano) Get2(i uint64) (cumKeys, position uint64) {
cumKeys, position, _, _, _, _, _ = ef.get2(i)
return
}
func (ef *DoubleEliasFano) Get3(i uint64) (cumKeys, cumKeysNext, position uint64) {
var (
windowCumKeys, currWordCumKeys, lower, cumDelta uint64
selectCumKeys int
)
cumKeys, position, windowCumKeys, selectCumKeys, currWordCumKeys, lower, cumDelta = ef.get2(i)
windowCumKeys &= (uint64(0xffffffffffffffff) << selectCumKeys) << 1
for windowCumKeys == 0 {
currWordCumKeys++
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys]
}
lower >>= ef.lPosition
cumKeysNext = ((currWordCumKeys*64+uint64(bits.TrailingZeros64(windowCumKeys))-i-1)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta + ef.cumKeysMinDelta
return
}
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (ef *DoubleEliasFano) Write(w io.Writer) error {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], ef.numBuckets)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.uCumKeys)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.uPosition)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.cumKeysMinDelta)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.posMinDelta)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
p := (*[maxDataSize]byte)(unsafe.Pointer(&ef.data[0]))
b := (*p)[:]
if _, e := w.Write(b[:len(ef.data)*8]); e != nil {
return e
}
return nil
}
// Read inputs the state of golomb rice encoding from a reader s
func (ef *DoubleEliasFano) Read(r []byte) int {
ef.numBuckets = binary.BigEndian.Uint64(r[:8])
ef.uCumKeys = binary.BigEndian.Uint64(r[8:16])
ef.uPosition = binary.BigEndian.Uint64(r[16:24])
ef.cumKeysMinDelta = binary.BigEndian.Uint64(r[24:32])
ef.posMinDelta = binary.BigEndian.Uint64(r[32:40])
p := (*[maxDataSize / 8]uint64)(unsafe.Pointer(&r[40]))
ef.data = p[:]
ef.deriveFields()
return 40 + 8*len(ef.data)
}
// Copyright 2022 The Erigon Authors
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package eliasfano32
import (
"encoding/binary"
"fmt"
"io"
"math"
"math/bits"
"os"
"sort"
"unsafe"
"github.com/c2h5oh/datasize"
"github.com/edsrzf/mmap-go"
"github.com/erigontech/erigon/db/recsplit/efcommon"
"github.com/erigontech/erigon/common/bitutil"
"github.com/erigontech/erigon/common/dir"
"github.com/erigontech/erigon/db/kv/stream"
)
// EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html
// P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974.
// Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf
// Quasi-Succinct Indices, Sebastiano Vigna https://arxiv.org/pdf/1206.4300
const (
log2q uint64 = 8
q uint64 = 1 << log2q
qMask uint64 = q - 1
superQ uint64 = 1 << 14
superQMask uint64 = superQ - 1
qPerSuperQ uint64 = superQ / q // 64
superQSize uint64 = 1 + qPerSuperQ/2 // 1 + 64/2 = 33
)
// EliasFano can be used to encode one monotone sequence
type EliasFano struct {
data []uint64
lowerBits []uint64
upperBits []uint64
jump []uint64
lowerBitsMask uint64
count uint64
u uint64
l uint64 // number of lower bits per element. = floor(log2(average gap between elements))
maxOffset uint64
i uint64
wordsUpperBits int
}
// OffHeapBuilder wraps *EliasFano with a mmapped temp file as the backing
// buffer. OS resources (file descriptor + mmap) live here rather than on
// EliasFano so heap-backed EliasFano carries no extra overhead.
type OffHeapBuilder struct {
*EliasFano
backingMmap mmap.MMap
backingFile *os.File
}
func (b *OffHeapBuilder) Close() {
if b.backingMmap != nil {
_ = b.backingMmap.Unmap()
b.backingMmap = nil
// Nil the sub-slices so any use-after-Close panics immediately
// instead of silently reading unmapped memory.
b.EliasFano.data = nil
b.EliasFano.lowerBits = nil
b.EliasFano.upperBits = nil
b.EliasFano.jump = nil
}
if b.backingFile != nil {
name := b.backingFile.Name()
_ = b.backingFile.Close()
dir.RemoveFile(name)
b.backingFile = nil
}
}
func NewEliasFano(count uint64, maxOffset uint64) *EliasFano {
if count == 0 {
panic(fmt.Sprintf("too small count: %d", count))
}
//fmt.Printf("count=%d,maxOffset=%d,minDelta=%d\n", count, maxOffset, minDelta)
ef := &EliasFano{
count: count - 1,
maxOffset: maxOffset,
}
ef.u = maxOffset + 1
ef.wordsUpperBits = ef.deriveFields()
return ef
}
// fallocate extends f to size bytes and forces disk-block allocation. On
// sparse-file filesystems (Linux ext4) an RDWR mmap write to an unallocated
// region raises SIGBUS; this write surfaces ENOSPC as a normal error instead.
func fallocate(f *os.File, size int64) error {
_, err := f.WriteAt([]byte{0}, size-1)
return err
}
// NewEliasFanoOffHeap is like NewEliasFano but backs the data buffer with a
// mmapped temp file. Use when the buffer would be too large for the Go heap
// (multi-GB EFs during snapshot builds). Caller MUST Close() after Write.
func NewEliasFanoOffHeap(count uint64, maxOffset uint64, tmpFilePath string) (_ *OffHeapBuilder, err error) {
if count == 0 {
panic(fmt.Sprintf("too small count: %d", count))
}
ef := &EliasFano{
count: count - 1,
maxOffset: maxOffset,
}
ef.u = maxOffset + 1
_, _, _, totalWords := ef.computeLayout()
sizeBytes := int64(totalWords) * uint64Size
if sizeBytes > math.MaxInt {
return nil, fmt.Errorf("elias fano size %d exceeds platform mmap limit", sizeBytes)
}
f, err := dir.CreateTempWithExtension(tmpFilePath, "ef.tmp")
if err != nil {
return nil, fmt.Errorf("create ef tmp file %q: %w", tmpFilePath, err)
}
defer func() {
if err != nil {
f.Close()
dir.RemoveFile(f.Name())
}
}()
if err := fallocate(f, sizeBytes); err != nil {
return nil, fmt.Errorf("pre-allocate ef tmp file: %w", err)
}
m, err := mmap.MapRegion(f, int(sizeBytes), mmap.RDWR, 0, 0)
if err != nil {
return nil, fmt.Errorf("mmap ef tmp file: %w", err)
}
defer func() {
if err != nil {
_ = m.Unmap() //nolint
}
}()
ef.data = unsafe.Slice((*uint64)(unsafe.Pointer(&m[0])), totalWords)
ef.wordsUpperBits = ef.deriveFields()
return &OffHeapBuilder{EliasFano: ef, backingMmap: m, backingFile: f}, nil
}
func (ef *EliasFano) Size() datasize.ByteSize { return datasize.ByteSize(len(ef.data) * 8) }
func (ef *EliasFano) AddOffset(offset uint64) {
//fmt.Printf("0x%x,\n", offset)
if ef.l != 0 {
setBits(ef.lowerBits, ef.i*ef.l, offset&ef.lowerBitsMask)
}
//pos := ((offset - ef.delta) >> ef.l) + ef.i
set(ef.upperBits, (offset>>ef.l)+ef.i)
//fmt.Printf("add:%x, pos=%x, set=%x, res=%x\n", offset, pos, pos/64, uint64(1)<<(pos%64))
ef.i++
}
func (ef *EliasFano) jumpSizeWords() int {
size := ((ef.count + 1) / superQ) * superQSize // Whole blocks
if (ef.count+1)%superQ != 0 {
size += 1 + (((ef.count+1)%superQ+q-1)/q+3)/2 // Partial block
}
return int(size)
}
// computeLayout returns the bit-width l and the word counts that partition ef.data.
// Shared by deriveFields (heap path) and NewEliasFanoOffHeap (mmap pre-sizing).
func (ef *EliasFano) computeLayout() (l uint64, wordsLowerBits, wordsUpperBits, totalWords int) {
if ef.u/(ef.count+1) != 0 {
l = 63 ^ uint64(bits.LeadingZeros64(ef.u/(ef.count+1)))
}
wordsLowerBits = int(((ef.count+1)*l+63)/64 + 1)
wordsUpperBits = int((ef.count + 1 + (ef.u >> l) + 63) / 64)
totalWords = wordsLowerBits + wordsUpperBits + ef.jumpSizeWords()
return
}
func (ef *EliasFano) deriveFields() int {
l, wordsLowerBits, wordsUpperBits, totalWords := ef.computeLayout()
ef.l = l
ef.lowerBitsMask = (uint64(1) << ef.l) - 1
//fmt.Printf("EF: %d, %d,%d,%d\n", totalWords, wordsLowerBits, wordsUpperBits, jumpWords)
if cap(ef.data) < totalWords {
alloc := totalWords
if c := cap(ef.data); c > 0 { // means `ef` object is used as re-usable buffer. then re-alloc in `append()` style: grow at-least 2x times - to amortize future re-allocs
alloc = max(totalWords, c*2)
}
ef.data = make([]uint64, totalWords, alloc)
} else {
ef.data = ef.data[:totalWords]
}
ef.lowerBits = ef.data[:wordsLowerBits]
ef.upperBits = ef.data[wordsLowerBits : wordsLowerBits+wordsUpperBits]
ef.jump = ef.data[wordsLowerBits+wordsUpperBits:]
return wordsUpperBits
}
// ResetForWrite reinitializes the EliasFano for writing a new sequence, reusing
// the existing data slice if it has sufficient capacity (avoiding allocation).
// The caller must call Build() after all AddOffset calls, same as with NewEliasFano.
func (ef *EliasFano) ResetForWrite(count, maxOffset uint64) {
ef.count = count - 1
ef.maxOffset = maxOffset
ef.u = maxOffset + 1
ef.i = 0
ef.wordsUpperBits = ef.deriveFields()
// Zero out the backing array so OR-style setBits starts from a clean slate.
// deriveFields() may have resliced ef.data without zeroing it.
clear(ef.data)
}
const jumpOffsetOverflowMsg = "eliasfano32: superQ-block span exceeds the 32-bit jump offset"
// Build constructs the Elias-Fano jump table; it panics if the sequence is too
// sparse for the 32-bit jump offsets (unreachable for any realistic universe).
func (ef *EliasFano) Build() {
if !ef.build() {
panic(jumpOffsetOverflowMsg)
}
}
// build fills the jump table, returning false (with the table left partial, so
// the result must be discarded) when a jump offset exceeds 32 bits, so callers
// can reject out-of-range sequences instead of panicking.
func (ef *EliasFano) build() bool {
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*superQSize] = lastSuperQ
}
if (c & qMask) != 0 {
c++
continue
}
// When c is multiple of 2^8 (256)
offset := i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
if offset >= (1 << 32) { // must fit the 32-bit jump slot
return false
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * superQSize
jumpInsideSuperQ := (c % superQ) / q
idx64, shift := jumpSuperQ+1+(jumpInsideSuperQ>>1), 32*(jumpInsideSuperQ%2)
mask := uint64(0xffffffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
c++
}
}
return true
}
func (ef *EliasFano) get(i uint64) (val uint64, window uint64, sel int, currWord uint64, lower uint64) {
if ef.l != 0 {
lowerPos := i * ef.l
idx64, shift := lowerPos/64, lowerPos%64
lower = ef.lowerBits[idx64] >> shift
if shift > 0 {
lower |= ef.lowerBits[idx64+1] << (64 - shift)
}
}
jumpSuperQ := (i / superQ) * superQSize
jumpInsideSuperQ := (i % superQ) / q
idx64, shift := jumpSuperQ+1+(jumpInsideSuperQ>>1), 32*(jumpInsideSuperQ%2)
mask := uint64(0xffffffff) << shift
jump := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift
currWord = jump / 64
window = ef.upperBits[currWord] & (uint64(0xffffffffffffffff) << (jump % 64))
d := int(i & qMask)
for bitCount := bits.OnesCount64(window); bitCount <= d; bitCount = bits.OnesCount64(window) {
currWord++
window = ef.upperBits[currWord]
d -= bitCount
}
sel = bitutil.Select64(window, d)
val = (currWord*64+uint64(sel)-i)<<ef.l | (lower & ef.lowerBitsMask)
return
}
func (ef *EliasFano) Get(i uint64) uint64 {
val, _, _, _, _ := ef.get(i)
return val
}
func (ef *EliasFano) Get2(i uint64) (val uint64, valNext uint64) {
var window uint64
var sel int
var currWord uint64
var lower uint64
val, window, sel, currWord, lower = ef.get(i)
window &= (uint64(0xffffffffffffffff) << sel) << 1
for window == 0 {
currWord++
window = ef.upperBits[currWord]
}
lower >>= ef.l
valNext = (currWord*64+uint64(bits.TrailingZeros64(window))-i-1)<<ef.l | (lower & ef.lowerBitsMask)
return
}
func (ef *EliasFano) upper(i uint64) uint64 {
jumpSuperQ := (i / superQ) * superQSize
jumpInsideSuperQ := (i % superQ) / q
idx64, shift := jumpSuperQ+1+(jumpInsideSuperQ>>1), 32*(jumpInsideSuperQ%2)
mask := uint64(0xffffffff) << shift
jump := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift
currWord := jump / 64
window := ef.upperBits[currWord] & (uint64(0xffffffffffffffff) << (jump % 64))
d := int(i & qMask)
for bitCount := bits.OnesCount64(window); bitCount <= d; bitCount = bits.OnesCount64(window) {
currWord++
window = ef.upperBits[currWord]
d -= bitCount
}
sel := bitutil.Select64(window, d)
return currWord*64 + uint64(sel) - i
}
func Seek(data []byte, n uint64) (uint64, uint64, bool) {
ef, _ := ReadEliasFano(data) //for better perf: app-code can use ef.Reset(data).Seek(n)
return ef.Seek(n)
}
func (ef *EliasFano) searchForward(v uint64) (val uint64, pos uint64, ok bool) {
if v == 0 {
return ef.Min(), 0, true // .Min() touching `mmap`
}
_max := ef.Max()
if v == _max {
return _max, ef.count, true
}
if v > _max { // ~3% search-miss on mainnet (up to 15% at certain block ranges)
return 0, 0, false
}
hi := v >> ef.l
var lo uint64
// Real-data (eth-mainnet):
// - 80% of seeks are on tiny EFs: 35% upperBits=8bytes, 45% upperBits=8-24bytes ( 1–3 words = 8–24 bytes, fits in one cache line)
// - 63% have upper(0) >= hi
found := ef.upper(0) >= hi // fast-lane
if !found {
lo = ef.searchUpperForward(hi)
}
for pos = lo; pos <= ef.count; pos++ {
val, _, _, _, _ = ef.get(pos)
if val >= v {
return val, pos, true
}
}
return 0, 0, false
}
// searchUpperForward finds the first index j in [1, count] where upper(j) >= hi.
// Interpolation guess + exponential search to find a tight bracket, then binary search.
// For uniform data the guess is nearly exact: 1–3 upper() calls.
// For non-uniform data degrades gracefully to ~binary search.
func (ef *EliasFano) searchUpperForward(hi uint64) uint64 {
lo, hiIdx := uint64(0), ef.count
if maxUpper := ef.u >> ef.l; maxUpper > 0 {
guessHi, guessLo := bits.Mul64(hi, ef.count)
guess, _ := bits.Div64(guessHi, guessLo, maxUpper)
guess = min(guess, ef.count)
if guess == 0 {
guess = 1
}
if ef.upper(guess) >= hi {
// bracket backward: find lo where upper(lo) < hi
hiIdx = guess
for step := uint64(1); step <= guess; step <<= 1 {
if ef.upper(guess-step) < hi {
lo = guess - step
break
}
hiIdx = guess - step // tighten upper bound
}
} else {
// bracket forward: find hiIdx where upper(hiIdx) >= hi
lo = guess
for step := uint64(1); ; step <<= 1 {
pos := guess + step
if pos >= ef.count {
break
}
if ef.upper(pos) >= hi {
hiIdx = pos
break
}
lo = pos // tighten lower bound
}
}
}
n := int(hiIdx - lo)
if n <= 0 {
return lo + 1
}
i := sort.Search(n, func(i int) bool {
return ef.upper(lo+uint64(i)+1) >= hi
})
return lo + uint64(i) + 1
}
// searchUpperReverse finds the offset from count where upper(count-offset) <= hi.
func (ef *EliasFano) searchUpperReverse(hi uint64) uint64 {
lo, hiIdx := uint64(0), ef.count
if maxUpper := ef.u >> ef.l; maxUpper > 0 && hi < maxUpper {
// guess how far back from count the answer is
rem := maxUpper - hi
guessHi, guessLo := bits.Mul64(rem, ef.count)
guess, _ := bits.Div64(guessHi, guessLo, maxUpper)
guess = min(guess, ef.count)
if guess == 0 {
guess = 1
}
if ef.upper(ef.count-guess) <= hi {
// bracket backward (toward count): find lo where upper(count-lo) > hi
hiIdx = guess
for step := uint64(1); step <= guess; step <<= 1 {
if ef.upper(ef.count-guess+step) > hi {
lo = guess - step
break
}
hiIdx = guess - step // tighten upper bound
}
} else {
// bracket forward (away from count)
lo = guess
for step := uint64(1); ; step <<= 1 {
pos := guess + step
if pos >= ef.count {
break
}
if ef.upper(ef.count-pos) <= hi {
hiIdx = pos
break
}
lo = pos // tighten lower bound
}
}
}
n := int(hiIdx - lo)
if n <= 0 {
return lo + 1
}
i := sort.Search(n+1, func(i int) bool {
return ef.upper(ef.count-lo-uint64(i)) <= hi
})
return lo + uint64(i)
}
func (ef *EliasFano) searchReverse(v uint64) (val uint64, pos uint64, ok bool) {
if v == 0 {
return 0, 0, ef.Min() == 0 // .Max() touching `mmap`
}
_max := ef.Max() // .Max() doesn't touch `mmap`
if v == _max {
return _max, ef.count, true
}
if v > _max { // reverse: v beyond range still returns max (not a miss)
return _max, ef.count, true
}
hi := v >> ef.l
var lo uint64
found := ef.upper(ef.count) <= hi // fast-lane. 60% hit-rate
if !found {
lo = ef.searchUpperReverse(hi)
}
for j := lo; j <= ef.count; j++ {
pos = ef.count - j
val, _, _, _, _ = ef.get(pos)
if val <= v {
return val, pos, true
}
}
return 0, 0, false
}
// Seek returns the value and its 0-based position in the sequence, equal or greater than given value
func (ef *EliasFano) Seek(v uint64) (uint64, uint64, bool) {
return ef.searchForward(v)
}
func (ef *EliasFano) Max() uint64 {
return ef.maxOffset
}
func (ef *EliasFano) Min() uint64 {
return ef.Get(0)
}
func (ef *EliasFano) Count() uint64 {
return ef.count + 1
}
// AddedCount is the number of AddOffset calls so far (build-time position).
func (ef *EliasFano) AddedCount() uint64 {
return ef.i
}
func (ef *EliasFano) Iterator() *EliasFanoIter {
it := &EliasFanoIter{
ef: ef,
lowerBits: ef.lowerBits,
upperBits: ef.upperBits,
count: ef.count,
lowerBitsMask: ef.lowerBitsMask,
l: ef.l,
upperStep: uint64(1) << ef.l,
}
it.init()
return it
}
func (ef *EliasFano) ReverseIterator() *EliasFanoIter {
it := &EliasFanoIter{
ef: ef,
lowerBits: ef.lowerBits,
upperBits: ef.upperBits,
count: ef.count,
lowerBitsMask: ef.lowerBitsMask,
l: ef.l,
upperStep: uint64(1) << ef.l,
reverse: true,
}
it.init()
return it
}
type EliasFanoIter struct {
ef *EliasFano
lowerBits []uint64
upperBits []uint64
//constants
count uint64
lowerBitsMask uint64
l uint64
upperStep uint64
reverse bool
//fields of current value
upper uint64
upperIdx uint64
//fields of next value
lowerIdx uint64
upperMask uint64
itemsIterated uint64
}
func (efi *EliasFanoIter) Close() {}
func (efi *EliasFanoIter) HasNext() bool {
return efi.itemsIterated <= efi.count
}
func (efi *EliasFanoIter) Reset(ef *EliasFano, reverse bool) {
efi.ef = ef
efi.lowerBits = ef.lowerBits
efi.upperBits = ef.upperBits
efi.count = ef.count
efi.lowerBitsMask = ef.lowerBitsMask
efi.l = ef.l
efi.upperStep = uint64(1) << ef.l
efi.reverse = reverse
efi.internalReset()
}
func (efi *EliasFanoIter) internalReset() { // no `return parameter` to avoid heap-allocation of `efi` object
efi.upper = 0
efi.upperIdx = 0
efi.lowerIdx = 0
efi.upperMask = 0
efi.itemsIterated = 0
efi.init()
}
func (efi *EliasFanoIter) init() {
if efi.itemsIterated != 0 {
return
}
if efi.reverse {
higherBitsMaxValue := efi.ef.u >> efi.l
lastUpperBitIdx := (efi.ef.count + 1) + higherBitsMaxValue
efi.upperMask = 1 << (lastUpperBitIdx % 64) >> 1 // last bit is always 0 delimiter, so we move >> 1
efi.upperIdx = lastUpperBitIdx / 64
efi.upper = higherBitsMaxValue << efi.l
efi.lowerIdx = efi.count * efi.l
} else {
efi.upperMask = 1
}
}
func (efi *EliasFanoIter) Seek(n uint64) {
//fmt.Printf("b seek2: efi.upperMask(%d)=%d, upperIdx=%d, lowerIdx=%d, itemsIterated=%d\n", n, bits.TrailingZeros64(efi.upperMask), efi.upperIdx, efi.lowerIdx, efi.itemsIterated)
//fmt.Printf("b seek2: efi.upper=%d\n", efi.upper)
efi.internalReset()
var nn uint64
var nextI uint64
var ok bool
if efi.reverse {
nn, nextI, ok = efi.ef.searchReverse(n)
} else {
nn, nextI, ok = efi.ef.searchForward(n)
}
_ = nn
if !ok {
efi.itemsIterated = efi.count + 1
return
}
if nextI == 0 && !efi.reverse {
return
}
if nextI == efi.count && efi.reverse {
return
}
if efi.reverse {
efi.itemsIterated = efi.count - nextI
// fields of current value
v, _, sel, currWords, lower := efi.ef.get(nextI + 1)
efi.upper = v &^ (lower & efi.ef.lowerBitsMask)
efi.upperIdx = currWords
// fields of next value
efi.lowerIdx -= efi.itemsIterated * efi.l
if sel > 0 {
efi.upperMask = 1 << (sel - 1)
} else {
efi.upperMask = 0
}
} else {
efi.itemsIterated = nextI
// fields of current value
v, _, sel, currWords, lower := efi.ef.get(nextI - 1)
efi.upper = v &^ (lower & efi.ef.lowerBitsMask)
efi.upperIdx = currWords
// fields of next value
efi.lowerIdx = nextI * efi.l
efi.upperMask = 1 << (sel + 1)
}
//fmt.Printf("seek2: efi.upperMask(%d)=%d, upperIdx=%d, lowerIdx=%d, itemsIterated=%d\n", n, bits.TrailingZeros64(efi.upperMask), efi.upperIdx, efi.lowerIdx, efi.itemsIterated)
//fmt.Printf("seek2: efi.upper=%d\n", efi.upper)
}
func (efi *EliasFanoIter) moveNext() {
if efi.reverse {
efi.decrement()
} else {
efi.increment()
}
efi.itemsIterated++
}
func (efi *EliasFanoIter) increment() {
if efi.upperMask == 0 {
efi.upperIdx++
efi.upperMask = 1
}
for efi.upperBits[efi.upperIdx]&efi.upperMask == 0 {
efi.upper += efi.upperStep
efi.upperMask <<= 1
if efi.upperMask == 0 {
efi.upperIdx++
efi.upperMask = 1
}
}
efi.upperMask <<= 1
efi.lowerIdx += efi.l
}
func (efi *EliasFanoIter) decrement() {
if efi.upperMask == 0 {
if efi.upperIdx == 0 {
panic("decrement: unexpected efi.upperIdx underflow")
}
efi.upperIdx--
efi.upperMask = 1 << 63
}
for efi.upperBits[efi.upperIdx]&efi.upperMask == 0 {
if efi.upper < efi.upperStep {
panic("decrement: unexpected efi.upper underflow")
}
efi.upper -= efi.upperStep
efi.upperMask >>= 1
if efi.upperMask == 0 {
if efi.upperIdx == 0 {
panic("decrement: unexpected efi.upperIdx underflow")
}
efi.upperIdx--
efi.upperMask = 1 << 63
}
}
// note: there can be an underflow here after the last Next()
// but that is ok since we are protected from ErrEliasFanoIterExhausted
efi.lowerIdx -= efi.l
efi.upperMask >>= 1
}
func (efi *EliasFanoIter) Next() (uint64, error) {
if !efi.HasNext() {
return 0, stream.ErrIteratorExhausted
}
idx64, shift := efi.lowerIdx/64, efi.lowerIdx%64
lower := efi.lowerBits[idx64] >> shift
if shift > 0 {
lower |= efi.lowerBits[idx64+1] << (64 - shift)
}
efi.moveNext()
return efi.upper | (lower & efi.lowerBitsMask), nil
}
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (ef *EliasFano) Write(w io.Writer) error {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], ef.count)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.u)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
b := unsafe.Slice((*byte)(unsafe.Pointer(&ef.data[0])), len(ef.data)*uint64Size)
if _, e := w.Write(b); e != nil {
return e
}
return nil
}
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (ef *EliasFano) AppendBytes(buf []byte) []byte {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], ef.count)
buf = append(buf, numBuf[:]...)
binary.BigEndian.PutUint64(numBuf[:], ef.u)
buf = append(buf, numBuf[:]...)
b := unsafe.Slice((*byte)(unsafe.Pointer(&ef.data[0])), len(ef.data)*uint64Size)
buf = append(buf, b...)
return buf
}
// Read inputs the state of golomb rice encoding from a reader s
func ReadEliasFano(r []byte) (*EliasFano, int) {
ef := &EliasFano{
count: binary.BigEndian.Uint64(r[:8]),
u: binary.BigEndian.Uint64(r[8:16]),
data: unsafe.Slice((*uint64)(unsafe.Pointer(&r[16])), (len(r)-16)/uint64Size),
}
ef.maxOffset = ef.u - 1
ef.deriveFields()
return ef, 16 + 8*len(ef.data)
}
// Reset - like ReadEliasFano, but for existing object
func (ef *EliasFano) Reset(r []byte) *EliasFano { // no `return parameter` to avoid heap-allocation of `ef` object
ef.count = binary.BigEndian.Uint64(r[:8])
ef.u = binary.BigEndian.Uint64(r[8:16])
ef.data = unsafe.Slice((*uint64)(unsafe.Pointer(&r[16])), (len(r)-16)/uint64Size)
ef.maxOffset = ef.u - 1
ef.deriveFields()
return ef
}
func Max(r []byte) uint64 { return binary.BigEndian.Uint64(r[8:16]) - 1 }
func Count(r []byte) uint64 { return binary.BigEndian.Uint64(r[:8]) + 1 }
const uint64Size = 8
func Min(r []byte) uint64 {
count := binary.BigEndian.Uint64(r[:8])
u := binary.BigEndian.Uint64(r[8:16])
p := unsafe.Slice((*uint64)(unsafe.Pointer(&r[16])), (len(r)-16)/uint64Size)
var l uint64
if u/(count+1) == 0 {
l = 0
} else {
l = 63 ^ uint64(bits.LeadingZeros64(u/(count+1))) // pos of first non-zero bit
}
wordsLowerBits := int(((count+1)*l+63)/64 + 1)
wordsUpperBits := int((count + 1 + (u >> l) + 63) / 64)
lowerBits := p[:wordsLowerBits]
upperBits := p[wordsLowerBits : wordsLowerBits+wordsUpperBits]
jump := p[wordsLowerBits+wordsUpperBits:]
lower := lowerBits[0]
mask := uint64(0xffffffff)
j := jump[0] + jump[1]&mask
currWord := j / 64
window := upperBits[currWord] & (uint64(0xffffffffffffffff) << (j % 64))
if bitCount := bits.OnesCount64(window); bitCount <= 0 {
currWord++
window = upperBits[currWord]
}
sel := bitutil.Select64(window, 0)
lowerBitsMask := (uint64(1) << l) - 1
val := (currWord*64+uint64(sel))<<l | (lower & lowerBitsMask)
return val
}
// DoubleEliasFano can be used to encode two monotone sequences
// it is called "double" because the lower bits array contains two sequences interleaved
type DoubleEliasFano struct {
data []uint64
lowerBits []uint64
upperBitsPosition []uint64
upperBitsCumKeys []uint64
jump []uint64
lowerBitsMaskCumKeys uint64
lowerBitsMaskPosition uint64
numBuckets uint64
uCumKeys uint64
uPosition uint64
lPosition uint64
lCumKeys uint64
cumKeysMinDelta uint64
posMinDelta uint64
}
func (ef *DoubleEliasFano) deriveFields() (int, int) {
r := efcommon.DeriveDoubleEFFields(ef.numBuckets, ef.uCumKeys, ef.uPosition, ef.data, ef.jumpSizeWords())
ef.lPosition = r.LPosition
ef.lCumKeys = r.LCumKeys
ef.lowerBitsMaskCumKeys = r.LowerBitsMaskCumKeys
ef.lowerBitsMaskPosition = r.LowerBitsMaskPosition
ef.data = r.Data
ef.lowerBits = r.LowerBits
ef.upperBitsCumKeys = r.UpperBitsCumKeys
ef.upperBitsPosition = r.UpperBitsPosition
ef.jump = r.Jump
return r.WordsCumKeys, r.WordsPosition
}
// Build constructs the double Elias-Fano jump table; it panics if either
// sequence is too sparse for the 32-bit jump offsets (unreachable in practice).
func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
if !ef.build(cumKeys, position) {
panic(jumpOffsetOverflowMsg)
}
}
// build mirrors (*EliasFano).build: it returns false (jump table left partial)
// on a jump offset exceeding 32 bits instead of panicking.
func (ef *DoubleEliasFano) build(cumKeys []uint64, position []uint64) bool {
if len(cumKeys) != len(position) {
panic("len(cumKeys) != len(position)")
}
ef.numBuckets = uint64(len(cumKeys) - 1)
ef.posMinDelta = math.MaxUint64
ef.cumKeysMinDelta = math.MaxUint64
for i := uint64(1); i <= ef.numBuckets; i++ {
if cumKeys[i] < cumKeys[i-1] {
panic("cumKeys[i] <= cumKeys[i-1]")
}
nkeysDelta := cumKeys[i] - cumKeys[i-1]
if nkeysDelta < ef.cumKeysMinDelta {
ef.cumKeysMinDelta = nkeysDelta
}
if position[i] < position[i-1] {
panic("position[i] < position[i-1]")
}
bucketBits := position[i] - position[i-1]
if bucketBits < ef.posMinDelta {
ef.posMinDelta = bucketBits
}
}
//fmt.Printf("cumKeysMinDelta = %d, posMinDelta = %d\n", ef.cumKeysMinDelta, ef.posMinDelta)
ef.uPosition = position[ef.numBuckets] - ef.numBuckets*ef.posMinDelta + 1
ef.uCumKeys = cumKeys[ef.numBuckets] - ef.numBuckets*ef.cumKeysMinDelta + 1 // Largest possible encoding of the cumKeys
wordsCumKeys, wordsPosition := ef.deriveFields()
for i, cumDelta, bitDelta := uint64(0), uint64(0), uint64(0); i <= ef.numBuckets; i, cumDelta, bitDelta = i+1, cumDelta+ef.cumKeysMinDelta, bitDelta+ef.posMinDelta {
if ef.lCumKeys != 0 {
//fmt.Printf("i=%d, set_bits cum for %d = %b\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys)
setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition), (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys)
//fmt.Printf("loweBits %b\n", ef.lowerBits)
}
set(ef.upperBitsCumKeys, ((cumKeys[i]-cumDelta)>>ef.lCumKeys)+i)
//fmt.Printf("i=%d, set cum for %d = %d\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)>>ef.lCumKeys+i)
if ef.lPosition != 0 {
//fmt.Printf("i=%d, set_bits pos for %d = %b\n", i, position[i]-bitDelta, (position[i]-bitDelta)&ef.lowerBitsMaskPosition)
setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition)+ef.lCumKeys, (position[i]-bitDelta)&ef.lowerBitsMaskPosition)
//fmt.Printf("lowerBits %b\n", ef.lowerBits)
}
set(ef.upperBitsPosition, ((position[i]-bitDelta)>>ef.lPosition)+i)
//fmt.Printf("i=%d, set pos for %d = %d\n", i, position[i]-bitDelta, (position[i]-bitDelta)>>ef.lPosition+i)
}
//fmt.Printf("loweBits %b\n", ef.lowerBits)
//fmt.Printf("upperBitsCumKeys %b\n", ef.upperBitsCumKeys)
//fmt.Printf("upperBitsPosition %b\n", ef.upperBitsPosition)
// i iterates over the 64-bit words in the wordCumKeys vector
// c iterates over bits in the wordCumKeys
// lastSuperQ is the largest multiple of 2^14 (4096) which is no larger than c
// c/superQ is the index of the current 4096 block of bits
// superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
offset := i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
if offset >= (1 << 32) { // must fit the 32-bit jump slot
return false
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2 * (c % superQ) / q
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 1)
shift := 32 * (jumpInsideSuperQ % 2)
mask := uint64(0xffffffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
}
if (c & qMask) == 0 {
offset := i*64 + b - lastSuperQ
if offset >= (1 << 32) { // must fit the 32-bit jump slot
return false
}
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 1)
shift := 32 * (jumpInsideSuperQ % 2)
mask := uint64(0xffffffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
return true
}
// setBits stores a value at bit position start.
// All callers write in monotonic order, so target bits are guaranteed zero
// and we can use |= instead of clear-and-set. The lowerBits slice always
// has +1 padding word, making the unconditional second write safe.
// When shift+width <= 64, value>>(64-shift) == 0, so the write is a no-op.
func setBits(bits []uint64, start uint64, value uint64) {
idx64, shift := start>>6, int(start&63)
_ = bits[idx64+1] // BCE hint: proves both accesses are in-bounds
bits[idx64] |= value << shift
bits[idx64+1] |= value >> (64 - shift)
}
func set(bits []uint64, pos uint64) {
//bits[pos>>6] |= uint64(1) << (pos & 63)
bits[pos/64] |= uint64(1) << (pos % 64)
}
func (ef *DoubleEliasFano) jumpSizeWords() int {
size := ((ef.numBuckets + 1) / superQ) * superQSize * 2 // Whole blocks
if (ef.numBuckets+1)%superQ != 0 {
size += (1 + (((ef.numBuckets+1)%superQ+q-1)/q+3)/2) * 2 // Partial block
}
return int(size)
}
// Data returns binary representation of double Ellias-Fano index that has been built
func (ef *DoubleEliasFano) Data() []uint64 {
return ef.data
}
func (ef *DoubleEliasFano) get2(i uint64) (cumKeys uint64, position uint64,
windowCumKeys uint64, selectCumKeys int, currWordCumKeys uint64, lower uint64, cumDelta uint64) {
posLower := i * (ef.lCumKeys + ef.lPosition)
idx64, shift := posLower/64, posLower%64
lower = ef.lowerBits[idx64] >> shift
if shift > 0 {
lower |= ef.lowerBits[idx64+1] << (64 - shift)
}
//fmt.Printf("i = %d, posLower = %d, lower = %b\n", i, posLower, lower)
jumpSuperQ := (i / superQ) * superQSize * 2
jumpInsideSuperQ := (i % superQ) / q
idx16 := 2*(jumpSuperQ+2) + 2*jumpInsideSuperQ
idx64, shift = idx16/2, 32*(idx16%2)
mask := uint64(0xffffffff) << shift
jumpCumKeys := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift
idx16++
idx64, shift = idx16/2, 32*(idx16%2)
mask = uint64(0xffffffff) << shift
jumpPosition := ef.jump[jumpSuperQ+1] + (ef.jump[idx64]&mask)>>shift
//fmt.Printf("i = %d, jumpCumKeys = %d, jumpPosition = %d\n", i, jumpCumKeys, jumpPosition)
currWordCumKeys = jumpCumKeys / 64
currWordPosition := jumpPosition / 64
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys] & (uint64(0xffffffffffffffff) << (jumpCumKeys % 64))
windowPosition := ef.upperBitsPosition[currWordPosition] & (uint64(0xffffffffffffffff) << (jumpPosition % 64))
deltaCumKeys := int(i & qMask)
deltaPosition := int(i & qMask)
for bitCount := bits.OnesCount64(windowCumKeys); bitCount <= deltaCumKeys; bitCount = bits.OnesCount64(windowCumKeys) {
//fmt.Printf("i = %d, bitCount cum = %d\n", i, bitCount)
currWordCumKeys++
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys]
deltaCumKeys -= bitCount
}
for bitCount := bits.OnesCount64(windowPosition); bitCount <= deltaPosition; bitCount = bits.OnesCount64(windowPosition) {
//fmt.Printf("i = %d, bitCount pos = %d\n", i, bitCount)
currWordPosition++
windowPosition = ef.upperBitsPosition[currWordPosition]
deltaPosition -= bitCount
}
selectCumKeys = bitutil.Select64(windowCumKeys, deltaCumKeys)
//fmt.Printf("i = %d, select cum in %b for %d = %d\n", i, windowCumKeys, deltaCumKeys, selectCumKeys)
cumDelta = i * ef.cumKeysMinDelta
cumKeys = ((currWordCumKeys*64+uint64(selectCumKeys)-i)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta
lower >>= ef.lCumKeys
//fmt.Printf("i = %d, lower = %b\n", i, lower)
selectPosition := bitutil.Select64(windowPosition, deltaPosition)
//fmt.Printf("i = %d, select pos in %b for %d = %d\n", i, windowPosition, deltaPosition, selectPosition)
bitDelta := i * ef.posMinDelta
position = ((currWordPosition*64+uint64(selectPosition)-i)<<ef.lPosition | (lower & ef.lowerBitsMaskPosition)) + bitDelta
return
}
func (ef *DoubleEliasFano) Get2(i uint64) (cumKeys, position uint64) {
cumKeys, position, _, _, _, _, _ = ef.get2(i)
return
}
func (ef *DoubleEliasFano) Get3(i uint64) (cumKeys, cumKeysNext, position uint64) {
var windowCumKeys uint64
var selectCumKeys int
var currWordCumKeys uint64
var lower uint64
var cumDelta uint64
cumKeys, position, windowCumKeys, selectCumKeys, currWordCumKeys, lower, cumDelta = ef.get2(i)
windowCumKeys &= (uint64(0xffffffffffffffff) << selectCumKeys) << 1
for windowCumKeys == 0 {
currWordCumKeys++
windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys]
}
lower >>= ef.lPosition
cumKeysNext = ((currWordCumKeys*64+uint64(bits.TrailingZeros64(windowCumKeys))-i-1)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta + ef.cumKeysMinDelta
return
}
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (ef *DoubleEliasFano) Write(w io.Writer) error {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], ef.numBuckets)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.uCumKeys)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.uPosition)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.cumKeysMinDelta)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
binary.BigEndian.PutUint64(numBuf[:], ef.posMinDelta)
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
b := unsafe.Slice((*byte)(unsafe.Pointer(&ef.data[0])), len(ef.data)*uint64Size)
if _, e := w.Write(b); e != nil {
return e
}
return nil
}
// Read inputs the state of golomb rice encoding from a reader s
func (ef *DoubleEliasFano) Read(r []byte) int {
ef.numBuckets = binary.BigEndian.Uint64(r[:8])
ef.uCumKeys = binary.BigEndian.Uint64(r[8:16])
ef.uPosition = binary.BigEndian.Uint64(r[16:24])
ef.cumKeysMinDelta = binary.BigEndian.Uint64(r[24:32])
ef.posMinDelta = binary.BigEndian.Uint64(r[32:40])
ef.data = unsafe.Slice((*uint64)(unsafe.Pointer(&r[40])), (len(r)-40)/uint64Size)
ef.deriveFields()
return 40 + 8*len(ef.data)
}
package eliasfano32
// This is a wrapper of "plain" EliasFano for optimizing scenarios where the number sequence
// is constrained in a closed range [from, to], so we can store the entire sequence as deltas
// of "from" and save space.
//
// This is specially useful when the starting "from" is a huge number, so the binary representation
// of the Elias Fano sequence can be made smaller.
//
// The baseNum stores the base value which is added to each element when it is accessed. It is
// not meant to be stored together with the serialized data, but derived from some other source,
// like the start txNum of a snapshot file, so it can be globally applied to all sequences in the
// same file, resulting in huge space savings.
type RebasedEliasFano struct {
baseNum uint64
ef EliasFano
}
func (ref *RebasedEliasFano) Get(i uint64) uint64 {
return ref.baseNum + ref.ef.Get(i)
}
func (ref *RebasedEliasFano) Min() uint64 {
return ref.baseNum + ref.ef.Min()
}
func (ref *RebasedEliasFano) Max() uint64 {
return ref.baseNum + ref.ef.Max()
}
func (ref *RebasedEliasFano) Count() uint64 {
return ref.ef.Count()
}
func (ref *RebasedEliasFano) Reset(baseNum uint64, raw []byte) { // no `return parameter` to avoid heap-allocation of `ref` object
ref.baseNum = baseNum
ref.ef.Reset(raw)
}
func (ref *RebasedEliasFano) Seek(v uint64) (uint64, uint64, bool) {
if v < ref.baseNum {
v = ref.baseNum
}
n, pos, found := ref.ef.Seek(v - ref.baseNum)
return ref.baseNum + n, pos, found
}
func (ref *RebasedEliasFano) Has(v uint64) bool {
n, _, ok := ref.Seek(v)
return ok && n == v
}
func (ref *RebasedEliasFano) Iterator() *RebasedIterWrapper {
it := &RebasedIterWrapper{}
it.Reset(ref, false)
return it
}
func (ref *RebasedEliasFano) ReverseIterator() *RebasedIterWrapper {
it := &RebasedIterWrapper{}
it.Reset(ref, true)
return it
}
type RebasedIterWrapper struct {
baseNum uint64
it *EliasFanoIter
reverse bool
}
func (it *RebasedIterWrapper) Reset(ref *RebasedEliasFano, reverse bool) {
it.baseNum = ref.baseNum
if it.it == nil {
it.it = &EliasFanoIter{}
}
it.it.Reset(&ref.ef, reverse)
it.reverse = reverse
}
func (it *RebasedIterWrapper) HasNext() bool {
return it.it.HasNext()
}
func (it *RebasedIterWrapper) Next() (uint64, error) {
n, err := it.it.Next()
return it.baseNum + n, err
}
func (it *RebasedIterWrapper) Seek(v uint64) {
if v < it.baseNum {
it.it.Seek(0)
if it.reverse {
// force exhaustion as we are seeking before the first elem
it.it.Next()
}
return
}
it.it.Seek(v - it.baseNum)
}
func (it *RebasedIterWrapper) Close() {
it.it.Close()
}
// Copyright 2026 The Erigon Authors
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package patricia
import (
"sort"
)
// Match is a single pattern occurrence: its associated value and the [Start, End)
// byte range it covers in the searched data.
type Match struct {
Val any
Start int
End int
}
type Matches []Match
// AhoCorasick is a byte-level multi-pattern automaton. Build it once from a
// pattern dictionary, then share it read-only across any number of ACMatcher
// instances (one per goroutine).
type AhoCorasick struct {
// build-time trie
children []map[byte]int32
depth []int32
val []any
hasVal []bool
// compiled automaton
rootNext [256]int32 // dense transitions from root (-1 = none)
edgeBytes [][]byte // per-node sorted edge labels
edgeTo [][]int32 // per-node child indices, parallel to edgeBytes
fail []int32
matchLen []int32 // longest pattern ending at this node (0 = none)
matchVal []any
built bool
}
func NewAhoCorasick() *AhoCorasick {
ac := &AhoCorasick{}
ac.addNode(0) // root
return ac
}
func (ac *AhoCorasick) addNode(depth int32) int32 {
ac.children = append(ac.children, nil)
ac.depth = append(ac.depth, depth)
ac.val = append(ac.val, nil)
ac.hasVal = append(ac.hasVal, false)
return int32(len(ac.children) - 1)
}
// Insert adds a pattern with its value. Must be called before Build.
func (ac *AhoCorasick) Insert(pattern []byte, v any) {
if ac.built {
panic("AhoCorasick: Insert after Build")
}
if len(pattern) == 0 {
return
}
cur := int32(0)
for _, b := range pattern {
m := ac.children[cur]
if m == nil {
m = make(map[byte]int32, 1)
ac.children[cur] = m
}
nxt, ok := m[b]
if !ok {
nxt = ac.addNode(ac.depth[cur] + 1)
m = ac.children[cur] // addNode may have grown the slice backing array
m[b] = nxt
}
cur = nxt
}
ac.val[cur] = v
ac.hasVal[cur] = true
}
// Build compiles fail links and per-node longest-suffix-match info (BFS).
func (ac *AhoCorasick) Build() {
if ac.built {
return
}
n := len(ac.children)
ac.fail = make([]int32, n)
ac.matchLen = make([]int32, n)
ac.matchVal = make([]any, n)
ac.edgeBytes = make([][]byte, n)
ac.edgeTo = make([][]int32, n)
for i := range ac.rootNext {
ac.rootNext[i] = -1
}
// sorted edge arrays
for node, m := range ac.children {
if len(m) == 0 {
continue
}
bs := make([]byte, 0, len(m))
for b := range m {
bs = append(bs, b)
}
sort.Slice(bs, func(i, j int) bool { return bs[i] < bs[j] })
tos := make([]int32, len(bs))
for i, b := range bs {
tos[i] = m[b]
}
ac.edgeBytes[node] = bs
ac.edgeTo[node] = tos
}
// BFS fail links
queue := make([]int32, 0, n)
for i, b := range ac.edgeBytes[0] {
child := ac.edgeTo[0][i]
ac.rootNext[b] = child
ac.fail[child] = 0
queue = append(queue, child)
}
for qi := 0; qi < len(queue); qi++ {
node := queue[qi]
// longest pattern ending at this state: own pattern wins (it is the
// full path, longer than any proper suffix from the fail chain)
if ac.hasVal[node] {
ac.matchLen[node] = ac.depth[node]
ac.matchVal[node] = ac.val[node]
} else {
f := ac.fail[node]
ac.matchLen[node] = ac.matchLen[f]
ac.matchVal[node] = ac.matchVal[f]
}
for i, b := range ac.edgeBytes[node] {
child := ac.edgeTo[node][i]
f := ac.fail[node]
for {
if nxt := ac.next(f, b); nxt >= 0 {
ac.fail[child] = nxt
break
}
if f == 0 {
ac.fail[child] = 0
break
}
f = ac.fail[f]
}
queue = append(queue, child)
}
}
ac.children = nil // free build-time maps
ac.built = true
}
// next returns the child of node on byte b, or -1.
func (ac *AhoCorasick) next(node int32, b byte) int32 {
if node == 0 {
return ac.rootNext[b]
}
bs := ac.edgeBytes[node]
lo, hi := 0, len(bs)
for lo < hi {
mid := (lo + hi) >> 1
if bs[mid] < b {
lo = mid + 1
} else {
hi = mid
}
}
if lo < len(bs) && bs[lo] == b {
return ac.edgeTo[node][lo]
}
return -1
}
// ACMatcher is a per-goroutine matcher over a shared AhoCorasick automaton.
// It caches per-position automaton states of the previous word: the state
// after j bytes depends only on those bytes, so for a word sharing a prefix
// with its predecessor (sorted streams) the scan resumes at the first
// differing byte.
type ACMatcher struct {
ac *AhoCorasick
matches Matches
prev []byte
states []int32 // states[j] = automaton state after consuming data[:j+1]
}
func NewACMatcher(ac *AhoCorasick) *ACMatcher {
ac.Build()
return &ACMatcher{ac: ac}
}
// FindLongestMatches returns the maximal pattern matches in data: sorted by
// Start, End strictly increasing, no match contained in another.
func (m *ACMatcher) FindLongestMatches(data []byte) []Match {
ac := m.ac
n := len(data)
k := 0
maxK := min(len(m.prev), n)
for k < maxK && m.prev[k] == data[k] {
k++
}
if cap(m.states) < n {
states := make([]int32, n+64)
copy(states, m.states[:len(m.states)])
m.states = states[:n]
} else {
m.states = m.states[:n]
}
cur := int32(0)
if k > 0 {
cur = m.states[k-1]
}
for j := k; j < n; j++ {
b := data[j]
for {
if nxt := ac.next(cur, b); nxt >= 0 {
cur = nxt
break
}
if cur == 0 {
break
}
cur = ac.fail[cur]
}
m.states[j] = cur
}
m.prev = append(m.prev[:0], data...)
out := m.matches[:0]
for j := 0; j < n; j++ {
st := m.states[j]
ml := ac.matchLen[st]
if ml == 0 {
continue
}
start := j + 1 - int(ml)
// drop previous matches contained in this one
for len(out) > 0 && out[len(out)-1].Start >= start {
out = out[:len(out)-1]
}
out = append(out, Match{Start: start, End: j + 1, Val: ac.matchVal[st]})
}
m.matches = out
return out
}
// Copyright 2015 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
//nolint:goconst
package abi
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"math/big"
"github.com/erigontech/erigon/common"
"github.com/erigontech/erigon/common/crypto"
)
// The ABI holds information about a contract's context and available
// invocable methods. It will allow you to type check function calls and
// packs data accordingly.
type ABI struct {
Constructor Method
Methods map[string]Method
Events map[string]Event
Errors map[string]Error
// Additional "special" functions introduced in solidity v0.6.0.
// It's separated from the original default fallback. Each contract
// can only define one fallback and receive function.
Fallback Method // Note it's also used to represent legacy fallback before v0.6.0
Receive Method
}
// JSON returns a parsed ABI interface and error if it failed.
func JSON(reader io.Reader) (ABI, error) {
dec := json.NewDecoder(reader)
var abi ABI
if err := dec.Decode(&abi); err != nil {
return ABI{}, err
}
return abi, nil
}
// Pack the given method name to conform the ABI. Method call's data
// will consist of method_id, args0, arg1, ... argN. Method id consists
// of 4 bytes and arguments are all 32 bytes.
// Method ids are created from the first 4 bytes of the hash of the
// methods string signature. (signature = baz(uint32,string32))
func (abi ABI) Pack(name string, args ...any) ([]byte, error) {
// Fetch the ABI of the requested method
if name == "" {
// constructor
arguments, err := abi.Constructor.Inputs.Pack(args...)
if err != nil {
return nil, err
}
return arguments, nil
}
method, exist := abi.Methods[name]
if !exist {
return nil, fmt.Errorf("method '%s' not found", name)
}
arguments, err := method.Inputs.Pack(args...)
if err != nil {
return nil, err
}
// Pack up the method ID too if not a constructor and return
return append(method.ID, arguments...), nil
}
func (abi ABI) getArguments(name string, data []byte) (Arguments, error) {
// since there can't be naming collisions with contracts and events,
// we need to decide whether we're calling a method or an event
var args Arguments
if method, ok := abi.Methods[name]; ok {
if len(data)%32 != 0 {
return nil, fmt.Errorf("abi: improperly formatted output: %s - Bytes: [%+v]", string(data), data)
}
args = method.Outputs
}
if event, ok := abi.Events[name]; ok {
args = event.Inputs
}
if args == nil {
return nil, errors.New("abi: could not locate named method or event")
}
return args, nil
}
// Unpack unpacks the output according to the abi specification.
func (abi ABI) Unpack(name string, data []byte) ([]any, error) {
args, err := abi.getArguments(name, data)
if err != nil {
return nil, err
}
return args.Unpack(data)
}
// UnpackIntoInterface unpacks the output in v according to the abi specification.
// It performs an additional copy. Please only use, if you want to unpack into a
// structure that does not strictly conform to the abi structure (e.g. has additional arguments)
func (abi ABI) UnpackIntoInterface(v any, name string, data []byte) error {
args, err := abi.getArguments(name, data)
if err != nil {
return err
}
unpacked, err := args.Unpack(data)
if err != nil {
return err
}
return args.Copy(v, unpacked)
}
// UnpackIntoMap unpacks a log into the provided map[string]interface{}.
func (abi ABI) UnpackIntoMap(v map[string]any, name string, data []byte) (err error) {
args, err := abi.getArguments(name, data)
if err != nil {
return err
}
return args.UnpackIntoMap(v, data)
}
// UnmarshalJSON implements json.Unmarshaler interface.
func (abi *ABI) UnmarshalJSON(data []byte) error {
var fields []struct {
Type string
Name string
Inputs []Argument
Outputs []Argument
// Status indicator which can be: "pure", "view",
// "nonpayable" or "payable".
StateMutability string
// Deprecated Status indicators, but removed in v0.6.0.
Constant bool // True if function is either pure or view
Payable bool // True if function is payable
// Event relevant indicator represents the event is
// declared as anonymous.
Anonymous bool
}
if err := json.Unmarshal(data, &fields); err != nil {
return err
}
abi.Methods = make(map[string]Method)
abi.Events = make(map[string]Event)
abi.Errors = make(map[string]Error)
for _, field := range fields {
switch field.Type {
case "constructor":
abi.Constructor = NewMethod("", "", Constructor, field.StateMutability, field.Constant, field.Payable, field.Inputs, nil)
case "function":
name := abi.overloadedMethodName(field.Name)
abi.Methods[name] = NewMethod(name, field.Name, Function, field.StateMutability, field.Constant, field.Payable, field.Inputs, field.Outputs)
case "fallback":
// New introduced function type in v0.6.0, check more detail
// here https://solidity.readthedocs.io/en/v0.6.0/contracts.html#fallback-function
if abi.HasFallback() {
return errors.New("only single fallback is allowed")
}
abi.Fallback = NewMethod("", "", Fallback, field.StateMutability, field.Constant, field.Payable, nil, nil)
case "receive":
// New introduced function type in v0.6.0, check more detail
// here https://solidity.readthedocs.io/en/v0.6.0/contracts.html#fallback-function
if abi.HasReceive() {
return errors.New("only single receive is allowed")
}
if field.StateMutability != "payable" {
return errors.New("the statemutability of receive can only be payable")
}
abi.Receive = NewMethod("", "", Receive, field.StateMutability, field.Constant, field.Payable, nil, nil)
case "event":
name := abi.overloadedEventName(field.Name)
abi.Events[name] = NewEvent(name, field.Name, field.Anonymous, field.Inputs)
case "error":
abi.Errors[field.Name] = NewError(field.Name, field.Inputs)
default:
return fmt.Errorf("abi: could not recognize type %v of field %v", field.Type, field.Name)
}
}
return nil
}
// overloadedMethodName returns the next available name for a given function.
// Needed since solidity allows for function overload.
//
// e.g. if the abi contains Methods send, send1
// overloadedMethodName would return send2 for input send.
func (abi *ABI) overloadedMethodName(rawName string) string {
name := rawName
_, ok := abi.Methods[name]
for idx := 0; ok; idx++ {
name = fmt.Sprintf("%s%d", rawName, idx)
_, ok = abi.Methods[name]
}
return name
}
// overloadedEventName returns the next available name for a given event.
// Needed since solidity allows for event overload.
//
// e.g. if the abi contains events received, received1
// overloadedEventName would return received2 for input received.
func (abi *ABI) overloadedEventName(rawName string) string {
name := rawName
_, ok := abi.Events[name]
for idx := 0; ok; idx++ {
name = fmt.Sprintf("%s%d", rawName, idx)
_, ok = abi.Events[name]
}
return name
}
// MethodById looks up a method by the 4-byte id.
// It returns an error if the id is too short or no method is found.
func (abi *ABI) MethodById(sigdata []byte) (*Method, error) {
if len(sigdata) < 4 {
return nil, fmt.Errorf("data too short (%d bytes) for abi method lookup", len(sigdata))
}
for _, method := range abi.Methods {
if bytes.Equal(method.ID, sigdata[:4]) {
return &method, nil
}
}
return nil, fmt.Errorf("no method with id: %#x", sigdata[:4])
}
// EventByID looks an event up by its topic hash in the ABI.
// It returns an error if no event is found.
func (abi *ABI) EventByID(topic common.Hash) (*Event, error) {
for _, event := range abi.Events {
if bytes.Equal(event.ID[:], topic[:]) {
return &event, nil
}
}
return nil, fmt.Errorf("no event with id: %#x", topic.Hex())
}
// HasFallback returns an indicator whether a fallback function is included.
func (abi *ABI) HasFallback() bool {
return abi.Fallback.Type == Fallback
}
// HasReceive returns an indicator whether a receive function is included.
func (abi *ABI) HasReceive() bool {
return abi.Receive.Type == Receive
}
// revertSelector is a special function selector for revert reason unpacking.
var revertSelector = crypto.Keccak256([]byte("Error(string)"))[:4]
// panicSelector is a special function selector for panic reason unpacking.
var panicSelector = crypto.Keccak256([]byte("Panic(uint256)"))[:4]
// panicReasons map is for readable panic codes
// see this linkage for the details
// https://docs.soliditylang.org/en/v0.8.21/control-structures.html#panic-via-assert-and-error-via-require
var panicReasons = map[uint64]string{
0x00: "generic panic",
0x01: "assert(false)",
0x11: "arithmetic underflow or overflow",
0x12: "division or modulo by zero",
0x21: "enum overflow",
0x22: "invalid encoded storage byte array accessed",
0x31: "out-of-bounds array access; popping on an empty array",
0x32: "out-of-bounds access of an array or bytesN",
0x41: "out of memory",
0x51: "uninitialized function",
}
// UnpackRevert resolves the abi-encoded revert reason. According to the solidity
// spec https://solidity.readthedocs.io/en/latest/control-structures.html#revert,
// the provided revert reason is abi-encoded as if it were a call to function
// `Error(string)` or `Panic(uint256)`. So it's a special tool for it.
func UnpackRevert(data []byte) (string, error) {
if len(data) < 4 {
return "", errors.New("invalid data for unpacking")
}
switch {
case bytes.Equal(data[:4], revertSelector):
typ, err := NewType("string", "", nil)
if err != nil {
return "", err
}
unpacked, err := (Arguments{{Type: typ}}).Unpack(data[4:])
if err != nil {
return "", err
}
return unpacked[0].(string), nil
case bytes.Equal(data[:4], panicSelector):
typ, err := NewType("uint256", "", nil)
if err != nil {
return "", err
}
unpacked, err := (Arguments{{Type: typ}}).Unpack(data[4:])
if err != nil {
return "", err
}
pCode := unpacked[0].(*big.Int)
if pCode.IsUint64() {
if reason, ok := panicReasons[pCode.Uint64()]; ok {
return reason, nil
}
}
return fmt.Sprintf("unknown panic code: %#x", pCode), nil
default:
return "", errors.New("invalid data for unpacking")
}
}
// Copyright 2015 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"encoding/json"
"errors"
"fmt"
"reflect"
"strings"
)
// Argument holds the name of the argument and the corresponding type.
// Types are used when packing and testing arguments.
type Argument struct {
Name string
Type Type
Indexed bool // indexed is only used by events
}
type Arguments []Argument
type ArgumentMarshaling struct {
Name string
Type string
InternalType string
Components []ArgumentMarshaling
Indexed bool
}
// UnmarshalJSON implements json.Unmarshaler interface.
func (argument *Argument) UnmarshalJSON(data []byte) error {
var arg ArgumentMarshaling
err := json.Unmarshal(data, &arg)
if err != nil {
return fmt.Errorf("argument json err: %w", err)
}
argument.Type, err = NewType(arg.Type, arg.InternalType, arg.Components)
if err != nil {
return err
}
argument.Name = arg.Name
argument.Indexed = arg.Indexed
return nil
}
// NonIndexed returns the arguments with indexed arguments filtered out.
func (arguments Arguments) NonIndexed() Arguments {
var ret []Argument
for _, arg := range arguments {
if !arg.Indexed {
ret = append(ret, arg)
}
}
return ret
}
// isTuple returns true for non-atomic constructs, like (uint,uint) or uint[].
func (arguments Arguments) isTuple() bool {
return len(arguments) > 1
}
// Unpack performs the operation hexdata -> Go format.
func (arguments Arguments) Unpack(data []byte) ([]any, error) {
if len(data) == 0 {
if len(arguments) != 0 {
return nil, errors.New("abi: attempting to unmarshall an empty string while arguments are expected")
}
// Nothing to unmarshal, return default variables
nonIndexedArgs := arguments.NonIndexed()
defaultVars := make([]any, len(nonIndexedArgs))
for index, arg := range nonIndexedArgs {
defaultVars[index] = reflect.New(arg.Type.GetType())
}
return defaultVars, nil
}
return arguments.UnpackValues(data)
}
// UnpackIntoMap performs the operation hexdata -> mapping of argument name to argument value.
func (arguments Arguments) UnpackIntoMap(v map[string]any, data []byte) error {
// Make sure map is not nil
if v == nil {
return errors.New("abi: cannot unpack into a nil map")
}
if len(data) == 0 {
if len(arguments) != 0 {
return errors.New("abi: attempting to unmarshall an empty string while arguments are expected")
}
return nil // Nothing to unmarshal, return
}
marshalledValues, err := arguments.UnpackValues(data)
if err != nil {
return err
}
for i, arg := range arguments.NonIndexed() {
v[arg.Name] = marshalledValues[i]
}
return nil
}
// Copy performs the operation go format -> provided struct.
func (arguments Arguments) Copy(v any, values []any) error {
// make sure the passed value is arguments pointer
if reflect.Pointer != reflect.ValueOf(v).Kind() {
return fmt.Errorf("abi: Unpack(non-pointer %T)", v)
}
if len(values) == 0 {
if len(arguments) != 0 {
return fmt.Errorf("abi: attempting to copy no values while %d arguments are expected", len(arguments))
}
return nil // Nothing to copy, return
}
if arguments.isTuple() {
return arguments.copyTuple(v, values)
}
return arguments.copyAtomic(v, values[0])
}
// unpackAtomic unpacks ( hexdata -> go ) a single value
func (arguments Arguments) copyAtomic(v any, marshalledValues any) error {
dst := reflect.ValueOf(v).Elem()
src := reflect.ValueOf(marshalledValues)
if dst.Kind() == reflect.Struct && src.Kind() != reflect.Struct {
return set(dst.Field(0), src)
}
return set(dst, src)
}
// copyTuple copies a batch of values from marshalledValues to v.
func (arguments Arguments) copyTuple(v any, marshalledValues []any) error {
value := reflect.ValueOf(v).Elem()
nonIndexedArgs := arguments.NonIndexed()
switch value.Kind() {
case reflect.Struct:
argNames := make([]string, len(nonIndexedArgs))
for i, arg := range nonIndexedArgs {
argNames[i] = arg.Name
}
var err error
abi2struct, err := mapArgNamesToStructFields(argNames, value)
if err != nil {
return err
}
for i, arg := range nonIndexedArgs {
field := value.FieldByName(abi2struct[arg.Name])
if !field.IsValid() {
return fmt.Errorf("abi: field %s can't be found in the given value", arg.Name)
}
if err := set(field, reflect.ValueOf(marshalledValues[i])); err != nil {
return err
}
}
case reflect.Slice, reflect.Array:
if value.Len() < len(marshalledValues) {
return fmt.Errorf("abi: insufficient number of arguments for unpack, want %d, got %d", len(marshalledValues), value.Len())
}
for i := range nonIndexedArgs {
if err := set(value.Index(i), reflect.ValueOf(marshalledValues[i])); err != nil {
return err
}
}
default:
return fmt.Errorf("abi:[2] cannot unmarshal tuple in to %v", value.Type())
}
return nil
}
// UnpackValues can be used to unpack ABI-encoded hexdata according to the ABI-specification,
// without supplying a struct to unpack into. Instead, this method returns a list containing the
// values. An atomic argument will be a list with one element.
func (arguments Arguments) UnpackValues(data []byte) ([]any, error) {
nonIndexedArgs := arguments.NonIndexed()
retval := make([]any, 0, len(nonIndexedArgs))
virtualArgs := 0
for index, arg := range nonIndexedArgs {
marshalledValue, err := toGoType((index+virtualArgs)*32, arg.Type, data)
if arg.Type.T == ArrayTy && !isDynamicType(arg.Type) {
// If we have a static array, like [3]uint256, these are coded as
// just like uint256,uint256,uint256.
// This means that we need to add two 'virtual' arguments when
// we count the index from now on.
//
// Array values nested multiple levels deep are also encoded inline:
// [2][3]uint256: uint256,uint256,uint256,uint256,uint256,uint256
//
// Calculate the full array size to get the correct offset for the next argument.
// Decrement it by 1, as the normal index increment is still applied.
virtualArgs += getTypeSize(arg.Type)/32 - 1
} else if arg.Type.T == TupleTy && !isDynamicType(arg.Type) {
// If we have a static tuple, like (uint256, bool, uint256), these are
// coded as just like uint256,bool,uint256
virtualArgs += getTypeSize(arg.Type)/32 - 1
}
if err != nil {
return nil, err
}
retval = append(retval, marshalledValue)
}
return retval, nil
}
// PackValues performs the operation Go format -> Hexdata.
// It is the semantic opposite of UnpackValues.
func (arguments Arguments) PackValues(args []any) ([]byte, error) {
return arguments.Pack(args...)
}
// Pack performs the operation Go format -> Hexdata.
func (arguments Arguments) Pack(args ...any) ([]byte, error) {
// Make sure arguments match up and pack them
abiArgs := arguments
if len(args) != len(abiArgs) {
return nil, fmt.Errorf("argument count mismatch: got %d for %d", len(args), len(abiArgs))
}
// variable input is the output appended at the end of packed
// output. This is used for strings and bytes types input.
var variableInput []byte
// input offset is the bytes offset for packed output
inputOffset := 0
for _, abiArg := range abiArgs {
inputOffset += getTypeSize(abiArg.Type)
}
var ret []byte
for i, a := range args {
input := abiArgs[i]
// pack the input
packed, err := input.Type.pack(reflect.ValueOf(a))
if err != nil {
return nil, err
}
// check for dynamic types
if isDynamicType(input.Type) {
// set the offset
ret = append(ret, packNum(reflect.ValueOf(inputOffset))...)
// calculate next offset
inputOffset += len(packed)
// append to variable input
variableInput = append(variableInput, packed...)
} else {
// append the packed value to the input
ret = append(ret, packed...)
}
}
// append the variable input at the end of the packed input
ret = append(ret, variableInput...)
return ret, nil
}
// ToCamelCase converts an under-score string to a camel-case string
func ToCamelCase(input string) string {
parts := strings.Split(input, "_")
for i, s := range parts {
if len(s) > 0 {
parts[i] = strings.ToUpper(s[:1]) + s[1:]
}
}
return strings.Join(parts, "")
}
// Copyright 2016 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"bytes"
"errors"
"fmt"
"reflect"
"strings"
"github.com/erigontech/erigon/common"
"github.com/erigontech/erigon/common/crypto"
)
type Error struct {
Name string
Inputs Arguments
str string
// Sig contains the string signature according to the ABI spec.
// e.g. event foo(uint32 a, int b) = "foo(uint32,int256)"
// Please note that "int" is substitute for its canonical representation "int256"
Sig string
// ID returns the canonical representation of the event's signature used by the
// abi definition to identify event names and types.
ID common.Hash
}
func NewError(name string, inputs Arguments) Error {
names := make([]string, len(inputs))
types := make([]string, len(inputs))
for i, input := range inputs {
if input.Name == "" {
inputs[i] = Argument{
Name: fmt.Sprintf("arg%d", i),
Indexed: input.Indexed,
Type: input.Type,
}
} else {
inputs[i] = input
}
// string representation
names[i] = fmt.Sprintf("%v %v", input.Type, inputs[i].Name)
if input.Indexed {
names[i] = fmt.Sprintf("%v indexed %v", input.Type, inputs[i].Name)
}
// sig representation
types[i] = input.Type.String()
}
str := fmt.Sprintf("error %v(%v)", name, strings.Join(names, ", "))
sig := fmt.Sprintf("%v(%v)", name, strings.Join(types, ","))
id := common.BytesToHash(crypto.Keccak256([]byte(sig)))
return Error{
Name: name,
Inputs: inputs,
str: str,
Sig: sig,
ID: id,
}
}
func (e *Error) String() string {
return e.str
}
func (e *Error) Unpack(data []byte) (any, error) {
if len(data) < 4 {
return "", errors.New("invalid data for unpacking")
}
if !bytes.Equal(data[:4], e.ID[:4]) {
return "", errors.New("invalid data for unpacking")
}
return e.Inputs.Unpack(data[4:])
}
var (
errBadBool = errors.New("abi: improperly encoded boolean value")
)
// formatSliceString formats the reflection kind with the given slice size
// and returns a formatted string representation.
func formatSliceString(kind reflect.Kind, sliceSize int) string {
if sliceSize == -1 {
return fmt.Sprintf("[]%v", kind)
}
return fmt.Sprintf("[%d]%v", sliceSize, kind)
}
// sliceTypeCheck checks that the given slice can by assigned to the reflection
// type in t.
func sliceTypeCheck(t Type, val reflect.Value) error {
if val.Kind() != reflect.Slice && val.Kind() != reflect.Array {
return typeErr(formatSliceString(t.GetType().Kind(), t.Size), val.Type())
}
if t.T == ArrayTy && val.Len() != t.Size {
return typeErr(formatSliceString(t.Elem.GetType().Kind(), t.Size), formatSliceString(val.Type().Elem().Kind(), val.Len()))
}
if t.Elem.T == SliceTy || t.Elem.T == ArrayTy {
if val.Len() > 0 {
return sliceTypeCheck(*t.Elem, val.Index(0))
}
}
if val.Type().Elem().Kind() != t.Elem.GetType().Kind() {
return typeErr(formatSliceString(t.Elem.GetType().Kind(), t.Size), val.Type())
}
return nil
}
// typeCheck checks that the given reflection value can be assigned to the reflection
// type in t.
func typeCheck(t Type, value reflect.Value) error {
if t.T == SliceTy || t.T == ArrayTy {
return sliceTypeCheck(t, value)
}
// Check base type validity. Element types will be checked later on.
if t.GetType().Kind() != value.Kind() {
return typeErr(t.GetType().Kind(), value.Kind())
} else if t.T == FixedBytesTy && t.Size != value.Len() {
return typeErr(t.GetType(), value.Type())
} else {
return nil
}
}
// typeErr returns a formatted type casting error.
func typeErr(expected, got any) error {
return fmt.Errorf("abi: cannot use %v as type %v as argument", got, expected)
}
// Copyright 2016 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"fmt"
"strings"
"github.com/erigontech/erigon/common"
"github.com/erigontech/erigon/common/crypto"
)
// Event is an event potentially triggered by the EVM's LOG mechanism. The Event
// holds type information (inputs) about the yielded output. Anonymous events
// don't get the signature canonical representation as the first LOG topic.
type Event struct {
// Name is the event name used for internal representation. It's derived from
// the raw name and a suffix will be added in the case of an event overload.
//
// e.g.
// These are two events that have the same name:
// * foo(int,int)
// * foo(uint,uint)
// The event name of the first one wll be resolved as foo while the second one
// will be resolved as foo0.
Name string
// RawName is the raw event name parsed from ABI.
RawName string
Anonymous bool
Inputs Arguments
str string
// Sig contains the string signature according to the ABI spec.
// e.g. event foo(uint32 a, int b) = "foo(uint32,int256)"
// Please note that "int" is substitute for its canonical representation "int256"
Sig string
// ID returns the canonical representation of the event's signature used by the
// abi definition to identify event names and types.
ID common.Hash
}
// NewEvent creates a new Event.
// It sanitizes the input arguments to remove unnamed arguments.
// It also precomputes the id, signature and string representation
// of the event.
func NewEvent(name, rawName string, anonymous bool, inputs Arguments) Event {
// sanitize inputs to remove inputs without names
// and precompute string and sig representation.
names := make([]string, len(inputs))
types := make([]string, len(inputs))
for i, input := range inputs {
if input.Name == "" {
inputs[i] = Argument{
Name: fmt.Sprintf("arg%d", i),
Indexed: input.Indexed,
Type: input.Type,
}
} else {
inputs[i] = input
}
// string representation
names[i] = fmt.Sprintf("%v %v", input.Type, inputs[i].Name)
if input.Indexed {
names[i] = fmt.Sprintf("%v indexed %v", input.Type, inputs[i].Name)
}
// sig representation
types[i] = input.Type.String()
}
str := fmt.Sprintf("event %v(%v)", rawName, strings.Join(names, ", "))
sig := fmt.Sprintf("%v(%v)", rawName, strings.Join(types, ","))
id := common.BytesToHash(crypto.Keccak256([]byte(sig)))
return Event{
Name: name,
RawName: rawName,
Anonymous: anonymous,
Inputs: inputs,
str: str,
Sig: sig,
ID: id,
}
}
func (e Event) String() string {
return e.str
}
// Copyright 2015 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"fmt"
"strings"
"github.com/erigontech/erigon/common/crypto"
)
// FunctionType represents different types of functions a contract might have.
type FunctionType int
const (
// Constructor represents the constructor of the contract.
// The constructor function is called while deploying a contract.
Constructor FunctionType = iota
// Fallback represents the fallback function.
// This function is executed if no other function matches the given function
// signature and no receive function is specified.
Fallback
// Receive represents the receive function.
// This function is executed on plain Ether transfers.
Receive
// Function represents a normal function.
Function
)
// Method represents a callable given a `Name` and whether the method is a constant.
// If the method is `Const` no transaction needs to be created for this
// particular Method call. It can easily be simulated using a local VM.
// For example a `Balance()` method only needs to retrieve something
// from the storage and therefore requires no Txn to be sent to the
// network. A method such as `Transact` does require a Txn and thus will
// be flagged `false`.
// Input specifies the required input parameters for this gives method.
type Method struct {
// Name is the method name used for internal representation. It's derived from
// the raw name and a suffix will be added in the case of a function overload.
//
// e.g.
// These are two functions that have the same name:
// * foo(int,int)
// * foo(uint,uint)
// The method name of the first one will be resolved as foo while the second one
// will be resolved as foo0.
Name string
RawName string // RawName is the raw method name parsed from ABI
// Type indicates whether the method is a
// special fallback introduced in solidity v0.6.0
Type FunctionType
// StateMutability indicates the mutability state of method,
// the default value is nonpayable. It can be empty if the abi
// is generated by legacy compiler.
StateMutability string
// Legacy indicators generated by compiler before v0.6.0
Constant bool
Payable bool
Inputs Arguments
Outputs Arguments
str string
// Sig returns the methods string signature according to the ABI spec.
// e.g. function foo(uint32 a, int b) = "foo(uint32,int256)"
// Please note that "int" is substitute for its canonical representation "int256"
Sig string
// ID returns the canonical representation of the method's signature used by the
// abi definition to identify method names and types.
ID []byte
}
// NewMethod creates a new Method.
// A method should always be created using NewMethod.
// It also precomputes the sig representation and the string representation
// of the method.
func NewMethod(name string, rawName string, funType FunctionType, mutability string, isConst, isPayable bool, inputs Arguments, outputs Arguments) Method {
var (
types = make([]string, len(inputs))
inputNames = make([]string, len(inputs))
outputNames = make([]string, len(outputs))
)
for i, input := range inputs {
inputNames[i] = fmt.Sprintf("%v %v", input.Type, input.Name)
types[i] = input.Type.String()
}
for i, output := range outputs {
outputNames[i] = output.Type.String()
if len(output.Name) > 0 {
outputNames[i] += fmt.Sprintf(" %v", output.Name)
}
}
// calculate the signature and method id. Note only function
// has meaningful signature and id.
var (
sig string
id []byte
)
if funType == Function {
sig = fmt.Sprintf("%v(%v)", rawName, strings.Join(types, ","))
id = crypto.Keccak256([]byte(sig))[:4]
}
// Extract meaningful state mutability of solidity method.
// If it's default value, never print it.
state := mutability
if state == "nonpayable" {
state = ""
}
if state != "" {
state = state + " "
}
identity := fmt.Sprintf("function %v", rawName)
if funType == Fallback {
identity = "fallback"
} else if funType == Receive {
identity = "receive"
} else if funType == Constructor {
identity = "constructor"
}
str := fmt.Sprintf("%v(%v) %sreturns(%v)", identity, strings.Join(inputNames, ", "), state, strings.Join(outputNames, ", "))
return Method{
Name: name,
RawName: rawName,
Type: funType,
StateMutability: mutability,
Constant: isConst,
Payable: isPayable,
Inputs: inputs,
Outputs: outputs,
str: str,
Sig: sig,
ID: id,
}
}
func (method Method) String() string {
return method.str
}
// IsConstant returns the indicator whether the method is read-only.
func (method Method) IsConstant() bool {
return method.StateMutability == "view" || method.StateMutability == "pure" || method.Constant
}
// IsPayable returns the indicator whether the method can process
// plain ether transfers.
func (method Method) IsPayable() bool {
return method.StateMutability == "payable" || method.Payable
}
// Copyright 2016 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"errors"
"fmt"
"math/big"
"reflect"
"github.com/erigontech/erigon/common"
"github.com/erigontech/erigon/common/math"
)
// packBytesSlice packs the given bytes as [L, V] as the canonical representation
// bytes slice.
func packBytesSlice(bytes []byte, l int) []byte {
packedLen := packNum(reflect.ValueOf(l))
return append(packedLen, common.RightPadBytes(bytes, (l+31)/32*32)...)
}
// packElement packs the given reflect value according to the abi specification in
// t.
func packElement(t Type, reflectValue reflect.Value) ([]byte, error) {
switch t.T {
case IntTy, UintTy:
return packNum(reflectValue), nil
case StringTy:
return packBytesSlice([]byte(reflectValue.String()), reflectValue.Len()), nil
case AddressTy:
if reflectValue.Kind() == reflect.Array {
reflectValue = mustArrayToByteSlice(reflectValue)
}
return common.LeftPadBytes(reflectValue.Bytes(), 32), nil
case BoolTy:
if reflectValue.Bool() {
return math.PaddedBigBytes(common.Big1, 32), nil
}
return math.PaddedBigBytes(common.Big0, 32), nil
case BytesTy:
if reflectValue.Kind() == reflect.Array {
reflectValue = mustArrayToByteSlice(reflectValue)
}
if reflectValue.Type() != reflect.TypeFor[[]byte]() {
return []byte{}, errors.New("bytes type is neither slice nor array")
}
return packBytesSlice(reflectValue.Bytes(), reflectValue.Len()), nil
case FixedBytesTy, FunctionTy:
if reflectValue.Kind() == reflect.Array {
reflectValue = mustArrayToByteSlice(reflectValue)
}
return common.RightPadBytes(reflectValue.Bytes(), 32), nil
default:
return []byte{}, fmt.Errorf("could not pack element, unknown type: %v", t.T)
}
}
// packNum packs the given number (using the reflect value) and will cast it to appropriate number representation.
func packNum(value reflect.Value) []byte {
switch kind := value.Kind(); kind {
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return math.U256Bytes(new(big.Int).SetUint64(value.Uint()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return math.U256Bytes(big.NewInt(value.Int()))
case reflect.Pointer:
return math.U256Bytes(new(big.Int).Set(value.Interface().(*big.Int)))
default:
panic("abi: fatal error")
}
}
// Copyright 2016 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"errors"
"fmt"
"math/big"
"reflect"
"strings"
)
// ConvertType converts an interface of a runtime type into an interface of the
// given type
// e.g. turn
// var fields []reflect.StructField
//
// fields = append(fields, reflect.StructField{
// Name: "X",
// Type: reflect.TypeOf(new(big.Int)),
// Tag: reflect.StructTag("json:\"" + "x" + "\""),
// }
//
// into
// type TupleT struct { X *big.Int }
func ConvertType(in any, proto any) any {
protoType := reflect.TypeOf(proto)
if reflect.TypeOf(in).ConvertibleTo(protoType) {
return reflect.ValueOf(in).Convert(protoType).Interface()
}
// Use set as a last ditch effort
if err := set(reflect.ValueOf(proto), reflect.ValueOf(in)); err != nil {
panic(err)
}
return proto
}
// indirect recursively dereferences the value until it either gets the value
// or finds a big.Int
func indirect(v reflect.Value) reflect.Value {
if v.Kind() == reflect.Pointer && v.Elem().Type() != reflect.TypeFor[big.Int]() {
return indirect(v.Elem())
}
return v
}
// reflectIntType returns the reflect using the given size and
// unsignedness.
func reflectIntType(unsigned bool, size int) reflect.Type {
if unsigned {
switch size {
case 8:
return reflect.TypeFor[uint8]()
case 16:
return reflect.TypeFor[uint16]()
case 32:
return reflect.TypeFor[uint32]()
case 64:
return reflect.TypeFor[uint64]()
}
}
switch size {
case 8:
return reflect.TypeFor[int8]()
case 16:
return reflect.TypeFor[int16]()
case 32:
return reflect.TypeFor[int32]()
case 64:
return reflect.TypeFor[int64]()
}
return reflect.TypeFor[*big.Int]()
}
// mustArrayToByteSlice creates a new byte slice with the exact same size as value
// and copies the bytes in value to the new slice.
func mustArrayToByteSlice(value reflect.Value) reflect.Value {
slice := reflect.MakeSlice(reflect.TypeFor[[]byte](), value.Len(), value.Len())
reflect.Copy(slice, value)
return slice
}
// set attempts to assign src to dst by either setting, copying or otherwise.
//
// set is a bit more lenient when it comes to assignment and doesn't force an as
// strict ruleset as bare `reflect` does.
func set(dst, src reflect.Value) error {
dstType, srcType := dst.Type(), src.Type()
switch {
case dstType.Kind() == reflect.Interface && dst.Elem().IsValid():
return set(dst.Elem(), src)
case dstType.Kind() == reflect.Pointer && dstType.Elem() != reflect.TypeFor[big.Int]():
return set(dst.Elem(), src)
case srcType.AssignableTo(dstType) && dst.CanSet():
dst.Set(src)
case dstType.Kind() == reflect.Slice && srcType.Kind() == reflect.Slice && dst.CanSet():
return setSlice(dst, src)
case dstType.Kind() == reflect.Array:
return setArray(dst, src)
case dstType.Kind() == reflect.Struct:
return setStruct(dst, src)
default:
return fmt.Errorf("abi: cannot unmarshal %v in to %v", src.Type(), dst.Type())
}
return nil
}
// setSlice attempts to assign src to dst when slices are not assignable by default
// e.g. src: [][]byte -> dst: [][15]byte
// setSlice ignores if we cannot copy all of src' elements.
func setSlice(dst, src reflect.Value) error {
slice := reflect.MakeSlice(dst.Type(), src.Len(), src.Len())
for i := 0; i < src.Len(); i++ {
if err := set(slice.Index(i), src.Index(i)); err != nil {
return err
}
}
if dst.CanSet() {
dst.Set(slice)
return nil
}
return errors.New("cannot set slice, destination not settable")
}
func setArray(dst, src reflect.Value) error {
if src.Kind() == reflect.Pointer {
return set(dst, indirect(src))
}
array := reflect.New(dst.Type()).Elem()
for i := 0; i < min(src.Len(), dst.Len()); i++ {
if err := set(array.Index(i), src.Index(i)); err != nil {
return err
}
}
if dst.CanSet() {
dst.Set(array)
return nil
}
return errors.New("cannot set array, destination not settable")
}
func setStruct(dst, src reflect.Value) error {
for i := 0; i < src.NumField(); i++ {
srcField := src.Field(i)
dstField := dst.Field(i)
if !dstField.IsValid() || !srcField.IsValid() {
return fmt.Errorf("could not find src field: %v value: %v in destination", srcField.Type().Name(), srcField)
}
if err := set(dstField, srcField); err != nil {
return err
}
}
return nil
}
// mapArgNamesToStructFields maps a slice of argument names to struct fields.
// first round: for each Exportable field that contains a `abi:""` tag
//
// and this field name exists in the given argument name list, pair them together.
//
// second round: for each argument name that has not been already linked,
//
// find what variable is expected to be mapped into, if it exists and has not been
// used, pair them.
//
// Note this function assumes the given value is a struct value.
func mapArgNamesToStructFields(argNames []string, value reflect.Value) (map[string]string, error) {
typ := value.Type()
abi2struct := make(map[string]string)
struct2abi := make(map[string]string)
// first round ~~~
for i := 0; i < typ.NumField(); i++ {
structFieldName := typ.Field(i).Name
// skip private struct fields.
if structFieldName[:1] != strings.ToUpper(structFieldName[:1]) {
continue
}
// skip fields that have no abi:"" tag.
tagName, ok := typ.Field(i).Tag.Lookup("abi")
if !ok {
continue
}
// check if tag is empty.
if tagName == "" {
return nil, fmt.Errorf("struct: abi tag in '%s' is empty", structFieldName)
}
// check which argument field matches with the abi tag.
found := false
for _, arg := range argNames {
if arg == tagName {
if abi2struct[arg] != "" {
return nil, fmt.Errorf("struct: abi tag in '%s' already mapped", structFieldName)
}
// pair them
abi2struct[arg] = structFieldName
struct2abi[structFieldName] = arg
found = true
}
}
// check if this tag has been mapped.
if !found {
return nil, fmt.Errorf("struct: abi tag '%s' defined but not found in abi", tagName)
}
}
// second round ~~~
for _, argName := range argNames {
structFieldName := ToCamelCase(argName)
if structFieldName == "" {
return nil, errors.New("abi: purely underscored output cannot unpack to struct")
}
// this abi has already been paired, skip it... unless there exists another, yet unassigned
// struct field with the same field name. If so, raise an error:
// abi: [ { "name": "value" } ]
// struct { Value *big.Int , Value1 *big.Int `abi:"value"`}
if abi2struct[argName] != "" {
if abi2struct[argName] != structFieldName &&
struct2abi[structFieldName] == "" &&
value.FieldByName(structFieldName).IsValid() {
return nil, fmt.Errorf("abi: multiple variables maps to the same abi field '%s'", argName)
}
continue
}
// return an error if this struct field has already been paired.
if struct2abi[structFieldName] != "" {
return nil, fmt.Errorf("abi: multiple outputs mapping to the same struct field '%s'", structFieldName)
}
if value.FieldByName(structFieldName).IsValid() {
// pair them
abi2struct[argName] = structFieldName
struct2abi[structFieldName] = argName
} else {
// not paired, but annotate as used, to detect cases like
// abi : [ { "name": "value" }, { "name": "_value" } ]
// struct { Value *big.Int }
struct2abi[structFieldName] = argName
}
}
return abi2struct, nil
}
// Copyright 2018 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"encoding/binary"
"errors"
"fmt"
"math/big"
"reflect"
"github.com/erigontech/erigon/common"
"github.com/erigontech/erigon/common/crypto"
"github.com/erigontech/erigon/common/length"
"github.com/erigontech/erigon/common/math"
)
// MakeTopics converts a filter query argument list into a filter topic set.
func MakeTopics(query ...[]any) ([][]common.Hash, error) {
topics := make([][]common.Hash, len(query))
for i, filter := range query {
for _, rule := range filter {
var topic common.Hash
// Try to generate the topic based on simple types
switch rule := rule.(type) {
case common.Hash:
copy(topic[:], rule[:])
case common.Address:
copy(topic[length.Hash-length.Addr:], rule[:])
case *big.Int:
copy(topic[:], math.U256Bytes(new(big.Int).Set(rule)))
case bool:
if rule {
topic[length.Hash-1] = 1
}
case int8:
copy(topic[:], genIntType(int64(rule), 1))
case int16:
copy(topic[:], genIntType(int64(rule), 2))
case int32:
copy(topic[:], genIntType(int64(rule), 4))
case int64:
copy(topic[:], genIntType(rule, 8))
case uint8:
blob := new(big.Int).SetUint64(uint64(rule)).Bytes()
copy(topic[length.Hash-len(blob):], blob)
case uint16:
blob := new(big.Int).SetUint64(uint64(rule)).Bytes()
copy(topic[length.Hash-len(blob):], blob)
case uint32:
blob := new(big.Int).SetUint64(uint64(rule)).Bytes()
copy(topic[length.Hash-len(blob):], blob)
case uint64:
blob := new(big.Int).SetUint64(rule).Bytes()
copy(topic[length.Hash-len(blob):], blob)
case string:
topic = crypto.HashData([]byte(rule))
case []byte:
topic = crypto.HashData(rule)
default:
// todo(rjl493456442) according solidity documentation, indexed event
// parameters that are not value types i.e. arrays and structs are not
// stored directly but instead a keccak256-hash of an encoding is stored.
//
// We only convert stringS and bytes to hash, still need to deal with
// array(both fixed-size and dynamic-size) and struct.
// Attempt to generate the topic from funky types
val := reflect.ValueOf(rule)
switch {
// static byte array
case val.Kind() == reflect.Array && reflect.TypeOf(rule).Elem().Kind() == reflect.Uint8:
if val.Len() > len(topic) {
return nil, fmt.Errorf("indexed byte array too large: [%d]byte", val.Len())
}
reflect.Copy(reflect.ValueOf(topic[:val.Len()]), val)
default:
return nil, fmt.Errorf("unsupported indexed type: %T", rule)
}
}
topics[i] = append(topics[i], topic)
}
}
return topics, nil
}
func genIntType(rule int64, size uint) []byte {
var topic [length.Hash]byte
if rule < 0 {
// if a rule is negative, we need to put it into two's complement.
// extended to length.Hash bytes.
topic = [length.Hash]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}
}
for i := uint(0); i < size; i++ {
topic[length.Hash-i-1] = byte(rule >> (i * 8))
}
return topic[:]
}
// ParseTopics converts the indexed topic fields into actual log field values.
func ParseTopics(out any, fields Arguments, topics []common.Hash) error {
outValue := reflect.ValueOf(out)
if outValue.Kind() != reflect.Pointer || outValue.IsNil() {
return fmt.Errorf("abi: cannot unmarshal indexed event fields into %T", out)
}
outValue = outValue.Elem()
if outValue.Kind() != reflect.Struct {
return fmt.Errorf("abi: cannot unmarshal indexed event fields into %T", out)
}
return parseTopicWithSetter(fields, topics,
func(arg Argument, reconstr any) error {
field := outValue.FieldByName(ToCamelCase(arg.Name))
if !field.IsValid() {
return fmt.Errorf("abi: field %s can't be found in the given value", arg.Name)
}
if !field.CanSet() {
return fmt.Errorf("abi: field %s cannot be set", arg.Name)
}
value := reflect.ValueOf(reconstr)
if !value.IsValid() || !value.Type().AssignableTo(field.Type()) {
return fmt.Errorf("abi: cannot unmarshal %T in to %v", reconstr, field.Type())
}
field.Set(value)
return nil
})
}
// ParseTopicsIntoMap converts the indexed topic field-value pairs into map key-value pairs.
func ParseTopicsIntoMap(out map[string]any, fields Arguments, topics []common.Hash) error {
if out == nil {
return errors.New("abi: cannot unpack into a nil map")
}
return parseTopicWithSetter(fields, topics,
func(arg Argument, reconstr any) error {
out[arg.Name] = reconstr
return nil
})
}
// parseTopicWithSetter converts the indexed topic field-value pairs and stores them using the
// provided set function.
//
// Note, dynamic types cannot be reconstructed since they get mapped to Keccak256
// hashes as the topic value!
func parseTopicWithSetter(fields Arguments, topics []common.Hash, setter func(Argument, any) error) error {
// Sanity check that the fields and topics match up
if len(fields) != len(topics) {
return errors.New("topic/field count mismatch")
}
// Iterate over all the fields and reconstruct them from topics
for i, arg := range fields {
if !arg.Indexed {
return errors.New("non-indexed field in topic reconstruction")
}
var reconstr any
switch arg.Type.T {
case TupleTy:
return errors.New("tuple type in topic reconstruction")
case StringTy, BytesTy, SliceTy, ArrayTy:
// Array types (including strings and bytes) have their keccak256 hashes stored in the topic- not a hash
// whose bytes can be decoded to the actual value- so the best we can do is retrieve that hash
reconstr = topics[i]
case FunctionTy:
if garbage := binary.BigEndian.Uint64(topics[i][0:8]); garbage != 0 {
return fmt.Errorf("bind: got improperly encoded function type, got %v", topics[i][:])
}
var tmp [24]byte
copy(tmp[:], topics[i][8:32])
reconstr = tmp
default:
var err error
reconstr, err = toGoType(0, arg.Type, topics[i][:])
if err != nil {
return err
}
}
// Use the setter function to store the value
if err := setter(arg, reconstr); err != nil {
return err
}
}
return nil
}
// Copyright 2015 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"errors"
"fmt"
"reflect"
"regexp"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"github.com/erigontech/erigon/common"
)
// Type enumerator
const (
IntTy byte = iota
UintTy
BoolTy
StringTy
SliceTy
ArrayTy
TupleTy
AddressTy
FixedBytesTy
BytesTy
HashTy
FixedPointTy
FunctionTy
)
// Type is the reflection of the supported argument type.
type Type struct {
Elem *Type
Size int
T byte // Our own type checking
stringKind string // holds the unparsed string for deriving signatures
// Tuple relative fields
TupleRawName string // Raw struct name defined in source code, may be empty.
TupleElems []*Type // Type information of all tuple fields
TupleRawNames []string // Raw field name of all tuple fields
TupleType reflect.Type // Underlying struct of the tuple
}
var (
// typeRegex parses the abi sub types
typeRegex = regexp.MustCompile("([a-zA-Z]+)(([0-9]+)(x([0-9]+))?)?")
// sliceSizeRegex grabs the slice size with regexp
sliceSizeRegex = regexp.MustCompile("[0-9]+")
)
// NewType creates a new reflection type of abi type given in t.
func NewType(t string, internalType string, components []ArgumentMarshaling) (typ Type, err error) {
// check that array brackets are equal if they exist
if strings.Count(t, "[") != strings.Count(t, "]") {
return Type{}, errors.New("invalid arg type in abi")
}
typ.stringKind = t
// if there are brackets, get ready to go into slice/array mode and
// recursively create the type
if strings.Count(t, "[") != 0 {
// Note internalType can be empty here.
subInternal := internalType
if i := strings.LastIndex(internalType, "["); i != -1 {
subInternal = subInternal[:i]
}
// recursively embed the type
i := strings.LastIndex(t, "[")
embeddedType, err := NewType(t[:i], subInternal, components)
if err != nil {
return Type{}, err
}
// grab the last cell and create a type from there
sliced := t[i:]
intz := sliceSizeRegex.FindAllString(sliced, -1)
if len(intz) == 0 {
// is a slice
typ.T = SliceTy
typ.Elem = &embeddedType
typ.stringKind = embeddedType.stringKind + sliced
} else if len(intz) == 1 {
// is an array
typ.T = ArrayTy
typ.Elem = &embeddedType
typ.Size, err = strconv.Atoi(intz[0])
if err != nil {
return Type{}, fmt.Errorf("abi: error parsing variable size: %w", err)
}
typ.stringKind = embeddedType.stringKind + sliced
} else {
return Type{}, errors.New("invalid formatting of array type")
}
return typ, err
}
// parse the type and size of the abi-type.
matches := typeRegex.FindAllStringSubmatch(t, -1)
if len(matches) == 0 {
return Type{}, fmt.Errorf("invalid type '%v'", t)
}
parsedType := matches[0]
// varSize is the size of the variable
var varSize int
if len(parsedType[3]) > 0 {
var err error
varSize, err = strconv.Atoi(parsedType[3])
if err != nil {
return Type{}, fmt.Errorf("abi: error parsing variable size: %w", err)
}
} else {
if parsedType[0] == "uint" || parsedType[0] == "int" {
// this should fail because it means that there's something wrong with
// the abi type (the compiler should always format it to the size...always)
return Type{}, fmt.Errorf("unsupported arg type: %s", t)
}
}
// varType is the parsed abi type
switch varType := parsedType[1]; varType {
case "int":
if varSize < 8 || varSize > 256 || varSize%8 != 0 {
return Type{}, fmt.Errorf("unsupported arg type: %s", t)
}
typ.Size = varSize
typ.T = IntTy
case "uint":
if varSize < 8 || varSize > 256 || varSize%8 != 0 {
return Type{}, fmt.Errorf("unsupported arg type: %s", t)
}
typ.Size = varSize
typ.T = UintTy
case "bool":
typ.T = BoolTy
case "address":
typ.Size = 20
typ.T = AddressTy
case "string":
typ.T = StringTy
case "bytes":
if varSize == 0 && len(parsedType[3]) == 0 {
typ.T = BytesTy
} else {
if varSize == 0 || varSize > 32 {
return Type{}, fmt.Errorf("unsupported arg type: %s", t)
}
typ.T = FixedBytesTy
typ.Size = varSize
}
case "tuple":
var (
fields []reflect.StructField
elems []*Type
names []string
expression strings.Builder // canonical parameter expression
)
expression.WriteString("(")
overloadedNames := make(map[string]string)
for idx, c := range components {
cType, err := NewType(c.Type, c.InternalType, c.Components)
if err != nil {
return Type{}, err
}
fieldName, err := overloadedArgName(c.Name, overloadedNames)
if err != nil {
return Type{}, err
}
if !isValidFieldName(fieldName) {
return Type{}, fmt.Errorf("abi: field %d has invalid name: abi name %q normalized to go field %q", idx, c.Name, fieldName)
}
overloadedNames[fieldName] = fieldName
fields = append(fields, reflect.StructField{
Name: fieldName, // reflect.StructOf will panic for any exported field.
Type: cType.GetType(),
Tag: reflect.StructTag("json:\"" + c.Name + "\""),
})
elems = append(elems, &cType)
names = append(names, c.Name)
expression.WriteString(cType.stringKind)
if idx != len(components)-1 {
expression.WriteString(",")
}
}
expression.WriteString(")")
typ.TupleType = reflect.StructOf(fields)
typ.TupleElems = elems
typ.TupleRawNames = names
typ.T = TupleTy
typ.stringKind = expression.String()
const structPrefix = "struct "
// After solidity 0.5.10, a new field of abi "internalType"
// is introduced. From that we can obtain the struct name
// user defined in the source code.
if internalType != "" && strings.HasPrefix(internalType, structPrefix) {
// Foo.Bar type definition is not allowed in golang,
// convert the format to FooBar
typ.TupleRawName = strings.ReplaceAll(internalType[len(structPrefix):], ".", "")
}
case "function":
typ.T = FunctionTy
typ.Size = 24
default:
return Type{}, fmt.Errorf("unsupported arg type: %s", t)
}
return
}
// GetType returns the reflection type of the ABI type.
func (t Type) GetType() reflect.Type {
switch t.T {
case IntTy:
return reflectIntType(false, t.Size)
case UintTy:
return reflectIntType(true, t.Size)
case BoolTy:
return reflect.TypeFor[bool]()
case StringTy:
return reflect.TypeFor[string]()
case SliceTy:
return reflect.SliceOf(t.Elem.GetType())
case ArrayTy:
return reflect.ArrayOf(t.Size, t.Elem.GetType())
case TupleTy:
return t.TupleType
case AddressTy:
return reflect.TypeFor[common.Address]()
case FixedBytesTy:
return reflect.ArrayOf(t.Size, reflect.TypeFor[byte]())
case BytesTy:
return reflect.SliceOf(reflect.TypeFor[byte]())
case HashTy:
// hashtype currently not used
return reflect.ArrayOf(32, reflect.TypeFor[byte]())
case FixedPointTy:
// fixedpoint type currently not used
return reflect.ArrayOf(32, reflect.TypeFor[byte]())
case FunctionTy:
return reflect.ArrayOf(24, reflect.TypeFor[byte]())
default:
panic("Invalid type")
}
}
func overloadedArgName(rawName string, names map[string]string) (string, error) {
fieldName := ToCamelCase(rawName)
if fieldName == "" {
return "", errors.New("abi: purely anonymous or underscored field is not supported")
}
// Handle overloaded fieldNames
_, ok := names[fieldName]
for idx := 0; ok; idx++ {
fieldName = fmt.Sprintf("%s%d", ToCamelCase(rawName), idx)
_, ok = names[fieldName]
}
return fieldName, nil
}
// String implements Stringer.
func (t Type) String() (out string) {
return t.stringKind
}
func (t Type) pack(v reflect.Value) ([]byte, error) {
// dereference pointer first if it's a pointer
v = indirect(v)
if err := typeCheck(t, v); err != nil {
return nil, err
}
switch t.T {
case SliceTy, ArrayTy:
var ret []byte
if t.requiresLengthPrefix() {
// append length
ret = append(ret, packNum(reflect.ValueOf(v.Len()))...)
}
// calculate offset if any
offset := 0
offsetReq := isDynamicType(*t.Elem)
if offsetReq {
offset = getTypeSize(*t.Elem) * v.Len()
}
var tail []byte
for i := 0; i < v.Len(); i++ {
val, err := t.Elem.pack(v.Index(i))
if err != nil {
return nil, err
}
if !offsetReq {
ret = append(ret, val...)
continue
}
ret = append(ret, packNum(reflect.ValueOf(offset))...)
offset += len(val)
tail = append(tail, val...)
}
return append(ret, tail...), nil
case TupleTy:
// (T1,...,Tk) for k >= 0 and any types T1, …, Tk
// enc(X) = head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(k))
// where X = (X(1), ..., X(k)) and head and tail are defined for Ti being a static
// type as
// head(X(i)) = enc(X(i)) and tail(X(i)) = "" (the empty string)
// and as
// head(X(i)) = enc(len(head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(i-1))))
// tail(X(i)) = enc(X(i))
// otherwise, i.e. if Ti is a dynamic type.
fieldmap, err := mapArgNamesToStructFields(t.TupleRawNames, v)
if err != nil {
return nil, err
}
// Calculate prefix occupied size.
offset := 0
for _, elem := range t.TupleElems {
offset += getTypeSize(*elem)
}
var ret, tail []byte
for i, elem := range t.TupleElems {
field := v.FieldByName(fieldmap[t.TupleRawNames[i]])
if !field.IsValid() {
return nil, fmt.Errorf("field %s for tuple not found in the given struct", t.TupleRawNames[i])
}
val, err := elem.pack(field)
if err != nil {
return nil, err
}
if isDynamicType(*elem) {
ret = append(ret, packNum(reflect.ValueOf(offset))...)
tail = append(tail, val...)
offset += len(val)
} else {
ret = append(ret, val...)
}
}
return append(ret, tail...), nil
default:
return packElement(t, v)
}
}
// requiresLengthPrefix returns whether the type requires any sort of length
// prefixing.
func (t Type) requiresLengthPrefix() bool {
return t.T == StringTy || t.T == BytesTy || t.T == SliceTy
}
// isDynamicType returns true if the type is dynamic.
// The following types are called “dynamic”:
// * bytes
// * string
// * T[] for any T
// * T[k] for any dynamic T and any k >= 0
// * (T1,...,Tk) if Ti is dynamic for some 1 <= i <= k
func isDynamicType(t Type) bool {
if t.T == TupleTy {
for _, elem := range t.TupleElems {
if isDynamicType(*elem) {
return true
}
}
return false
}
return t.T == StringTy || t.T == BytesTy || t.T == SliceTy || (t.T == ArrayTy && isDynamicType(*t.Elem))
}
// getTypeSize returns the size that this type needs to occupy.
// We distinguish static and dynamic types. Static types are encoded in-place
// and dynamic types are encoded at a separately allocated location after the
// current block.
// So for a static variable, the size returned represents the size that the
// variable actually occupies.
// For a dynamic variable, the returned size is fixed 32 bytes, which is used
// to store the location reference for actual value storage.
func getTypeSize(t Type) int {
if t.T == ArrayTy && !isDynamicType(*t.Elem) {
// Recursively calculate type size if it is a nested array
if t.Elem.T == ArrayTy || t.Elem.T == TupleTy {
return t.Size * getTypeSize(*t.Elem)
}
return t.Size * 32
} else if t.T == TupleTy && !isDynamicType(t) {
total := 0
for _, elem := range t.TupleElems {
total += getTypeSize(*elem)
}
return total
}
return 32
}
// isLetter reports whether a given 'rune' is classified as a Letter.
// Copied from reflect/type.go.
func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}
// isValidFieldName checks if a string is a valid (struct) field name or not.
//
// According to the language spec, a field name should be an identifier.
//
// identifier = letter { letter | unicode_digit } .
// letter = unicode_letter | "_" .
// Copied from reflect/type.go.
func isValidFieldName(fieldName string) bool {
for i, c := range fieldName {
if i == 0 && !isLetter(c) {
return false
}
if !(isLetter(c) || unicode.IsDigit(c)) {
return false
}
}
return len(fieldName) > 0
}
// Copyright 2017 The go-ethereum Authors
// (original work)
// Copyright 2024 The Erigon Authors
// (modifications)
// This file is part of Erigon.
//
// Erigon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Erigon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
package abi
import (
"encoding/binary"
"errors"
"fmt"
"math/big"
"reflect"
"github.com/erigontech/erigon/common"
)
var (
// MaxUint256 is the maximum value that can be represented by a uint256.
MaxUint256 = new(big.Int).Sub(new(big.Int).Lsh(common.Big1, 256), common.Big1)
// MaxInt256 is the maximum value that can be represented by a int256.
MaxInt256 = new(big.Int).Sub(new(big.Int).Lsh(common.Big1, 255), common.Big1)
)
// ReadInteger reads the integer based on its kind and returns the appropriate value.
func ReadInteger(typ Type, b []byte) any {
if typ.T == UintTy {
switch typ.Size {
case 8:
return b[len(b)-1]
case 16:
return binary.BigEndian.Uint16(b[len(b)-2:])
case 32:
return binary.BigEndian.Uint32(b[len(b)-4:])
case 64:
return binary.BigEndian.Uint64(b[len(b)-8:])
default:
// the only case left for unsigned integer is uint256.
return new(big.Int).SetBytes(b)
}
}
switch typ.Size {
case 8:
return int8(b[len(b)-1])
case 16:
return int16(binary.BigEndian.Uint16(b[len(b)-2:]))
case 32:
return int32(binary.BigEndian.Uint32(b[len(b)-4:]))
case 64:
return int64(binary.BigEndian.Uint64(b[len(b)-8:]))
default:
// the only case left for integer is int256
// big.SetBytes can't tell if a number is negative or positive in itself.
// On EVM, if the returned number > max int256, it is negative.
// A number is > max int256 if the bit at position 255 is set.
ret := new(big.Int).SetBytes(b)
if ret.Bit(255) == 1 {
ret.Add(MaxUint256, new(big.Int).Neg(ret))
ret.Add(ret, common.Big1)
ret.Neg(ret)
}
return ret
}
}
// readBool reads a bool.
func readBool(word []byte) (bool, error) {
for _, b := range word[:31] {
if b != 0 {
return false, errBadBool
}
}
switch word[31] {
case 0:
return false, nil
case 1:
return true, nil
default:
return false, errBadBool
}
}
// A function type is simply the address with the function selection signature at the end.
//
// readFunctionType enforces that standard by always presenting it as a 24-array (address + sig = 24 bytes)
func readFunctionType(t Type, word []byte) (funcTy [24]byte, err error) {
if t.T != FunctionTy {
return [24]byte{}, errors.New("abi: invalid type in call to make function type byte array")
}
if garbage := binary.BigEndian.Uint64(word[24:32]); garbage != 0 {
err = fmt.Errorf("abi: got improperly encoded function type, got %v", word)
} else {
copy(funcTy[:], word[0:24])
}
return
}
// ReadFixedBytes uses reflection to create a fixed array to be read from.
func ReadFixedBytes(t Type, word []byte) (any, error) {
if t.T != FixedBytesTy {
return nil, errors.New("abi: invalid type in call to make fixed byte array")
}
// convert
array := reflect.New(t.GetType()).Elem()
reflect.Copy(array, reflect.ValueOf(word[0:t.Size]))
return array.Interface(), nil
}
// forEachUnpack iteratively unpack elements.
func forEachUnpack(t Type, output []byte, start, size int) (any, error) {
if size < 0 {
return nil, fmt.Errorf("cannot marshal input to array, size is negative (%d)", size)
}
if start+32*size > len(output) {
return nil, fmt.Errorf("abi: cannot marshal in to go array: offset %d would go over slice boundary (len=%d)", start+32*size, len(output))
}
// this value will become our slice or our array, depending on the type
var refSlice reflect.Value
if t.T == SliceTy {
// declare our slice
refSlice = reflect.MakeSlice(t.GetType(), size, size)
} else if t.T == ArrayTy {
// declare our array
refSlice = reflect.New(t.GetType()).Elem()
} else {
return nil, errors.New("abi: invalid type in array/slice unpacking stage")
}
// Arrays have packed elements, resulting in longer unpack steps.
// Slices have just 32 bytes per element (pointing to the contents).
elemSize := getTypeSize(*t.Elem)
for i, j := start, 0; j < size; i, j = i+elemSize, j+1 {
inter, err := toGoType(i, *t.Elem, output)
if err != nil {
return nil, err
}
// append the item to our reflect slice
refSlice.Index(j).Set(reflect.ValueOf(inter))
}
// return the interface
return refSlice.Interface(), nil
}
func forTupleUnpack(t Type, output []byte) (any, error) {
retval := reflect.New(t.GetType()).Elem()
virtualArgs := 0
for index, elem := range t.TupleElems {
marshalledValue, err := toGoType((index+virtualArgs)*32, *elem, output)
if elem.T == ArrayTy && !isDynamicType(*elem) {
// If we have a static array, like [3]uint256, these are coded as
// just like uint256,uint256,uint256.
// This means that we need to add two 'virtual' arguments when
// we count the index from now on.
//
// Array values nested multiple levels deep are also encoded inline:
// [2][3]uint256: uint256,uint256,uint256,uint256,uint256,uint256
//
// Calculate the full array size to get the correct offset for the next argument.
// Decrement it by 1, as the normal index increment is still applied.
virtualArgs += getTypeSize(*elem)/32 - 1
} else if elem.T == TupleTy && !isDynamicType(*elem) {
// If we have a static tuple, like (uint256, bool, uint256), these are
// coded as just like uint256,bool,uint256
virtualArgs += getTypeSize(*elem)/32 - 1
}
if err != nil {
return nil, err
}
retval.Field(index).Set(reflect.ValueOf(marshalledValue))
}
return retval.Interface(), nil
}
// toGoType parses the output bytes and recursively assigns the value of these bytes
// into a go type with accordance with the ABI spec.
func toGoType(index int, t Type, output []byte) (any, error) {
if index+32 > len(output) {
return nil, fmt.Errorf("abi: cannot marshal in to go type: length insufficient %d require %d", len(output), index+32)
}
var (
returnOutput []byte
begin, length int
err error
)
// if we require a length prefix, find the beginning word and size returned.
if t.requiresLengthPrefix() {
begin, length, err = lengthPrefixPointsTo(index, output)
if err != nil {
return nil, err
}
} else {
returnOutput = output[index : index+32]
}
switch t.T {
case TupleTy:
if isDynamicType(t) {
begin, err := tuplePointsTo(index, output)
if err != nil {
return nil, err
}
return forTupleUnpack(t, output[begin:])
}
return forTupleUnpack(t, output[index:])
case SliceTy:
return forEachUnpack(t, output[begin:], 0, length)
case ArrayTy:
if isDynamicType(*t.Elem) {
offset := binary.BigEndian.Uint64(returnOutput[len(returnOutput)-8:])
if offset > uint64(len(output)) {
return nil, fmt.Errorf("abi: toGoType offset greater than output length: offset: %d, len(output): %d", offset, len(output))
}
return forEachUnpack(t, output[offset:], 0, t.Size)
}
return forEachUnpack(t, output[index:], 0, t.Size)
case StringTy: // variable arrays are written at the end of the return bytes
return string(output[begin : begin+length]), nil
case IntTy, UintTy:
return ReadInteger(t, returnOutput), nil
case BoolTy:
return readBool(returnOutput)
case AddressTy:
return common.BytesToAddress(returnOutput), nil
case HashTy:
return common.BytesToHash(returnOutput), nil
case BytesTy:
return output[begin : begin+length], nil
case FixedBytesTy:
return ReadFixedBytes(t, returnOutput)
case FunctionTy:
return readFunctionType(t, returnOutput)
default:
return nil, fmt.Errorf("abi: unknown type %v", t.T)
}
}
// lengthPrefixPointsTo interprets a 32 byte slice as an offset and then determines which indices to look to decode the type.
func lengthPrefixPointsTo(index int, output []byte) (start int, length int, err error) {
bigOffsetEnd := big.NewInt(0).SetBytes(output[index : index+32])
bigOffsetEnd.Add(bigOffsetEnd, common.Big32)
outputLength := big.NewInt(int64(len(output)))
if bigOffsetEnd.Cmp(outputLength) > 0 {
return 0, 0, fmt.Errorf("abi: cannot marshal in to go slice: offset %v would go over slice boundary (len=%v)", bigOffsetEnd, outputLength)
}
if bigOffsetEnd.BitLen() > 63 {
return 0, 0, fmt.Errorf("abi offset larger than int64: %v", bigOffsetEnd)
}
offsetEnd := int(bigOffsetEnd.Uint64())
lengthBig := big.NewInt(0).SetBytes(output[offsetEnd-32 : offsetEnd])
totalSize := big.NewInt(0)
totalSize.Add(totalSize, bigOffsetEnd)
totalSize.Add(totalSize, lengthBig)
if totalSize.BitLen() > 63 {
return 0, 0, fmt.Errorf("abi: length larger than int64: %v", totalSize)
}
if totalSize.Cmp(outputLength) > 0 {
return 0, 0, fmt.Errorf("abi: cannot marshal in to go type: length insufficient %v require %v", outputLength, totalSize)
}
start = int(bigOffsetEnd.Uint64())
length = int(lengthBig.Uint64())
return
}
// tuplePointsTo resolves the location reference for dynamic tuple.
func tuplePointsTo(index int, output []byte) (start int, err error) {
offset := big.NewInt(0).SetBytes(output[index : index+32])
outputLen := big.NewInt(int64(len(output)))
if offset.Cmp(big.NewInt(int64(len(output)))) > 0 {
return 0, fmt.Errorf("abi: cannot marshal in to go slice: offset %v would go over slice boundary (len=%v)", offset, outputLen)
}
if offset.BitLen() > 63 {
return 0, fmt.Errorf("abi offset larger than int64: %v", offset)
}
return int(offset.Uint64()), nil
}