Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "bytes"
9 : "cmp"
10 : "context"
11 : "encoding/binary"
12 : "fmt"
13 : "io"
14 : "slices"
15 : "sort"
16 : "unsafe"
17 :
18 : "github.com/cockroachdb/pebble/internal/base"
19 : )
20 :
21 : // Layout describes the block organization of an sstable.
22 : type Layout struct {
23 : // NOTE: changes to fields in this struct should also be reflected in
24 : // ValidateBlockChecksums, which validates a static list of BlockHandles
25 : // referenced in this struct.
26 :
27 : Data []BlockHandleWithProperties
28 : Index []BlockHandle
29 : TopIndex BlockHandle
30 : Filter BlockHandle
31 : RangeDel BlockHandle
32 : RangeKey BlockHandle
33 : ValueBlock []BlockHandle
34 : ValueIndex BlockHandle
35 : Properties BlockHandle
36 : MetaIndex BlockHandle
37 : Footer BlockHandle
38 : Format TableFormat
39 : }
40 :
41 : // Describe returns a description of the layout. If the verbose parameter is
42 : // true, details of the structure of each block are returned as well.
43 : func (l *Layout) Describe(
44 : w io.Writer, verbose bool, r *Reader, fmtRecord func(key *base.InternalKey, value []byte),
45 1 : ) {
46 1 : ctx := context.TODO()
47 1 : type block struct {
48 1 : BlockHandle
49 1 : name string
50 1 : }
51 1 : var blocks []block
52 1 :
53 1 : for i := range l.Data {
54 1 : blocks = append(blocks, block{l.Data[i].BlockHandle, "data"})
55 1 : }
56 1 : for i := range l.Index {
57 1 : blocks = append(blocks, block{l.Index[i], "index"})
58 1 : }
59 1 : if l.TopIndex.Length != 0 {
60 1 : blocks = append(blocks, block{l.TopIndex, "top-index"})
61 1 : }
62 1 : if l.Filter.Length != 0 {
63 1 : blocks = append(blocks, block{l.Filter, "filter"})
64 1 : }
65 1 : if l.RangeDel.Length != 0 {
66 1 : blocks = append(blocks, block{l.RangeDel, "range-del"})
67 1 : }
68 1 : if l.RangeKey.Length != 0 {
69 1 : blocks = append(blocks, block{l.RangeKey, "range-key"})
70 1 : }
71 1 : for i := range l.ValueBlock {
72 1 : blocks = append(blocks, block{l.ValueBlock[i], "value-block"})
73 1 : }
74 1 : if l.ValueIndex.Length != 0 {
75 1 : blocks = append(blocks, block{l.ValueIndex, "value-index"})
76 1 : }
77 1 : if l.Properties.Length != 0 {
78 1 : blocks = append(blocks, block{l.Properties, "properties"})
79 1 : }
80 1 : if l.MetaIndex.Length != 0 {
81 1 : blocks = append(blocks, block{l.MetaIndex, "meta-index"})
82 1 : }
83 1 : if l.Footer.Length != 0 {
84 1 : if l.Footer.Length == levelDBFooterLen {
85 1 : blocks = append(blocks, block{l.Footer, "leveldb-footer"})
86 1 : } else {
87 1 : blocks = append(blocks, block{l.Footer, "footer"})
88 1 : }
89 : }
90 :
91 1 : slices.SortFunc(blocks, func(a, b block) int {
92 1 : return cmp.Compare(a.Offset, b.Offset)
93 1 : })
94 1 : for i := range blocks {
95 1 : b := &blocks[i]
96 1 : fmt.Fprintf(w, "%10d %s (%d)\n", b.Offset, b.name, b.Length)
97 1 :
98 1 : if !verbose {
99 1 : continue
100 : }
101 1 : if b.name == "filter" {
102 0 : continue
103 : }
104 :
105 1 : if b.name == "footer" || b.name == "leveldb-footer" {
106 1 : trailer, offset := make([]byte, b.Length), b.Offset
107 1 : _ = r.readable.ReadAt(ctx, trailer, int64(offset))
108 1 :
109 1 : if b.name == "footer" {
110 1 : checksumType := ChecksumType(trailer[0])
111 1 : fmt.Fprintf(w, "%10d checksum type: %s\n", offset, checksumType)
112 1 : trailer, offset = trailer[1:], offset+1
113 1 : }
114 :
115 1 : metaHandle, n := binary.Uvarint(trailer)
116 1 : metaLen, m := binary.Uvarint(trailer[n:])
117 1 : fmt.Fprintf(w, "%10d meta: offset=%d, length=%d\n", offset, metaHandle, metaLen)
118 1 : trailer, offset = trailer[n+m:], offset+uint64(n+m)
119 1 :
120 1 : indexHandle, n := binary.Uvarint(trailer)
121 1 : indexLen, m := binary.Uvarint(trailer[n:])
122 1 : fmt.Fprintf(w, "%10d index: offset=%d, length=%d\n", offset, indexHandle, indexLen)
123 1 : trailer, offset = trailer[n+m:], offset+uint64(n+m)
124 1 :
125 1 : fmt.Fprintf(w, "%10d [padding]\n", offset)
126 1 :
127 1 : trailing := 12
128 1 : if b.name == "leveldb-footer" {
129 0 : trailing = 8
130 0 : }
131 :
132 1 : offset += uint64(len(trailer) - trailing)
133 1 : trailer = trailer[len(trailer)-trailing:]
134 1 :
135 1 : if b.name == "footer" {
136 1 : version := trailer[:4]
137 1 : fmt.Fprintf(w, "%10d version: %d\n", offset, binary.LittleEndian.Uint32(version))
138 1 : trailer, offset = trailer[4:], offset+4
139 1 : }
140 :
141 1 : magicNumber := trailer
142 1 : fmt.Fprintf(w, "%10d magic number: 0x%x\n", offset, magicNumber)
143 1 :
144 1 : continue
145 : }
146 :
147 1 : h, err := r.readBlock(
148 1 : context.Background(), b.BlockHandle, nil /* transform */, nil /* readHandle */, nil /* stats */, nil /* iterStats */, nil /* buffer pool */)
149 1 : if err != nil {
150 0 : fmt.Fprintf(w, " [err: %s]\n", err)
151 0 : continue
152 : }
153 :
154 1 : getRestart := func(data []byte, restarts, i int32) int32 {
155 1 : return decodeRestart(data[restarts+4*i:])
156 1 : }
157 :
158 1 : formatIsRestart := func(data []byte, restarts, numRestarts, offset int32) {
159 1 : i := sort.Search(int(numRestarts), func(i int) bool {
160 1 : return getRestart(data, restarts, int32(i)) >= offset
161 1 : })
162 1 : if i < int(numRestarts) && getRestart(data, restarts, int32(i)) == offset {
163 1 : fmt.Fprintf(w, " [restart]\n")
164 1 : } else {
165 1 : fmt.Fprintf(w, "\n")
166 1 : }
167 : }
168 :
169 1 : formatRestarts := func(data []byte, restarts, numRestarts int32) {
170 1 : for i := int32(0); i < numRestarts; i++ {
171 1 : offset := getRestart(data, restarts, i)
172 1 : fmt.Fprintf(w, "%10d [restart %d]\n",
173 1 : b.Offset+uint64(restarts+4*i), b.Offset+uint64(offset))
174 1 : }
175 : }
176 :
177 1 : formatTrailer := func() {
178 1 : trailer := make([]byte, blockTrailerLen)
179 1 : offset := int64(b.Offset + b.Length)
180 1 : _ = r.readable.ReadAt(ctx, trailer, offset)
181 1 : bt := blockType(trailer[0])
182 1 : checksum := binary.LittleEndian.Uint32(trailer[1:])
183 1 : fmt.Fprintf(w, "%10d [trailer compression=%s checksum=0x%04x]\n", offset, bt, checksum)
184 1 : }
185 :
186 1 : var lastKey InternalKey
187 1 : switch b.name {
188 1 : case "data", "range-del", "range-key":
189 1 : iter, _ := newBlockIter(r.Compare, h.Get())
190 1 : for key, value := iter.First(); key != nil; key, value = iter.Next() {
191 1 : ptr := unsafe.Pointer(uintptr(iter.ptr) + uintptr(iter.offset))
192 1 : shared, ptr := decodeVarint(ptr)
193 1 : unshared, ptr := decodeVarint(ptr)
194 1 : value2, _ := decodeVarint(ptr)
195 1 :
196 1 : total := iter.nextOffset - iter.offset
197 1 : // The format of the numbers in the record line is:
198 1 : //
199 1 : // (<total> = <length> [<shared>] + <unshared> + <value>)
200 1 : //
201 1 : // <total> is the total number of bytes for the record.
202 1 : // <length> is the size of the 3 varint encoded integers for <shared>,
203 1 : // <unshared>, and <value>.
204 1 : // <shared> is the number of key bytes shared with the previous key.
205 1 : // <unshared> is the number of unshared key bytes.
206 1 : // <value> is the number of value bytes.
207 1 : fmt.Fprintf(w, "%10d record (%d = %d [%d] + %d + %d)",
208 1 : b.Offset+uint64(iter.offset), total,
209 1 : total-int32(unshared+value2), shared, unshared, value2)
210 1 : formatIsRestart(iter.data, iter.restarts, iter.numRestarts, iter.offset)
211 1 : if fmtRecord != nil {
212 1 : fmt.Fprintf(w, " ")
213 1 : if l.Format < TableFormatPebblev3 {
214 1 : fmtRecord(key, value.InPlaceValue())
215 1 : } else {
216 1 : // InPlaceValue() will succeed even for data blocks where the
217 1 : // actual value is in a different location, since this value was
218 1 : // fetched from a blockIter which does not know about value
219 1 : // blocks.
220 1 : v := value.InPlaceValue()
221 1 : if base.TrailerKind(key.Trailer) != InternalKeyKindSet {
222 1 : fmtRecord(key, v)
223 1 : } else if !isValueHandle(valuePrefix(v[0])) {
224 1 : fmtRecord(key, v[1:])
225 1 : } else {
226 1 : vh := decodeValueHandle(v[1:])
227 1 : fmtRecord(key, []byte(fmt.Sprintf("value handle %+v", vh)))
228 1 : }
229 : }
230 : }
231 :
232 1 : if base.InternalCompare(r.Compare, lastKey, *key) >= 0 {
233 1 : fmt.Fprintf(w, " WARNING: OUT OF ORDER KEYS!\n")
234 1 : }
235 1 : lastKey.Trailer = key.Trailer
236 1 : lastKey.UserKey = append(lastKey.UserKey[:0], key.UserKey...)
237 : }
238 1 : formatRestarts(iter.data, iter.restarts, iter.numRestarts)
239 1 : formatTrailer()
240 1 : case "index", "top-index":
241 1 : iter, _ := newBlockIter(r.Compare, h.Get())
242 1 : for key, value := iter.First(); key != nil; key, value = iter.Next() {
243 1 : bh, err := decodeBlockHandleWithProperties(value.InPlaceValue())
244 1 : if err != nil {
245 0 : fmt.Fprintf(w, "%10d [err: %s]\n", b.Offset+uint64(iter.offset), err)
246 0 : continue
247 : }
248 1 : fmt.Fprintf(w, "%10d block:%d/%d",
249 1 : b.Offset+uint64(iter.offset), bh.Offset, bh.Length)
250 1 : formatIsRestart(iter.data, iter.restarts, iter.numRestarts, iter.offset)
251 : }
252 1 : formatRestarts(iter.data, iter.restarts, iter.numRestarts)
253 1 : formatTrailer()
254 1 : case "properties":
255 1 : iter, _ := newRawBlockIter(r.Compare, h.Get())
256 1 : for valid := iter.First(); valid; valid = iter.Next() {
257 1 : fmt.Fprintf(w, "%10d %s (%d)",
258 1 : b.Offset+uint64(iter.offset), iter.Key().UserKey, iter.nextOffset-iter.offset)
259 1 : formatIsRestart(iter.data, iter.restarts, iter.numRestarts, iter.offset)
260 1 : }
261 1 : formatRestarts(iter.data, iter.restarts, iter.numRestarts)
262 1 : formatTrailer()
263 1 : case "meta-index":
264 1 : iter, _ := newRawBlockIter(r.Compare, h.Get())
265 1 : for valid := iter.First(); valid; valid = iter.Next() {
266 1 : value := iter.Value()
267 1 : var bh BlockHandle
268 1 : var n int
269 1 : var vbih valueBlocksIndexHandle
270 1 : isValueBlocksIndexHandle := false
271 1 : if bytes.Equal(iter.Key().UserKey, []byte(metaValueIndexName)) {
272 1 : vbih, n, err = decodeValueBlocksIndexHandle(value)
273 1 : bh = vbih.h
274 1 : isValueBlocksIndexHandle = true
275 1 : } else {
276 1 : bh, n = decodeBlockHandle(value)
277 1 : }
278 1 : if n == 0 || n != len(value) {
279 0 : fmt.Fprintf(w, "%10d [err: %s]\n", b.Offset+uint64(iter.offset), err)
280 0 : continue
281 : }
282 1 : var vbihStr string
283 1 : if isValueBlocksIndexHandle {
284 1 : vbihStr = fmt.Sprintf(" value-blocks-index-lengths: %d(num), %d(offset), %d(length)",
285 1 : vbih.blockNumByteLength, vbih.blockOffsetByteLength, vbih.blockLengthByteLength)
286 1 : }
287 1 : fmt.Fprintf(w, "%10d %s block:%d/%d%s",
288 1 : b.Offset+uint64(iter.offset), iter.Key().UserKey,
289 1 : bh.Offset, bh.Length, vbihStr)
290 1 : formatIsRestart(iter.data, iter.restarts, iter.numRestarts, iter.offset)
291 : }
292 1 : formatRestarts(iter.data, iter.restarts, iter.numRestarts)
293 1 : formatTrailer()
294 1 : case "value-block":
295 : // We don't peer into the value-block since it can't be interpreted
296 : // without the valueHandles.
297 1 : case "value-index":
298 : // We have already read the value-index to construct the list of
299 : // value-blocks, so no need to do it again.
300 : }
301 :
302 1 : h.Release()
303 : }
304 :
305 1 : last := blocks[len(blocks)-1]
306 1 : fmt.Fprintf(w, "%10d EOF\n", last.Offset+last.Length)
307 : }
|