Coverage Report

Created: 2025-07-23 07:17

/src/rocksdb/table/plain/plain_table_factory.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
3
// Use of this source code is governed by a BSD-style license that can be
4
// found in the LICENSE file. See the AUTHORS file for names of contributors.
5
6
#pragma once
7
8
#include <stdint.h>
9
10
#include <memory>
11
#include <string>
12
13
#include "rocksdb/table.h"
14
15
namespace ROCKSDB_NAMESPACE {
16
17
struct EnvOptions;
18
19
class Status;
20
class RandomAccessFile;
21
class WritableFile;
22
class Table;
23
class TableBuilder;
24
25
// PlainTableFactory is the entrance function to the PlainTable format of
26
// SST files. It returns instances of PlainTableBuilder as the builder
27
// class and PlainTableReader as the reader class, where the format is
28
// actually implemented.
29
//
30
// The PlainTable is designed for memory-mapped file systems, e.g. tmpfs.
31
// Data is not organized in blocks, which allows fast access. Because of
32
// the following downsides:
33
// 1. Data compression is not supported.
34
// 2. Data is not checksummed.
35
// it is not recommended to use this format on other types of file systems.
36
//
37
// PlainTable requires fixed length key, configured as a constructor
38
// parameter of the factory class. Output file format:
39
// +-------------+-----------------+
40
// | version     | user_key_length |
41
// +------------++------------+-----------------+  <= key1 offset
42
// |  encoded key1            | value_size  |   |
43
// +------------+-------------+-------------+   |
44
// | value1                                     |
45
// |                                            |
46
// +--------------------------+-------------+---+  <= key2 offset
47
// | encoded key2             | value_size  |   |
48
// +------------+-------------+-------------+   |
49
// | value2                                     |
50
// |                                            |
51
// |        ......                              |
52
// +-----------------+--------------------------+
53
//
54
// When the key encoding type is kPlain, the key part is encoded as:
55
// +------------+--------------------+
56
// | [key_size] |  internal key      |
57
// +------------+--------------------+
58
// for the user_key_len = kPlainTableVariableLength case,
59
// and simply:
60
// +----------------------+
61
// |  internal key        |
62
// +----------------------+
63
// for user_key_len != kPlainTableVariableLength case.
64
//
65
// If the key encoding type is kPrefix, keys are encoded in this format.
66
// There are three ways to encode a key:
67
// (1) Full Key
68
// +---------------+---------------+-------------------+
69
// | Full Key Flag | Full Key Size | Full Internal Key |
70
// +---------------+---------------+-------------------+
71
// which simply encodes a full key
72
//
73
// (2) A key sharing the same prefix as the previous key, which is encoded in the
74
//     format of (1).
75
// +-------------+-------------+-------------+-------------+------------+
76
// | Prefix Flag | Prefix Size | Suffix Flag | Suffix Size | Key Suffix |
77
// +-------------+-------------+-------------+-------------+------------+
78
// where key is the suffix part of the key, including the internal bytes.
79
// the actual key will be constructed by concatenating prefix part of the
80
// previous key, with the suffix part of the key here, with sizes given here.
81
//
82
// (3) A key sharing the same prefix as the previous key, which is encoded in
83
//     the format of (2).
84
// +-----------------+-----------------+------------------------+
85
// | Key Suffix Flag | Key Suffix Size | Suffix of Internal Key |
86
// +-----------------+-----------------+------------------------+
87
// The key will be constructed by concatenating previous key's prefix (which is
88
// also the prefix of the last key encoded in the format of (1)) and the
89
// key given here.
90
//
91
// For example, the following keys (prefix and suffix are separated by
92
// spaces):
93
//   0000 0001
94
//   0000 00021
95
//   0000 0002
96
//   00011 00
97
//   0002 0001
98
// Will be encoded like this:
99
//   FK 8 00000001
100
//   PF 4 SF 5 00021
101
//   SF 4 0002
102
//   FK 7 0001100
103
//   FK 8 00020001
104
// (where FK means full key flag, PF means prefix flag and SF means suffix flag)
105
//
106
// All those "key flag + key size" shown above are in this format:
107
// The 8 bits of the first byte:
108
// +----+----+----+----+----+----+----+----+
109
// |  Type   |            Size             |
110
// +----+----+----+----+----+----+----+----+
111
// Type indicates: full key, prefix, or suffix.
112
// The last 6 bits are for size. If the size bits are not all 1, it means the
113
// size of the key. Otherwise, varint32 is read after this byte. This varint
114
// value + 0x3F (the value of all 1) will be the key size.
115
//
116
// For example, full key with length 16 will be encoded as (binary):
117
//     00 010000
118
// (00 means full key)
119
// and a prefix with 100 bytes will be encoded as:
120
//     01 111111    00100101
121
//         (63)       (37)
122
// (01 means prefix)
123
//
124
// All the internal keys above (including kPlain and kPrefix) are encoded in
125
// this format:
126
// There are two types:
127
// (1) normal internal key format
128
// +----------- ...... -------------+----+---+---+---+---+---+---+---+
129
// |       user key                 |type|      sequence ID          |
130
// +----------- ..... --------------+----+---+---+---+---+---+---+---+
131
// (2) Special case for keys whose sequence ID is 0 and is value type
132
// +----------- ...... -------------+----+
133
// |       user key                 |0x80|
134
// +----------- ..... --------------+----+
135
// To save 7 bytes for the special case where sequence ID = 0.
136
//
137
//
138
class PlainTableFactory : public TableFactory {
139
 public:
140
0
  ~PlainTableFactory() {}
141
  // user_key_len is the length of the user key. If it is set to be
142
  // kPlainTableVariableLength, then it means variable length. Otherwise, all
143
  // the keys need to have the fixed length of this value. bloom_bits_per_key is
144
  // the number of bits used for the bloom filter per key. hash_table_ratio is
145
  // the desired utilization of the hash table used for prefix hashing.
146
  // hash_table_ratio = number of prefixes / #buckets in the hash table
147
  // hash_table_ratio = 0 means skip the hash table and rely only on binary
148
  // search.
149
  // index_sparseness determines the index interval for keys
150
  // inside the same prefix. It will be the maximum number of linear searches
151
  // required after hash and binary search.
152
  // index_sparseness = 0 means index for every key.
153
  // huge_page_tlb_size determines whether to allocate hash indexes from huge
154
  // page TLB and the page size if allocating from there. See comments of
155
  // Arena::AllocateAligned() for details.
156
  explicit PlainTableFactory(
157
      const PlainTableOptions& _table_options = PlainTableOptions());
158
159
  // Method to allow CheckedCast to work for this class
160
0
  static const char* kClassName() { return kPlainTableName(); }
161
0
  const char* Name() const override { return kPlainTableName(); }
162
  using TableFactory::NewTableReader;
163
  Status NewTableReader(const ReadOptions& ro,
164
                        const TableReaderOptions& table_reader_options,
165
                        std::unique_ptr<RandomAccessFileReader>&& file,
166
                        uint64_t file_size, std::unique_ptr<TableReader>* table,
167
                        bool prefetch_index_and_filter_in_cache) const override;
168
169
  TableBuilder* NewTableBuilder(
170
      const TableBuilderOptions& table_builder_options,
171
      WritableFileWriter* file) const override;
172
173
  std::string GetPrintableOptions() const override;
174
  static const char kValueTypeSeqId0 = char(~0);  // all bits set; NOTE(review): the format comment at the top of this file shows 0x80 for the sequence-ID-0 marker -- confirm which is current
175
176
0
  std::unique_ptr<TableFactory> Clone() const override {  // returns a new factory copy-constructed from *this
177
0
    return std::make_unique<PlainTableFactory>(*this);
178
0
  }
179
180
 private:
181
  PlainTableOptions table_options_;
182
};
183
184
}  // namespace ROCKSDB_NAMESPACE