/src/rocksdb/db/log_writer.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
7 | | // Use of this source code is governed by a BSD-style license that can be |
8 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | #pragma once |
10 | | |
11 | | #include <cstdint> |
12 | | #include <memory> |
13 | | #include <unordered_map> |
14 | | #include <vector> |
15 | | |
16 | | #include "db/dbformat.h" |
17 | | #include "db/log_format.h" |
18 | | #include "rocksdb/compression_type.h" |
19 | | #include "rocksdb/env.h" |
20 | | #include "rocksdb/io_status.h" |
21 | | #include "rocksdb/slice.h" |
22 | | #include "rocksdb/status.h" |
23 | | #include "util/compression.h" |
24 | | #include "util/hash_containers.h" |
25 | | |
26 | | namespace ROCKSDB_NAMESPACE { |
27 | | |
28 | | class WritableFileWriter; |
29 | | |
30 | | namespace log { |
31 | | |
32 | | /** |
33 | | * Writer is a general purpose log stream writer. It provides an append-only |
34 | | * abstraction for writing data. The details of how the data is written are |
35 | | * handled by the WritableFile sub-class implementation. |
36 | | * |
37 | | * File format: |
38 | | * |
39 | | * File is broken down into variable sized records. The format of each record |
40 | | * is described below. |
41 | | * +-----+-------------+--+----+----------+------+-- ... ----+ |
42 | | * File | r0 | r1 |P | r2 | r3 | r4 | | |
43 | | * +-----+-------------+--+----+----------+------+-- ... ----+ |
44 | | * <--- kBlockSize ------>|<-- kBlockSize ------>| |
45 | | * rn = variable size records |
46 | | * P = Padding |
47 | | * |
48 | | * Data is written out in kBlockSize chunks. If the next record does not fit |
49 | | * into the space left, the leftover space will be padded with \0. |
50 | | * |
51 | | * Legacy record format: |
52 | | * |
53 | | * +---------+-----------+-----------+--- ... ---+ |
54 | | * |CRC (4B) | Size (2B) | Type (1B) | Payload | |
55 | | * +---------+-----------+-----------+--- ... ---+ |
56 | | * |
57 | | * CRC = 32bit hash computed over the record type and payload using CRC |
58 | | * Size = Length of the payload data |
59 | | * Type = Type of record |
60 | | * (kZeroType, kFullType, kFirstType, kLastType, kMiddleType ) |
61 | | * The type is used to group a bunch of records together to represent |
62 | | * blocks that are larger than kBlockSize |
63 | | * Payload = Byte stream as long as specified by the payload size |
64 | | * |
65 | | * Recyclable record format: |
66 | | * |
67 | | * +---------+-----------+-----------+----------------+--- ... ---+ |
68 | | * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | |
69 | | * +---------+-----------+-----------+----------------+--- ... ---+ |
70 | | * |
71 | | * Same as above, with the addition of |
72 | | * Log number = 32bit log file number, so that we can distinguish between |
73 | | * records written by the most recent log writer vs a previous one. |
74 | | */ |
75 | | class Writer { |
76 | | public: |
77 | | // Create a writer that will append data to "*dest". |
78 | | // "*dest" must be initially empty. |
79 | | // "*dest" must remain live while this Writer is in use. |
80 | | // TODO(hx235): separate WAL related parameters from general `Writer` |
81 | | // parameters |
82 | | explicit Writer(std::unique_ptr<WritableFileWriter>&& dest, |
83 | | uint64_t log_number, bool recycle_log_files, |
84 | | bool manual_flush = false, |
85 | | CompressionType compressionType = kNoCompression, |
86 | | bool track_and_verify_wals = false); |
87 | | // No copying allowed |
88 | | Writer(const Writer&) = delete; |
89 | | void operator=(const Writer&) = delete; |
90 | | |
91 | | ~Writer(); |
92 | | |
93 | | IOStatus AddRecord(const WriteOptions& write_options, const Slice& slice, |
94 | | const SequenceNumber& seqno = 0); |
95 | | IOStatus AddCompressionTypeRecord(const WriteOptions& write_options); |
96 | | IOStatus MaybeAddPredecessorWALInfo(const WriteOptions& write_options, |
97 | | const PredecessorWALInfo& info); |
98 | | |
99 | | // If there are column families in `cf_to_ts_sz` that are not yet included in |
100 | | // `recorded_cf_to_ts_sz_` and whose user-defined timestamp size is non-zero, |
101 | | // adds a record of type kUserDefinedTimestampSizeType or |
102 | | // kRecyclableUserDefinedTimestampSizeType for these column families. |
103 | | // This timestamp size record applies to all subsequent records. |
104 | | IOStatus MaybeAddUserDefinedTimestampSizeRecord( |
105 | | const WriteOptions& write_options, |
106 | | const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz); |
107 | | |
108 | 703k | WritableFileWriter* file() { return dest_.get(); }  // Non-owning; dest_ stays owned by this Writer |
109 | 0 | const WritableFileWriter* file() const { return dest_.get(); } |
110 | | |
111 | 41.9k | uint64_t get_log_number() const { return log_number_; } |
112 | | |
113 | | IOStatus WriteBuffer(const WriteOptions& write_options); |
114 | | |
115 | | IOStatus Close(const WriteOptions& write_options); |
116 | | |
117 | | // If closing the writer through file(), call this afterwards to modify |
118 | | // this object's state to reflect that. Returns true if the destination file |
119 | | // has been closed. If it hasn't been closed, returns false with no change. |
120 | | bool PublishIfClosed(); |
121 | | |
122 | | bool BufferIsEmpty(); |
123 | | |
124 | 0 | size_t TEST_block_offset() const { return block_offset_; }  // Exposes block_offset_ (test hook, per the TEST_ prefix) |
125 | | |
126 | 1.72k | SequenceNumber GetLastSeqnoRecorded() const { return last_seqno_recorded_; } |
127 | | |
128 | | private: |
129 | | std::unique_ptr<WritableFileWriter> dest_;  // Owning handle; exposed non-owningly via file() |
130 | | size_t block_offset_; // Current offset in block |
131 | | uint64_t log_number_; |
132 | | bool recycle_log_files_; |
133 | | int header_size_; |
134 | | |
135 | | // crc32c values for all supported record types. These are |
136 | | // pre-computed to reduce the overhead of computing the crc of the |
137 | | // record type stored in the header. |
138 | | uint32_t type_crc_[kMaxRecordType + 1]; |
139 | | |
140 | | IOStatus EmitPhysicalRecord(const WriteOptions& write_options, |
141 | | RecordType type, const char* ptr, size_t length); |
142 | | |
143 | | IOStatus MaybeHandleSeenFileWriterError(); |
144 | | |
145 | | IOStatus MaybeSwitchToNewBlock(const WriteOptions& write_options, |
146 | | const std::string& content_to_write); |
147 | | |
148 | | // If true, the writer does not flush after each write. Instead it relies on |
149 | | // the upper layer to flush manually by calling ::WriteBuffer() |
150 | | bool manual_flush_; |
151 | | |
152 | | // Compression Type |
153 | | CompressionType compression_type_; |
154 | | StreamingCompress* compress_; |
155 | | // Reusable compressed output buffer |
156 | | std::unique_ptr<char[]> compressed_buffer_; |
157 | | |
158 | | // The user-defined timestamp sizes that have been written so far. |
159 | | // Since the user-defined timestamp size cannot be changed while the DB is |
160 | | // running, an existing entry in this map cannot be updated. |
161 | | UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_; |
162 | | |
163 | | // See `Options::track_and_verify_wals` |
164 | | bool track_and_verify_wals_; |
165 | | |
166 | | SequenceNumber last_seqno_recorded_; |
167 | | }; |
168 | | |
169 | | } // namespace log |
170 | | } // namespace ROCKSDB_NAMESPACE |