Coverage Report

Created: 2026-03-31 07:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/db/log_writer.h
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
// Use of this source code is governed by a BSD-style license that can be
8
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
#pragma once
10
11
#include <cstdint>
12
#include <memory>
13
#include <unordered_map>
14
#include <vector>
15
16
#include "db/dbformat.h"
17
#include "db/log_format.h"
18
#include "rocksdb/compression_type.h"
19
#include "rocksdb/env.h"
20
#include "rocksdb/io_status.h"
21
#include "rocksdb/slice.h"
22
#include "rocksdb/status.h"
23
#include "util/compression.h"
24
#include "util/hash_containers.h"
25
26
namespace ROCKSDB_NAMESPACE {
27
28
class WritableFileWriter;
29
30
namespace log {
31
32
/**
33
 * Writer is a general purpose log stream writer. It provides an append-only
34
 * abstraction for writing data. The details of how the data is written are
35
 * handled by the WritableFile sub-class implementation.
36
 *
37
 * File format:
38
 *
39
 * File is broken down into variable sized records. The format of each record
40
 * is described below.
41
 *       +-----+-------------+--+----+----------+------+-- ... ----+
42
 * File  | r0  |        r1   |P | r2 |    r3    |  r4  |           |
43
 *       +-----+-------------+--+----+----------+------+-- ... ----+
44
 *       <--- kBlockSize ------>|<-- kBlockSize ------>|
45
 *  rn = variable size records
46
 *  P = Padding
47
 *
48
 * Data is written out in kBlockSize chunks. If next record does not fit
49
 * into the space left, the leftover space will be padded with \0.
50
 *
51
 * Legacy record format:
52
 *
53
 * +---------+-----------+-----------+--- ... ---+
54
 * |CRC (4B) | Size (2B) | Type (1B) | Payload   |
55
 * +---------+-----------+-----------+--- ... ---+
56
 *
57
 * CRC = 32bit hash computed over the record type and payload using CRC
58
 * Size = Length of the payload data
59
 * Type = Type of record
60
 *        (kZeroType, kFullType, kFirstType, kLastType, kMiddleType )
61
 *        The type is used to group a bunch of records together to represent
62
 *        blocks that are larger than kBlockSize
63
 * Payload = Byte stream as long as specified by the payload size
64
 *
65
 * Recyclable record format:
66
 *
67
 * +---------+-----------+-----------+----------------+--- ... ---+
68
 * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload   |
69
 * +---------+-----------+-----------+----------------+--- ... ---+
70
 *
71
 * Same as above, with the addition of
72
 * Log number = 32bit log file number, so that we can distinguish between
73
 * records written by the most recent log writer vs a previous one.
74
 */
75
class Writer {
76
 public:
77
  // Create a writer that will append data to "*dest".
78
  // "*dest" must be initially empty.
79
  // "*dest" must remain live while this Writer is in use.
80
  // TODO(hx235): separate WAL related parameters from general `Writer`
81
  // parameters
82
  explicit Writer(std::unique_ptr<WritableFileWriter>&& dest,
83
                  uint64_t log_number, bool recycle_log_files,
84
                  bool manual_flush = false,
85
                  CompressionType compressionType = kNoCompression,
86
                  bool track_and_verify_wals = false);
87
  // No copying allowed
88
  Writer(const Writer&) = delete;
89
  void operator=(const Writer&) = delete;
90
91
  ~Writer();
92
93
  IOStatus AddRecord(const WriteOptions& write_options, const Slice& slice,
94
                     const SequenceNumber& seqno = 0);
95
  IOStatus AddCompressionTypeRecord(const WriteOptions& write_options);
96
  IOStatus MaybeAddPredecessorWALInfo(const WriteOptions& write_options,
97
                                      const PredecessorWALInfo& info);
98
99
  // If there are column families in `cf_to_ts_sz` not included in
100
  // `recorded_cf_to_ts_sz_` whose user-defined timestamp size is non-zero,
101
  // adds a record of type kUserDefinedTimestampSizeType or
102
  // kRecyclableUserDefinedTimestampSizeType for these column families.
103
  // This timestamp size record applies to all subsequent records.
104
  IOStatus MaybeAddUserDefinedTimestampSizeRecord(
105
      const WriteOptions& write_options,
106
      const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz);
107
108
703k
  WritableFileWriter* file() { return dest_.get(); }
109
0
  const WritableFileWriter* file() const { return dest_.get(); }
110
111
41.9k
  uint64_t get_log_number() const { return log_number_; }
112
113
  IOStatus WriteBuffer(const WriteOptions& write_options);
114
115
  IOStatus Close(const WriteOptions& write_options);
116
117
  // If closing the writer through file(), call this afterwards to modify
118
  // this object's state to reflect that. Returns true if the destination file
119
  // has been closed. If it hasn't been closed, returns false with no change.
120
  bool PublishIfClosed();
121
122
  bool BufferIsEmpty();
123
124
0
  size_t TEST_block_offset() const { return block_offset_; }
125
126
1.72k
  SequenceNumber GetLastSeqnoRecorded() const { return last_seqno_recorded_; };
127
128
 private:
129
  std::unique_ptr<WritableFileWriter> dest_;  // Destination file; see file()
130
  size_t block_offset_;  // Current offset in block
131
  uint64_t log_number_;  // Returned by get_log_number()
132
  bool recycle_log_files_;
133
  int header_size_;
134
135
  // crc32c values for all supported record types.  These are
136
  // pre-computed to reduce the overhead of computing the crc of the
137
  // record type stored in the header.
138
  uint32_t type_crc_[kMaxRecordType + 1];
139
140
  IOStatus EmitPhysicalRecord(const WriteOptions& write_options,
141
                              RecordType type, const char* ptr, size_t length);
142
143
  IOStatus MaybeHandleSeenFileWriterError();
144
145
  IOStatus MaybeSwitchToNewBlock(const WriteOptions& write_options,
146
                                 const std::string& content_to_write);
147
148
  // If true, it does not flush after each write. Instead it relies on the upper
149
  // layer to manually do the flush by calling ::WriteBuffer()
150
  bool manual_flush_;
151
152
  // Compression Type
153
  CompressionType compression_type_;
154
  StreamingCompress* compress_;  // NOTE(review): raw pointer; confirm ownership
155
  // Reusable compressed output buffer
156
  std::unique_ptr<char[]> compressed_buffer_;
157
158
  // The recorded user-defined timestamp sizes that have been written so far.
159
  // Since the user-defined timestamp size cannot be changed while the DB is
160
  // running, existing entries in this map cannot be updated.
161
  UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_;
162
163
  // See `Options::track_and_verify_wals`
164
  bool track_and_verify_wals_;
165
166
  SequenceNumber last_seqno_recorded_;  // Returned by GetLastSeqnoRecorded()
167
};
168
169
}  // namespace log
170
}  // namespace ROCKSDB_NAMESPACE