/src/rocksdb/db/builder.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
7 | | // Use of this source code is governed by a BSD-style license that can be |
8 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | |
10 | | #include "db/builder.h" |
11 | | |
12 | | #include <algorithm> |
13 | | #include <deque> |
14 | | #include <vector> |
15 | | |
16 | | #include "db/blob/blob_file_builder.h" |
17 | | #include "db/compaction/compaction_iterator.h" |
18 | | #include "db/dbformat.h" |
19 | | #include "db/event_helpers.h" |
20 | | #include "db/internal_stats.h" |
21 | | #include "db/merge_helper.h" |
22 | | #include "db/output_validator.h" |
23 | | #include "db/range_del_aggregator.h" |
24 | | #include "db/table_cache.h" |
25 | | #include "db/version_edit.h" |
26 | | #include "file/file_util.h" |
27 | | #include "file/filename.h" |
28 | | #include "file/read_write_util.h" |
29 | | #include "file/writable_file_writer.h" |
30 | | #include "monitoring/iostats_context_imp.h" |
31 | | #include "monitoring/thread_status_util.h" |
32 | | #include "options/options_helper.h" |
33 | | #include "rocksdb/db.h" |
34 | | #include "rocksdb/env.h" |
35 | | #include "rocksdb/file_system.h" |
36 | | #include "rocksdb/iterator.h" |
37 | | #include "rocksdb/options.h" |
38 | | #include "rocksdb/table.h" |
39 | | #include "seqno_to_time_mapping.h" |
40 | | #include "table/block_based/block_based_table_builder.h" |
41 | | #include "table/format.h" |
42 | | #include "table/internal_iterator.h" |
43 | | #include "table/unique_id_impl.h" |
44 | | #include "test_util/sync_point.h" |
45 | | #include "util/stop_watch.h" |
46 | | |
47 | | namespace ROCKSDB_NAMESPACE { |
48 | | |
49 | | class TableFactory; |
50 | | |
// Creates a TableBuilder that writes to `file`, by delegating to the table
// factory stored in `tboptions.ioptions.table_factory`.
// The result is returned as a raw pointer; BuildTable() in this file releases
// the builder it obtains here with `delete`.
51 | | TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions, |
52 | 6.27k | WritableFileWriter* file) { |
// Sanity check: the column family id is kUnknownColumnFamily exactly when the
// column family name is empty.
53 | 6.27k | assert((tboptions.column_family_id == |
54 | 6.27k | TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) == |
55 | 6.27k | tboptions.column_family_name.empty()); |
56 | 6.27k | return tboptions.ioptions.table_factory->NewTableBuilder(tboptions, file); |
57 | 6.27k | } |
58 | | |
// Builds one table file from the point entries produced by `iter` and the
// range tombstones in `range_del_iters`, and records the outcome in `*meta`.
//
// Phases, in order:
//   1. Move every range tombstone iterator into a CompactionRangeDelAggregator.
//   2. If there is any input, create the file named by TableFileName(), wrap
//      it in a WritableFileWriter and obtain a TableBuilder for it.
//   3. Drive the input through a CompactionIterator and add each entry it
//      yields to the table builder, the OutputValidator and `meta`'s
//      boundaries.
//   4. Add the range tombstones, then Finish() the builder (or Abandon() it on
//      error / empty output), sync and close the file, and store the file
//      checksum and unique id in `meta`.
//   5. Finish or abandon the blob file builder, if one was created.
//   6. Open the new file through `table_cache` and, when
//      `paranoid_file_checks` is set, read it back and compare its hash with
//      the one accumulated while writing.
//   7. On error or zero file size, delete the table file and any blob files
//      created here. Listeners are notified of the result in every case.
//
// Parameters (as used in this function):
//   dbname                 passed to listener notifications and DeleteDBFile().
//   versions               passed to BlobFileBuilder; used for ApproximateSize().
//   db_options             source of `env`, `fs` and `info_log`.
//   tboptions              table builder options; also supplies the comparator,
//                          read/write options, db ids and creation reason.
//   file_options           used to create the file and to read it back.
//   table_cache            used to open the finished file for verification.
//   iter                   input entries; positioned with SeekToFirst() here.
//   range_del_iters        range tombstone sources; each element is moved from.
//   meta                   in/out; fd number and path id select the file name,
//                          size, boundaries, checksum etc. are written back.
//   blob_file_additions    may be null; blob files are only built when non-null.
//   snapshots, earliest_snapshot, earliest_write_conflict_snapshot,
//   job_snapshot, snapshot_checker
//                          forwarded to MergeHelper / CompactionIterator.
//   internal_stats         may be null.
//   io_status              out; dereferenced unconditionally, so must be
//                          non-null.
//   seqno_to_time_mapping  may be null.
//   table_properties       optional out; written only on success with a
//                          non-empty table.
//   version                optional; when set, compensated_range_deletion_size
//                          is accumulated for range tombstones.
//   num_input_entries      optional out.
//   memtable_payload_bytes, memtable_garbage_bytes
//                          optional outs; written only when both are non-null.
//
// Returns the final status. There are two early returns: NotSupported when the
// created compaction filter reports IgnoreSnapshots() == false, and the
// NewWritableFile() error when the file cannot be created.
59 | | Status BuildTable( |
60 | | const std::string& dbname, VersionSet* versions, |
61 | | const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions, |
62 | | const FileOptions& file_options, TableCache* table_cache, |
63 | | InternalIterator* iter, |
64 | | std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>> |
65 | | range_del_iters, |
66 | | FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions, |
67 | | std::vector<SequenceNumber> snapshots, SequenceNumber earliest_snapshot, |
68 | | SequenceNumber earliest_write_conflict_snapshot, |
69 | | SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker, |
70 | | bool paranoid_file_checks, InternalStats* internal_stats, |
71 | | IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer, |
72 | | BlobFileCreationReason blob_creation_reason, |
73 | | UnownedPtr<const SeqnoToTimeMapping> seqno_to_time_mapping, |
74 | | EventLogger* event_logger, int job_id, TableProperties* table_properties, |
75 | | Env::WriteLifeTimeHint write_hint, const std::string* full_history_ts_low, |
76 | | BlobFileCompletionCallback* blob_callback, Version* version, |
77 | | uint64_t* num_input_entries, uint64_t* memtable_payload_bytes, |
78 | 4.98k | uint64_t* memtable_garbage_bytes) { |
79 | 4.98k | assert((tboptions.column_family_id == |
80 | 4.98k | TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) == |
81 | 4.98k | tboptions.column_family_name.empty()); |
82 | 4.98k | auto& mutable_cf_options = tboptions.moptions; |
83 | 4.98k | auto& ioptions = tboptions.ioptions; |
84 | | // Byte threshold on IOSTATS(bytes_written) for reporting flush progress. |
85 | 4.98k | const size_t kReportFlushIOStatsEvery = 1048576; |
// The validator hashes entries as they are written only when paranoid checks
// are requested; the hash is compared against the re-read file near the end.
86 | 4.98k | OutputValidator output_validator(tboptions.internal_comparator, |
87 | 4.98k | /*enable_hash=*/paranoid_file_checks); |
88 | 4.98k | Status s; |
// A file size of 0 is what the cleanup and notification code at the end of
// this function uses to recognize "no table file was kept".
89 | 4.98k | meta->fd.file_size = 0; |
90 | 4.98k | iter->SeekToFirst(); |
91 | 4.98k | std::unique_ptr<CompactionRangeDelAggregator> range_del_agg( |
92 | 4.98k | new CompactionRangeDelAggregator(&tboptions.internal_comparator, |
93 | 4.98k | snapshots, full_history_ts_low)); |
94 | 4.98k | uint64_t num_unfragmented_tombstones = 0; |
95 | 4.98k | uint64_t total_tombstone_payload_bytes = 0; |
// Record the per-iterator tombstone counters before ownership of the iterator
// is moved into the aggregator.
96 | 4.98k | for (auto& range_del_iter : range_del_iters) { |
97 | 0 | num_unfragmented_tombstones += |
98 | 0 | range_del_iter->num_unfragmented_tombstones(); |
99 | 0 | total_tombstone_payload_bytes += |
100 | 0 | range_del_iter->total_tombstone_payload_bytes(); |
101 | 0 | range_del_agg->AddTombstones(std::move(range_del_iter)); |
102 | 0 | } |
103 | | |
104 | 4.98k | std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(), |
105 | 4.98k | meta->fd.GetPathId()); |
106 | 4.98k | std::vector<std::string> blob_file_paths; |
107 | 4.98k | std::string file_checksum = kUnknownFileChecksum; |
108 | 4.98k | std::string file_checksum_func_name = kUnknownFileChecksumFuncName; |
109 | 4.98k | EventHelpers::NotifyTableFileCreationStarted(ioptions.listeners, dbname, |
110 | 4.98k | tboptions.column_family_name, |
111 | 4.98k | fname, job_id, tboptions.reason); |
112 | 4.98k | Env* env = db_options.env; |
113 | 4.98k | assert(env); |
114 | 4.98k | FileSystem* fs = db_options.fs.get(); |
115 | 4.98k | assert(fs); |
116 | | |
117 | 4.98k | TableProperties tp; |
118 | 4.98k | bool table_file_created = false; |
// Nothing is written (and no file is created) when there is neither a point
// entry nor a range tombstone.
119 | 4.98k | if (iter->Valid() || !range_del_agg->IsEmpty()) { |
120 | 4.98k | std::unique_ptr<CompactionFilter> compaction_filter; |
121 | 4.98k | if (ioptions.compaction_filter_factory != nullptr && |
122 | 4.98k | ioptions.compaction_filter_factory->ShouldFilterTableFileCreation( |
123 | 0 | tboptions.reason)) { |
124 | 0 | CompactionFilter::Context context; |
125 | 0 | context.is_full_compaction = false; |
126 | 0 | context.is_manual_compaction = false; |
127 | 0 | context.column_family_id = tboptions.column_family_id; |
128 | 0 | context.reason = tboptions.reason; |
129 | 0 | compaction_filter = |
130 | 0 | ioptions.compaction_filter_factory->CreateCompactionFilter(context); |
131 | 0 | if (compaction_filter != nullptr && |
132 | 0 | !compaction_filter->IgnoreSnapshots()) { |
// `s` is dropped here in favor of the NotSupported status returned below.
// Note that this return happens before the file is created and without a
// "creation finished" notification.
133 | 0 | s.PermitUncheckedError(); |
134 | 0 | return Status::NotSupported( |
135 | 0 | "CompactionFilter::IgnoreSnapshots() = false is not supported " |
136 | 0 | "anymore."); |
137 | 0 | } |
138 | 0 | } |
139 | | |
// `builder` is a raw owning pointer; it is released by the `delete builder`
// further down. There is no return statement between its creation and that
// delete.
140 | 4.98k | TableBuilder* builder; |
141 | 4.98k | std::unique_ptr<WritableFileWriter> file_writer; |
142 | 4.98k | { |
143 | 4.98k | std::unique_ptr<FSWritableFile> file; |
// `use_direct_writes` exists only in debug builds and is only handed to the
// sync point callback.
144 | | #ifndef NDEBUG |
145 | | bool use_direct_writes = file_options.use_direct_writes; |
146 | | TEST_SYNC_POINT_CALLBACK("BuildTable:create_file", &use_direct_writes); |
147 | | #endif // !NDEBUG |
148 | 4.98k | IOStatus io_s = NewWritableFile(fs, fname, &file, file_options); |
149 | 4.98k | assert(s.ok()); |
150 | 4.98k | s = io_s; |
// Do not replace an error the caller already stored in *io_status.
151 | 4.98k | if (io_status->ok()) { |
152 | 4.98k | *io_status = io_s; |
153 | 4.98k | } |
154 | 4.98k | if (!s.ok()) { |
155 | 0 | EventHelpers::LogAndNotifyTableFileCreationFinished( |
156 | 0 | event_logger, ioptions.listeners, dbname, |
157 | 0 | tboptions.column_family_name, fname, job_id, meta->fd, |
158 | 0 | kInvalidBlobFileNumber, tp, tboptions.reason, s, file_checksum, |
159 | 0 | file_checksum_func_name); |
160 | 0 | return s; |
161 | 0 | } |
162 | | |
163 | 4.98k | table_file_created = true; |
164 | 4.98k | FileTypeSet tmp_set = ioptions.checksum_handoff_file_types; |
165 | 4.98k | file->SetIOPriority(tboptions.write_options.rate_limiter_priority); |
166 | 4.98k | file->SetWriteLifeTimeHint(write_hint); |
167 | 4.98k | file_writer.reset(new WritableFileWriter( |
168 | 4.98k | std::move(file), fname, file_options, ioptions.clock, io_tracer, |
169 | 4.98k | ioptions.stats, Histograms::SST_WRITE_MICROS, ioptions.listeners, |
170 | 4.98k | ioptions.file_checksum_gen_factory.get(), |
171 | 4.98k | tmp_set.Contains(FileType::kTableFile), false)); |
172 | | |
173 | 4.98k | builder = NewTableBuilder(tboptions, file_writer.get()); |
174 | 4.98k | } |
175 | | |
176 | 0 | auto ucmp = tboptions.internal_comparator.user_comparator(); |
177 | 4.98k | MergeHelper merge( |
178 | 4.98k | env, ucmp, ioptions.merge_operator.get(), compaction_filter.get(), |
179 | 4.98k | ioptions.logger, true /* internal key corruption is not ok */, |
180 | 4.98k | snapshots.empty() ? 0 : snapshots.back(), snapshot_checker); |
181 | | |
// A blob file builder is created only when blob files are enabled, the target
// level is at or beyond blob_file_starting_level, and the caller supplied a
// place to record blob file additions.
182 | 4.98k | std::unique_ptr<BlobFileBuilder> blob_file_builder( |
183 | 4.98k | (mutable_cf_options.enable_blob_files && |
184 | 4.98k | tboptions.level_at_creation >= |
185 | 0 | mutable_cf_options.blob_file_starting_level && |
186 | 4.98k | blob_file_additions) |
187 | 4.98k | ? new BlobFileBuilder( |
188 | 0 | versions, fs, &ioptions, &mutable_cf_options, &file_options, |
189 | 0 | &(tboptions.write_options), tboptions.db_id, |
190 | 0 | tboptions.db_session_id, job_id, tboptions.column_family_id, |
191 | 0 | tboptions.column_family_name, write_hint, io_tracer, |
192 | 0 | blob_callback, blob_creation_reason, &blob_file_paths, |
193 | 0 | blob_file_additions) |
194 | 4.98k | : nullptr); |
195 | | |
// Constant `false` stand-in for the iterator's manual_compaction_canceled
// argument.
196 | 4.98k | const std::atomic<bool> kManualCompactionCanceledFalse{false}; |
197 | 4.98k | CompactionIterator c_iter( |
198 | 4.98k | iter, ucmp, &merge, kMaxSequenceNumber, &snapshots, earliest_snapshot, |
199 | 4.98k | earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env, |
200 | 4.98k | ShouldReportDetailedTime(env, ioptions.stats), |
201 | 4.98k | true /* internal key corruption is not ok */, range_del_agg.get(), |
202 | 4.98k | blob_file_builder.get(), ioptions.allow_data_in_errors, |
203 | 4.98k | ioptions.enforce_single_del_contracts, |
204 | 4.98k | /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, |
205 | 4.98k | true /* must_count_input_entries */, |
206 | 4.98k | /*compaction=*/nullptr, compaction_filter.get(), |
207 | 4.98k | /*shutting_down=*/nullptr, db_options.info_log, full_history_ts_low); |
208 | | |
209 | 4.98k | const size_t ts_sz = ucmp->timestamp_size(); |
210 | 4.98k | const bool logical_strip_timestamp = |
211 | 4.98k | ts_sz > 0 && !ioptions.persist_user_defined_timestamps; |
212 | | |
213 | 4.98k | SequenceNumber smallest_preferred_seqno = kMaxSequenceNumber; |
// Both buffers are declared outside the loop and reused for every entry.
214 | 4.98k | std::string key_after_flush_buf; |
215 | 4.98k | std::string value_buf; |
216 | 4.98k | c_iter.SeekToFirst(); |
217 | 10.3k | for (; c_iter.Valid(); c_iter.Next()) { |
218 | 5.39k | const Slice& key = c_iter.key(); |
219 | 5.39k | const Slice& value = c_iter.value(); |
220 | 5.39k | ParsedInternalKey ikey = c_iter.ikey(); |
// Work on a copy of the key: it may be rewritten below (timestamp replacement
// or value-type change).
221 | 5.39k | key_after_flush_buf.assign(key.data(), key.size()); |
222 | 5.39k | Slice key_after_flush = key_after_flush_buf; |
223 | 5.39k | Slice value_after_flush = value; |
224 | | |
225 | | // If user-defined timestamps will be stripped from the user key after flush, |
226 | | // the in-memory version of the key acts logically the same as one with the |
227 | | // minimum timestamp. We update the timestamp here so that the file boundary, |
228 | | // output validator and block builder all see the effect of the stripping. |
229 | 5.39k | if (logical_strip_timestamp) { |
230 | 0 | key_after_flush_buf.clear(); |
231 | 0 | ReplaceInternalKeyWithMinTimestamp(&key_after_flush_buf, key, ts_sz); |
232 | 0 | key_after_flush = key_after_flush_buf; |
233 | 0 | } |
234 | | |
// kTypeValuePreferredSeqno entries carry a packed write time. If the mapping
// yields a sequence number smaller than the entry's own, the value is
// re-packed with that preferred seqno; otherwise the entry is rewritten as a
// plain kTypeValue.
235 | 5.39k | if (ikey.type == kTypeValuePreferredSeqno) { |
236 | 0 | auto [unpacked_value, unix_write_time] = |
237 | 0 | ParsePackedValueWithWriteTime(value); |
238 | 0 | SequenceNumber preferred_seqno = |
239 | 0 | seqno_to_time_mapping |
240 | 0 | ? seqno_to_time_mapping->GetProximalSeqnoBeforeTime( |
241 | 0 | unix_write_time) |
242 | 0 | : kMaxSequenceNumber; |
243 | 0 | if (preferred_seqno < ikey.sequence) { |
244 | 0 | value_after_flush = |
245 | 0 | PackValueAndSeqno(unpacked_value, preferred_seqno, &value_buf); |
246 | 0 | smallest_preferred_seqno = |
247 | 0 | std::min(smallest_preferred_seqno, preferred_seqno); |
248 | 0 | } else { |
249 | | // Cannot get a useful preferred seqno, convert it to a kTypeValue. |
250 | 0 | UpdateInternalKey(&key_after_flush_buf, ikey.sequence, kTypeValue); |
251 | 0 | ikey = ParsedInternalKey(ikey.user_key, ikey.sequence, kTypeValue); |
252 | 0 | key_after_flush = key_after_flush_buf; |
253 | 0 | value_after_flush = ParsePackedValueForValue(value); |
254 | 0 | } |
255 | 0 | } |
256 | | |
257 | | // Generate a rolling 64-bit hash of the key and values |
258 | | // Note : |
259 | | // Here "key" integrates 'sequence_number'+'kType'+'user key'. |
260 | 5.39k | s = output_validator.Add(key_after_flush, value_after_flush); |
261 | 5.39k | if (!s.ok()) { |
262 | 0 | break; |
263 | 0 | } |
264 | 5.39k | builder->Add(key_after_flush, value_after_flush); |
265 | | |
266 | 5.39k | s = meta->UpdateBoundaries(key_after_flush, value_after_flush, |
267 | 5.39k | ikey.sequence, ikey.type); |
268 | 5.39k | if (!s.ok()) { |
269 | 0 | break; |
270 | 0 | } |
271 | | |
272 | | // TODO(noetzli): Update stats after flush, too. |
273 | | // TODO(hx235): Replace `rate_limiter_priority` with `io_activity` for |
274 | | // flush IO in repair when we have an `Env::IOActivity` enum for it |
275 | 5.39k | if ((tboptions.write_options.io_activity == Env::IOActivity::kFlush || |
276 | 5.39k | tboptions.write_options.io_activity == Env::IOActivity::kDBOpen || |
277 | 5.39k | tboptions.write_options.rate_limiter_priority == Env::IO_HIGH) && |
278 | 5.39k | IOSTATS(bytes_written) >= kReportFlushIOStatsEvery) { |
279 | 4.01k | ThreadStatusUtil::SetThreadOperationProperty( |
280 | 4.01k | ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written)); |
281 | 4.01k | } |
282 | 5.39k | } |
// An error raised inside the loop takes precedence; otherwise adopt the
// compaction iterator's own status.
283 | 4.98k | if (!s.ok()) { |
284 | 0 | c_iter.status().PermitUncheckedError(); |
285 | 4.98k | } else if (!c_iter.status().ok()) { |
286 | 0 | s = c_iter.status(); |
287 | 0 | } |
288 | | |
289 | 4.98k | if (s.ok()) { |
290 | 4.98k | auto range_del_it = range_del_agg->NewIterator(); |
291 | 4.98k | Slice last_tombstone_start_user_key{}; |
292 | 4.98k | for (range_del_it->SeekToFirst(); range_del_it->Valid(); |
293 | 4.98k | range_del_it->Next()) { |
294 | | // When user timestamp should not be persisted, we logically strip a |
295 | | // range tombstone's start and end key's timestamp (replace it with min |
296 | | // timestamp) before passing them along to table builder and to update |
297 | | // file boundaries. |
298 | 0 | auto tombstone = range_del_it->Tombstone(logical_strip_timestamp); |
299 | 0 | std::pair<InternalKey, Slice> kv = tombstone.Serialize(); |
300 | 0 | builder->Add(kv.first.Encode(), kv.second); |
301 | 0 | InternalKey tombstone_end = tombstone.SerializeEndKey(); |
302 | 0 | meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_, |
303 | 0 | tboptions.internal_comparator); |
// The size estimate is only added for the first tombstone and for tombstones
// whose start key is strictly greater than the previous tombstone's start
// key.
304 | 0 | if (version) { |
305 | 0 | if (last_tombstone_start_user_key.empty() || |
306 | 0 | ucmp->CompareWithoutTimestamp(last_tombstone_start_user_key, |
307 | 0 | range_del_it->start_key()) < 0) { |
308 | 0 | SizeApproximationOptions approx_opts; |
309 | 0 | approx_opts.files_size_error_margin = 0.1; |
310 | 0 | meta->compensated_range_deletion_size += versions->ApproximateSize( |
311 | 0 | approx_opts, tboptions.read_options, version, kv.first.Encode(), |
312 | 0 | tombstone_end.Encode(), 0 /* start_level */, -1 /* end_level */, |
313 | 0 | TableReaderCaller::kFlush); |
314 | 0 | } |
315 | 0 | last_tombstone_start_user_key = range_del_it->start_key(); |
316 | 0 | } |
317 | 0 | } |
318 | 4.98k | } |
319 | | |
320 | 4.98k | TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable"); |
321 | 4.98k | const bool empty = builder->IsEmpty(); |
322 | 4.98k | if (num_input_entries != nullptr) { |
323 | 4.98k | assert(c_iter.HasNumInputEntryScanned()); |
324 | 4.98k | *num_input_entries = |
325 | 4.98k | c_iter.NumInputEntryScanned() + num_unfragmented_tombstones; |
326 | 4.98k | } |
// Either discard the partially built table (error, or nothing was added) or
// attach the seqno-to-time properties and finish it.
327 | 4.98k | if (!s.ok() || empty) { |
328 | 0 | builder->Abandon(); |
329 | 4.98k | } else { |
330 | 4.98k | SeqnoToTimeMapping relevant_mapping; |
331 | 4.98k | if (seqno_to_time_mapping) { |
332 | 0 | relevant_mapping.CopyFromSeqnoRange( |
333 | 0 | *seqno_to_time_mapping, |
334 | 0 | std::min(meta->fd.smallest_seqno, smallest_preferred_seqno), |
335 | 0 | meta->fd.largest_seqno); |
336 | 0 | relevant_mapping.SetCapacity(kMaxSeqnoTimePairsPerSST); |
337 | 0 | relevant_mapping.Enforce(tboptions.file_creation_time); |
338 | 0 | } |
// FIFO compaction passes the file creation time; every other compaction style
// passes the oldest ancestor time.
339 | 4.98k | builder->SetSeqnoTimeTableProperties( |
340 | 4.98k | relevant_mapping, |
341 | 4.98k | ioptions.compaction_style == CompactionStyle::kCompactionStyleFIFO |
342 | 4.98k | ? meta->file_creation_time |
343 | 4.98k | : meta->oldest_ancester_time); |
344 | 4.98k | s = builder->Finish(); |
345 | 4.98k | } |
346 | 4.98k | if (io_status->ok()) { |
347 | 4.98k | *io_status = builder->io_status(); |
348 | 4.98k | } |
349 | | |
350 | 4.98k | if (s.ok() && !empty) { |
351 | 4.98k | uint64_t file_size = builder->FileSize(); |
352 | 4.98k | meta->fd.file_size = file_size; |
353 | 4.98k | meta->tail_size = builder->GetTailSize(); |
354 | 4.98k | meta->marked_for_compaction = builder->NeedCompact(); |
355 | 4.98k | meta->user_defined_timestamps_persisted = |
356 | 4.98k | ioptions.persist_user_defined_timestamps; |
357 | 4.98k | assert(meta->fd.GetFileSize() > 0); |
358 | 4.98k | tp = builder |
359 | 4.98k | ->GetTableProperties(); // refresh now that builder is finished |
// Garbage = input payload bytes (keys + values + tombstones) minus the raw
// key/value bytes actually written to the table.
360 | 4.98k | if (memtable_payload_bytes != nullptr && |
361 | 4.98k | memtable_garbage_bytes != nullptr) { |
362 | 1.24k | const CompactionIterationStats& ci_stats = c_iter.iter_stats(); |
363 | 1.24k | uint64_t total_payload_bytes = ci_stats.total_input_raw_key_bytes + |
364 | 1.24k | ci_stats.total_input_raw_value_bytes + |
365 | 1.24k | total_tombstone_payload_bytes; |
366 | 1.24k | uint64_t total_payload_bytes_written = |
367 | 1.24k | (tp.raw_key_size + tp.raw_value_size); |
368 | | // Prevent underflow, which may still happen at this point |
369 | | // since we only support inserts, deletes, and deleteRanges. |
370 | 1.24k | if (total_payload_bytes_written <= total_payload_bytes) { |
371 | 1.24k | *memtable_payload_bytes = total_payload_bytes; |
372 | 1.24k | *memtable_garbage_bytes = |
373 | 1.24k | total_payload_bytes - total_payload_bytes_written; |
374 | 1.24k | } else { |
375 | 0 | *memtable_payload_bytes = 0; |
376 | 0 | *memtable_garbage_bytes = 0; |
377 | 0 | } |
378 | 1.24k | } |
379 | 4.98k | if (table_properties) { |
380 | 1.24k | *table_properties = tp; |
381 | 1.24k | } |
382 | 4.98k | } |
383 | 4.98k | delete builder; |
384 | | |
385 | | // Finish and check for file errors |
386 | 4.98k | TEST_SYNC_POINT("BuildTable:BeforeSyncTable"); |
387 | 4.98k | IOOptions opts; |
// NOTE(review): this assignment is unconditional, so it replaces whatever
// *io_status held before, including the builder's io_status stored a few
// statements above. `s` still carries the Finish() status. Confirm that
// dropping the earlier IOStatus is intended.
388 | 4.98k | *io_status = |
389 | 4.98k | WritableFileWriter::PrepareIOOptions(tboptions.write_options, opts); |
390 | 4.98k | if (s.ok() && io_status->ok() && !empty) { |
391 | 4.98k | StopWatch sw(ioptions.clock, ioptions.stats, TABLE_SYNC_MICROS); |
392 | 4.98k | *io_status = file_writer->Sync(opts, ioptions.use_fsync); |
393 | 4.98k | } |
394 | 4.98k | TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile"); |
395 | 4.98k | if (s.ok() && io_status->ok() && !empty) { |
396 | 4.98k | *io_status = file_writer->Close(opts); |
397 | 4.98k | } |
398 | 4.98k | if (s.ok() && io_status->ok() && !empty) { |
399 | | // Add the checksum information to file metadata. |
400 | 4.98k | meta->file_checksum = file_writer->GetFileChecksum(); |
401 | 4.98k | meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName(); |
402 | 4.98k | file_checksum = meta->file_checksum; |
403 | 4.98k | file_checksum_func_name = meta->file_checksum_func_name; |
404 | | // Set unique_id only if db_id and db_session_id exist |
405 | 4.98k | if (!tboptions.db_id.empty() && !tboptions.db_session_id.empty()) { |
406 | 4.98k | if (!GetSstInternalUniqueId(tboptions.db_id, tboptions.db_session_id, |
407 | 4.98k | meta->fd.GetNumber(), &(meta->unique_id)) |
408 | 4.98k | .ok()) { |
409 | | // if failed to get unique id, just set it Null |
410 | 0 | meta->unique_id = kNullUniqueId64x2; |
411 | 0 | } |
412 | 4.98k | } |
413 | 4.98k | } |
414 | | |
// From here on `s` also reflects any sync/close failure.
415 | 4.98k | if (s.ok()) { |
416 | 4.98k | s = *io_status; |
417 | 4.98k | } |
418 | | |
419 | | // TODO(yuzhangyu): handle the key copy in the blob when ts should be |
420 | | // stripped. |
421 | 4.98k | if (blob_file_builder) { |
422 | 0 | if (s.ok()) { |
423 | 0 | s = blob_file_builder->Finish(); |
424 | 0 | } else { |
425 | 0 | blob_file_builder->Abandon(s); |
426 | 0 | } |
427 | 0 | blob_file_builder.reset(); |
428 | 0 | } |
429 | | |
430 | | // TODO: Also check the IO status when creating the iterator. |
431 | | |
432 | 4.98k | TEST_SYNC_POINT("BuildTable:BeforeOutputValidation"); |
433 | 4.98k | if (s.ok() && !empty) { |
434 | | // Verify that the table is usable |
435 | | // We set for_compaction to false and don't OptimizeForCompactionTableRead |
436 | | // here because this is a special case after we finish the table building. |
437 | | // No matter whether use_direct_io_for_flush_and_compaction is true, |
438 | | // the goal is to cache it here for further user reads. |
439 | 4.98k | std::unique_ptr<InternalIterator> it(table_cache->NewIterator( |
440 | 4.98k | tboptions.read_options, file_options, tboptions.internal_comparator, |
441 | 4.98k | *meta, nullptr /* range_del_agg */, |
442 | 4.98k | mutable_cf_options.prefix_extractor, nullptr, |
443 | 4.98k | (internal_stats == nullptr) ? nullptr |
444 | 4.98k | : internal_stats->GetFileReadHist(0), |
445 | 4.98k | TableReaderCaller::kFlush, /*arena=*/nullptr, |
446 | 4.98k | /*skip_filter=*/false, tboptions.level_at_creation, |
447 | 4.98k | MaxFileSizeForL0MetaPin(mutable_cf_options), |
448 | 4.98k | /*smallest_compaction_key=*/nullptr, |
449 | 4.98k | /*largest_compaction_key*/ nullptr, |
450 | 4.98k | /*allow_unprepared_value*/ false, |
451 | 4.98k | mutable_cf_options.block_protection_bytes_per_key)); |
452 | 4.98k | s = it->status(); |
453 | 4.98k | if (s.ok() && paranoid_file_checks) { |
// Re-read every entry from the new file into a second validator. The status
// returned by Add() is not inspected here; a mismatch is detected by
// CompareValidator() below.
454 | 0 | OutputValidator file_validator(tboptions.internal_comparator, |
455 | 0 | /*enable_hash=*/true); |
456 | 0 | for (it->SeekToFirst(); it->Valid(); it->Next()) { |
457 | | // Generate a rolling 64-bit hash of the key and values |
458 | 0 | file_validator.Add(it->key(), it->value()).PermitUncheckedError(); |
459 | 0 | } |
460 | 0 | s = it->status(); |
461 | 0 | if (s.ok() && !output_validator.CompareValidator(file_validator)) { |
462 | 0 | s = Status::Corruption("Paranoid checksums do not match"); |
463 | 0 | } |
464 | 0 | } |
465 | 4.98k | } |
466 | 4.98k | } |
467 | | |
468 | | // Check for input iterator errors |
469 | 4.98k | if (!iter->status().ok()) { |
470 | 0 | s = iter->status(); |
471 | 0 | } |
472 | | |
// Cleanup path: on any error, or when no table data was kept (file size 0),
// delete the table file if it was created, and every blob file created by
// this call. Deletion failures are ignored.
473 | 4.98k | if (!s.ok() || meta->fd.GetFileSize() == 0) { |
474 | 0 | TEST_SYNC_POINT("BuildTable:BeforeDeleteFile"); |
475 |

476 | 0 | constexpr IODebugContext* dbg = nullptr; |
477 |

478 | 0 | if (table_file_created) { |
479 | 0 | IOOptions opts; |
480 | 0 | Status prepare = |
481 | 0 | WritableFileWriter::PrepareIOOptions(tboptions.write_options, opts); |
482 | 0 | if (prepare.ok()) { |
483 | 0 | Status ignored = fs->DeleteFile(fname, opts, dbg); |
484 | 0 | ignored.PermitUncheckedError(); |
485 | 0 | } |
486 | 0 | } |
487 |

488 | 0 | assert(blob_file_additions || blob_file_paths.empty()); |
489 |

490 | 0 | if (blob_file_additions) { |
491 | 0 | for (const std::string& blob_file_path : blob_file_paths) { |
492 | 0 | Status ignored = DeleteDBFile(&db_options, blob_file_path, dbname, |
493 | 0 | /*force_bg=*/false, /*force_fg=*/false); |
494 | 0 | ignored.PermitUncheckedError(); |
495 | 0 | TEST_SYNC_POINT("BuildTable::AfterDeleteFile"); |
496 | 0 | } |
497 | 0 | } |
498 | 0 | } |
499 | | |
// When no file was kept, listeners see the file name "(nil)" and, if nothing
// actually failed, an Aborted status. The function itself still returns `s`.
500 | 4.98k | Status status_for_listener = s; |
501 | 4.98k | if (meta->fd.GetFileSize() == 0) { |
502 | 0 | fname = "(nil)"; |
503 | 0 | if (s.ok()) { |
504 | 0 | status_for_listener = Status::Aborted("Empty SST file not kept"); |
505 | 0 | } |
506 | 0 | } |
507 | | // Output to event logger and fire events. |
508 | 4.98k | EventHelpers::LogAndNotifyTableFileCreationFinished( |
509 | 4.98k | event_logger, ioptions.listeners, dbname, tboptions.column_family_name, |
510 | 4.98k | fname, job_id, meta->fd, meta->oldest_blob_file_number, tp, |
511 | 4.98k | tboptions.reason, status_for_listener, file_checksum, |
512 | 4.98k | file_checksum_func_name); |
513 | | |
514 | 4.98k | return s; |
515 | 4.98k | } |
516 | | |
517 | | } // namespace ROCKSDB_NAMESPACE |