/src/rocksdb/table/sst_file_writer.cc
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
#include "rocksdb/sst_file_writer.h"

#include <utility>
#include <vector>

#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "db/wide/wide_column_serialization.h"
#include "db/wide/wide_columns_helper.h"
#include "file/writable_file_writer.h"
#include "rocksdb/file_system.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/sst_file_writer_collectors.h"
#include "test_util/sync_point.h"
20 | | |
21 | | namespace ROCKSDB_NAMESPACE { |
22 | | |
23 | | const std::string ExternalSstFilePropertyNames::kVersion = |
24 | | "rocksdb.external_sst_file.version"; |
25 | | const std::string ExternalSstFilePropertyNames::kGlobalSeqno = |
26 | | "rocksdb.external_sst_file.global_seqno"; |
27 | | |
// Number of newly written bytes after which the page cache is invalidated
// again (1MB).
constexpr size_t kFadviseTrigger = size_t{1} << 20;
29 | | |
30 | | struct SstFileWriter::Rep { |
31 | | Rep(const EnvOptions& _env_options, const Options& options, |
32 | | Env::IOPriority _io_priority, const Comparator* _user_comparator, |
33 | | ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters, |
34 | | std::string _db_session_id) |
35 | 0 | : env_options(_env_options), |
36 | 0 | ioptions(options), |
37 | 0 | mutable_cf_options(options), |
38 | 0 | io_priority(_io_priority), |
39 | 0 | internal_comparator(_user_comparator), |
40 | 0 | cfh(_cfh), |
41 | 0 | invalidate_page_cache(_invalidate_page_cache), |
42 | 0 | skip_filters(_skip_filters), |
43 | 0 | db_session_id(_db_session_id), |
44 | 0 | ts_sz(_user_comparator->timestamp_size()), |
45 | 0 | strip_timestamp(ts_sz > 0 && |
46 | 0 | !ioptions.persist_user_defined_timestamps) { |
47 | | // TODO (hx235): pass in `WriteOptions` instead of `rate_limiter_priority` |
48 | | // during construction |
49 | 0 | write_options.rate_limiter_priority = io_priority; |
50 | 0 | } |
51 | | |
52 | | std::unique_ptr<WritableFileWriter> file_writer; |
53 | | std::unique_ptr<TableBuilder> builder; |
54 | | EnvOptions env_options; |
55 | | ImmutableOptions ioptions; |
56 | | MutableCFOptions mutable_cf_options; |
57 | | Env::IOPriority io_priority; |
58 | | WriteOptions write_options; |
59 | | InternalKeyComparator internal_comparator; |
60 | | ExternalSstFileInfo file_info; |
61 | | InternalKey ikey; |
62 | | std::string column_family_name; |
63 | | ColumnFamilyHandle* cfh; |
64 | | // If true, We will give the OS a hint that this file pages is not needed |
65 | | // every time we write 1MB to the file. |
66 | | bool invalidate_page_cache; |
67 | | // The size of the file during the last time we called Fadvise to remove |
68 | | // cached pages from page cache. |
69 | | uint64_t last_fadvise_size = 0; |
70 | | bool skip_filters; |
71 | | std::string db_session_id; |
72 | | uint64_t next_file_number = 1; |
73 | | size_t ts_sz; |
74 | | bool strip_timestamp; |
75 | | |
76 | | Status AddImpl(const Slice& user_key, const Slice& value, |
77 | 0 | ValueType value_type) { |
78 | 0 | if (!builder) { |
79 | 0 | return Status::InvalidArgument("File is not opened"); |
80 | 0 | } |
81 | 0 | if (!builder->status().ok()) { |
82 | 0 | return builder->status(); |
83 | 0 | } |
84 | | |
85 | 0 | assert(user_key.size() >= ts_sz); |
86 | 0 | if (strip_timestamp) { |
87 | | // In this mode, we expect users to always provide a min timestamp. |
88 | 0 | if (internal_comparator.user_comparator()->CompareTimestamp( |
89 | 0 | Slice(user_key.data() + user_key.size() - ts_sz, ts_sz), |
90 | 0 | MinU64Ts()) != 0) { |
91 | 0 | return Status::InvalidArgument( |
92 | 0 | "persist_user_defined_timestamps flag is set to false, only " |
93 | 0 | "minimum timestamp is accepted."); |
94 | 0 | } |
95 | 0 | } |
96 | 0 | if (file_info.num_entries == 0) { |
97 | 0 | file_info.smallest_key.assign(user_key.data(), user_key.size()); |
98 | 0 | } else { |
99 | 0 | if (internal_comparator.user_comparator()->Compare( |
100 | 0 | user_key, file_info.largest_key) <= 0) { |
101 | | // Make sure that keys are added in order |
102 | 0 | return Status::InvalidArgument( |
103 | 0 | "Keys must be added in strict ascending order."); |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | 0 | assert(value_type == kTypeValue || value_type == kTypeMerge || |
108 | 0 | value_type == kTypeDeletion || |
109 | 0 | value_type == kTypeDeletionWithTimestamp || |
110 | 0 | value_type == kTypeWideColumnEntity); |
111 | |
|
112 | 0 | constexpr SequenceNumber sequence_number = 0; |
113 | |
|
114 | 0 | ikey.Set(user_key, sequence_number, value_type); |
115 | |
|
116 | 0 | builder->Add(ikey.Encode(), value); |
117 | | |
118 | | // update file info |
119 | 0 | file_info.num_entries++; |
120 | 0 | file_info.largest_key.assign(user_key.data(), user_key.size()); |
121 | 0 | file_info.file_size = builder->FileSize(); |
122 | |
|
123 | 0 | InvalidatePageCache(false /* closing */).PermitUncheckedError(); |
124 | 0 | return builder->status(); |
125 | 0 | } |
126 | | |
127 | 0 | Status Add(const Slice& user_key, const Slice& value, ValueType value_type) { |
128 | 0 | if (internal_comparator.user_comparator()->timestamp_size() != 0) { |
129 | 0 | return Status::InvalidArgument("Timestamp size mismatch"); |
130 | 0 | } |
131 | | |
132 | 0 | return AddImpl(user_key, value, value_type); |
133 | 0 | } |
134 | | |
135 | | Status Add(const Slice& user_key, const Slice& timestamp, const Slice& value, |
136 | 0 | ValueType value_type) { |
137 | 0 | const size_t timestamp_size = timestamp.size(); |
138 | |
|
139 | 0 | if (internal_comparator.user_comparator()->timestamp_size() != |
140 | 0 | timestamp_size) { |
141 | 0 | return Status::InvalidArgument("Timestamp size mismatch"); |
142 | 0 | } |
143 | | |
144 | 0 | const size_t user_key_size = user_key.size(); |
145 | |
|
146 | 0 | if (user_key.data() + user_key_size == timestamp.data()) { |
147 | 0 | Slice user_key_with_ts(user_key.data(), user_key_size + timestamp_size); |
148 | 0 | return AddImpl(user_key_with_ts, value, value_type); |
149 | 0 | } |
150 | | |
151 | 0 | std::string user_key_with_ts; |
152 | 0 | user_key_with_ts.reserve(user_key_size + timestamp_size); |
153 | 0 | user_key_with_ts.append(user_key.data(), user_key_size); |
154 | 0 | user_key_with_ts.append(timestamp.data(), timestamp_size); |
155 | |
|
156 | 0 | return AddImpl(user_key_with_ts, value, value_type); |
157 | 0 | } |
158 | | |
159 | 0 | Status AddEntity(const Slice& user_key, const WideColumns& columns) { |
160 | 0 | WideColumns sorted_columns(columns); |
161 | 0 | WideColumnsHelper::SortColumns(sorted_columns); |
162 | |
|
163 | 0 | std::string entity; |
164 | 0 | const Status s = WideColumnSerialization::Serialize(sorted_columns, entity); |
165 | 0 | if (!s.ok()) { |
166 | 0 | return s; |
167 | 0 | } |
168 | 0 | if (entity.size() > size_t{std::numeric_limits<uint32_t>::max()}) { |
169 | 0 | return Status::InvalidArgument("wide column entity is too large"); |
170 | 0 | } |
171 | 0 | return Add(user_key, entity, kTypeWideColumnEntity); |
172 | 0 | } |
173 | | |
174 | 0 | Status DeleteRangeImpl(const Slice& begin_key, const Slice& end_key) { |
175 | 0 | if (!builder) { |
176 | 0 | return Status::InvalidArgument("File is not opened"); |
177 | 0 | } |
178 | 0 | int cmp = internal_comparator.user_comparator()->CompareWithoutTimestamp( |
179 | 0 | begin_key, end_key); |
180 | 0 | if (cmp > 0) { |
181 | | // It's an empty range where endpoints appear mistaken. Don't bother |
182 | | // applying it to the DB, and return an error to the user. |
183 | 0 | return Status::InvalidArgument("end key comes before start key"); |
184 | 0 | } else if (cmp == 0) { |
185 | | // It's an empty range. Don't bother applying it to the DB. |
186 | 0 | return Status::OK(); |
187 | 0 | } |
188 | | |
189 | 0 | assert(begin_key.size() >= ts_sz); |
190 | 0 | assert(end_key.size() >= ts_sz); |
191 | 0 | Slice begin_key_ts = |
192 | 0 | Slice(begin_key.data() + begin_key.size() - ts_sz, ts_sz); |
193 | 0 | Slice end_key_ts = Slice(end_key.data() + end_key.size() - ts_sz, ts_sz); |
194 | 0 | assert(begin_key_ts.compare(end_key_ts) == 0); |
195 | 0 | if (strip_timestamp) { |
196 | | // In this mode, we expect users to always provide a min timestamp. |
197 | 0 | if (internal_comparator.user_comparator()->CompareTimestamp( |
198 | 0 | begin_key_ts, MinU64Ts()) != 0) { |
199 | 0 | return Status::InvalidArgument( |
200 | 0 | "persist_user_defined_timestamps flag is set to false, only " |
201 | 0 | "minimum timestamp is accepted for start key."); |
202 | 0 | } |
203 | 0 | if (internal_comparator.user_comparator()->CompareTimestamp( |
204 | 0 | end_key_ts, MinU64Ts()) != 0) { |
205 | 0 | return Status::InvalidArgument( |
206 | 0 | "persist_user_defined_timestamps flag is set to false, only " |
207 | 0 | "minimum timestamp is accepted for end key."); |
208 | 0 | } |
209 | 0 | } |
210 | | |
211 | 0 | RangeTombstone tombstone(begin_key, end_key, 0 /* Sequence Number */); |
212 | 0 | if (file_info.num_range_del_entries == 0) { |
213 | 0 | file_info.smallest_range_del_key.assign(tombstone.start_key_.data(), |
214 | 0 | tombstone.start_key_.size()); |
215 | 0 | file_info.largest_range_del_key.assign(tombstone.end_key_.data(), |
216 | 0 | tombstone.end_key_.size()); |
217 | 0 | } else { |
218 | 0 | if (internal_comparator.user_comparator()->Compare( |
219 | 0 | tombstone.start_key_, file_info.smallest_range_del_key) < 0) { |
220 | 0 | file_info.smallest_range_del_key.assign(tombstone.start_key_.data(), |
221 | 0 | tombstone.start_key_.size()); |
222 | 0 | } |
223 | 0 | if (internal_comparator.user_comparator()->Compare( |
224 | 0 | tombstone.end_key_, file_info.largest_range_del_key) > 0) { |
225 | 0 | file_info.largest_range_del_key.assign(tombstone.end_key_.data(), |
226 | 0 | tombstone.end_key_.size()); |
227 | 0 | } |
228 | 0 | } |
229 | |
|
230 | 0 | auto ikey_and_end_key = tombstone.Serialize(); |
231 | 0 | builder->Add(ikey_and_end_key.first.Encode(), ikey_and_end_key.second); |
232 | | |
233 | | // update file info |
234 | 0 | file_info.num_range_del_entries++; |
235 | 0 | file_info.file_size = builder->FileSize(); |
236 | |
|
237 | 0 | InvalidatePageCache(false /* closing */).PermitUncheckedError(); |
238 | 0 | return Status::OK(); |
239 | 0 | } |
240 | | |
241 | 0 | Status DeleteRange(const Slice& begin_key, const Slice& end_key) { |
242 | 0 | if (internal_comparator.user_comparator()->timestamp_size() != 0) { |
243 | 0 | return Status::InvalidArgument("Timestamp size mismatch"); |
244 | 0 | } |
245 | 0 | return DeleteRangeImpl(begin_key, end_key); |
246 | 0 | } |
247 | | |
248 | | // begin_key and end_key should be users keys without timestamp. |
249 | | Status DeleteRange(const Slice& begin_key, const Slice& end_key, |
250 | 0 | const Slice& timestamp) { |
251 | 0 | const size_t timestamp_size = timestamp.size(); |
252 | |
|
253 | 0 | if (internal_comparator.user_comparator()->timestamp_size() != |
254 | 0 | timestamp_size) { |
255 | 0 | return Status::InvalidArgument("Timestamp size mismatch"); |
256 | 0 | } |
257 | | |
258 | 0 | const size_t begin_key_size = begin_key.size(); |
259 | 0 | const size_t end_key_size = end_key.size(); |
260 | 0 | if (begin_key.data() + begin_key_size == timestamp.data() || |
261 | 0 | end_key.data() + begin_key_size == timestamp.data()) { |
262 | 0 | assert(memcmp(begin_key.data() + begin_key_size, |
263 | 0 | end_key.data() + end_key_size, timestamp_size) == 0); |
264 | 0 | Slice begin_key_with_ts(begin_key.data(), |
265 | 0 | begin_key_size + timestamp_size); |
266 | 0 | Slice end_key_with_ts(end_key.data(), end_key.size() + timestamp_size); |
267 | 0 | return DeleteRangeImpl(begin_key_with_ts, end_key_with_ts); |
268 | 0 | } |
269 | 0 | std::string begin_key_with_ts; |
270 | 0 | begin_key_with_ts.reserve(begin_key_size + timestamp_size); |
271 | 0 | begin_key_with_ts.append(begin_key.data(), begin_key_size); |
272 | 0 | begin_key_with_ts.append(timestamp.data(), timestamp_size); |
273 | 0 | std::string end_key_with_ts; |
274 | 0 | end_key_with_ts.reserve(end_key_size + timestamp_size); |
275 | 0 | end_key_with_ts.append(end_key.data(), end_key_size); |
276 | 0 | end_key_with_ts.append(timestamp.data(), timestamp_size); |
277 | 0 | return DeleteRangeImpl(begin_key_with_ts, end_key_with_ts); |
278 | 0 | } |
279 | | |
280 | 0 | Status InvalidatePageCache(bool closing) { |
281 | 0 | Status s = Status::OK(); |
282 | 0 | if (invalidate_page_cache == false) { |
283 | | // Fadvise disabled |
284 | 0 | return s; |
285 | 0 | } |
286 | 0 | uint64_t bytes_since_last_fadvise = builder->FileSize() - last_fadvise_size; |
287 | 0 | if (bytes_since_last_fadvise > kFadviseTrigger || closing) { |
288 | 0 | TEST_SYNC_POINT_CALLBACK("SstFileWriter::Rep::InvalidatePageCache", |
289 | 0 | &(bytes_since_last_fadvise)); |
290 | | // Tell the OS that we don't need this file in page cache |
291 | 0 | s = file_writer->InvalidateCache(0, 0); |
292 | 0 | if (s.IsNotSupported()) { |
293 | | // NotSupported is fine as it could be a file type that doesn't use page |
294 | | // cache. |
295 | 0 | s = Status::OK(); |
296 | 0 | } |
297 | 0 | last_fadvise_size = builder->FileSize(); |
298 | 0 | } |
299 | 0 | return s; |
300 | 0 | } |
301 | | }; |
302 | | |
303 | | SstFileWriter::SstFileWriter(const EnvOptions& env_options, |
304 | | const Options& options, |
305 | | const Comparator* user_comparator, |
306 | | ColumnFamilyHandle* column_family, |
307 | | bool invalidate_page_cache, |
308 | | Env::IOPriority io_priority, bool skip_filters) |
309 | 0 | : rep_(new Rep(env_options, options, io_priority, user_comparator, |
310 | 0 | column_family, invalidate_page_cache, skip_filters, |
311 | 0 | DBImpl::GenerateDbSessionId(options.env))) { |
312 | | // SstFileWriter is used to create sst files that can be added to database |
313 | | // later. Therefore, no real db_id and db_session_id are associated with it. |
314 | | // Here we mimic the way db_session_id behaves by getting a db_session_id |
315 | | // for each SstFileWriter, and (later below) assign unique file numbers |
316 | | // in the table properties. The db_id is set to be "SST Writer" for clarity. |
317 | |
|
318 | 0 | rep_->file_info.file_size = 0; |
319 | 0 | } |
320 | | |
321 | 0 | SstFileWriter::~SstFileWriter() { |
322 | 0 | if (rep_->builder) { |
323 | | // User did not call Finish() or Finish() failed, we need to |
324 | | // abandon the builder. |
325 | 0 | rep_->builder->Abandon(); |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | 0 | Status SstFileWriter::Open(const std::string& file_path, Temperature temp) { |
330 | 0 | Rep* r = rep_.get(); |
331 | 0 | Status s; |
332 | 0 | std::unique_ptr<FSWritableFile> sst_file; |
333 | 0 | FileOptions cur_file_opts(r->env_options); |
334 | 0 | cur_file_opts.temperature = temp; |
335 | 0 | s = r->ioptions.env->GetFileSystem()->NewWritableFile( |
336 | 0 | file_path, cur_file_opts, &sst_file, nullptr); |
337 | 0 | if (!s.ok()) { |
338 | 0 | return s; |
339 | 0 | } |
340 | | |
341 | 0 | sst_file->SetIOPriority(r->io_priority); |
342 | |
|
343 | 0 | CompressionType compression_type; |
344 | 0 | CompressionOptions compression_opts; |
345 | 0 | if (r->mutable_cf_options.bottommost_compression != |
346 | 0 | kDisableCompressionOption) { |
347 | 0 | compression_type = r->mutable_cf_options.bottommost_compression; |
348 | 0 | if (r->mutable_cf_options.bottommost_compression_opts.enabled) { |
349 | 0 | compression_opts = r->mutable_cf_options.bottommost_compression_opts; |
350 | 0 | } else { |
351 | 0 | compression_opts = r->mutable_cf_options.compression_opts; |
352 | 0 | } |
353 | 0 | } else if (!r->mutable_cf_options.compression_per_level.empty()) { |
354 | | // Use the compression of the last level if we have per level compression |
355 | 0 | compression_type = *(r->mutable_cf_options.compression_per_level.rbegin()); |
356 | 0 | compression_opts = r->mutable_cf_options.compression_opts; |
357 | 0 | } else { |
358 | 0 | compression_type = r->mutable_cf_options.compression; |
359 | 0 | compression_opts = r->mutable_cf_options.compression_opts; |
360 | 0 | } |
361 | |
|
362 | 0 | InternalTblPropCollFactories internal_tbl_prop_coll_factories; |
363 | | |
364 | | // SstFileWriter properties collector to add SstFileWriter version. |
365 | 0 | internal_tbl_prop_coll_factories.emplace_back( |
366 | 0 | new SstFileWriterPropertiesCollectorFactory(2 /* version */, |
367 | 0 | 0 /* global_seqno*/)); |
368 | | |
369 | | // User collector factories |
370 | 0 | auto user_collector_factories = |
371 | 0 | r->ioptions.table_properties_collector_factories; |
372 | 0 | for (size_t i = 0; i < user_collector_factories.size(); i++) { |
373 | 0 | internal_tbl_prop_coll_factories.emplace_back( |
374 | 0 | new UserKeyTablePropertiesCollectorFactory( |
375 | 0 | user_collector_factories[i])); |
376 | 0 | } |
377 | 0 | int unknown_level = -1; |
378 | 0 | uint32_t cf_id; |
379 | |
|
380 | 0 | if (r->cfh != nullptr) { |
381 | | // user explicitly specified that this file will be ingested into cfh, |
382 | | // we can persist this information in the file. |
383 | 0 | cf_id = r->cfh->GetID(); |
384 | 0 | r->column_family_name = r->cfh->GetName(); |
385 | 0 | } else { |
386 | 0 | r->column_family_name = ""; |
387 | 0 | cf_id = TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; |
388 | 0 | } |
389 | | |
390 | | // TODO: it would be better to set oldest_key_time to be used for getting the |
391 | | // approximate time of ingested keys. |
392 | | // TODO: plumb Env::IOActivity, Env::IOPriority |
393 | 0 | TableBuilderOptions table_builder_options( |
394 | 0 | r->ioptions, r->mutable_cf_options, ReadOptions(), r->write_options, |
395 | 0 | r->internal_comparator, &internal_tbl_prop_coll_factories, |
396 | 0 | compression_type, compression_opts, cf_id, r->column_family_name, |
397 | 0 | unknown_level, kUnknownNewestKeyTime, false /* is_bottommost */, |
398 | 0 | TableFileCreationReason::kMisc, 0 /* oldest_key_time */, |
399 | 0 | 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id, |
400 | 0 | 0 /* target_file_size */, r->next_file_number); |
401 | | // External SST files used to each get a unique session id. Now for |
402 | | // slightly better uniqueness probability in constructing cache keys, we |
403 | | // assign fake file numbers to each file (into table properties) and keep |
404 | | // the same session id for the life of the SstFileWriter. |
405 | 0 | r->next_file_number++; |
406 | | // XXX: when we can remove skip_filters from the SstFileWriter public API |
407 | | // we can remove it from TableBuilderOptions. |
408 | 0 | table_builder_options.skip_filters = r->skip_filters; |
409 | 0 | FileTypeSet tmp_set = r->ioptions.checksum_handoff_file_types; |
410 | 0 | r->file_writer.reset(new WritableFileWriter( |
411 | 0 | std::move(sst_file), file_path, r->env_options, r->ioptions.clock, |
412 | 0 | nullptr /* io_tracer */, r->ioptions.stats, Histograms::SST_WRITE_MICROS, |
413 | 0 | r->ioptions.listeners, r->ioptions.file_checksum_gen_factory.get(), |
414 | 0 | tmp_set.Contains(FileType::kTableFile), false)); |
415 | | |
416 | | // TODO(tec) : If table_factory is using compressed block cache, we will |
417 | | // be adding the external sst file blocks into it, which is wasteful. |
418 | 0 | r->builder.reset(r->mutable_cf_options.table_factory->NewTableBuilder( |
419 | 0 | table_builder_options, r->file_writer.get())); |
420 | |
|
421 | 0 | r->file_info = ExternalSstFileInfo(); |
422 | 0 | r->file_info.file_path = file_path; |
423 | 0 | r->file_info.version = 2; |
424 | 0 | return s; |
425 | 0 | } |
426 | | |
427 | 0 | Status SstFileWriter::Add(const Slice& user_key, const Slice& value) { |
428 | 0 | return rep_->Add(user_key, value, ValueType::kTypeValue); |
429 | 0 | } |
430 | | |
431 | 0 | Status SstFileWriter::Put(const Slice& user_key, const Slice& value) { |
432 | 0 | return rep_->Add(user_key, value, ValueType::kTypeValue); |
433 | 0 | } |
434 | | |
435 | | Status SstFileWriter::Put(const Slice& user_key, const Slice& timestamp, |
436 | 0 | const Slice& value) { |
437 | 0 | return rep_->Add(user_key, timestamp, value, ValueType::kTypeValue); |
438 | 0 | } |
439 | | |
440 | | Status SstFileWriter::PutEntity(const Slice& user_key, |
441 | 0 | const WideColumns& columns) { |
442 | 0 | return rep_->AddEntity(user_key, columns); |
443 | 0 | } |
444 | | |
445 | 0 | Status SstFileWriter::Merge(const Slice& user_key, const Slice& value) { |
446 | 0 | return rep_->Add(user_key, value, ValueType::kTypeMerge); |
447 | 0 | } |
448 | | |
449 | 0 | Status SstFileWriter::Delete(const Slice& user_key) { |
450 | 0 | return rep_->Add(user_key, Slice(), ValueType::kTypeDeletion); |
451 | 0 | } |
452 | | |
453 | 0 | Status SstFileWriter::Delete(const Slice& user_key, const Slice& timestamp) { |
454 | 0 | return rep_->Add(user_key, timestamp, Slice(), |
455 | 0 | ValueType::kTypeDeletionWithTimestamp); |
456 | 0 | } |
457 | | |
458 | | Status SstFileWriter::DeleteRange(const Slice& begin_key, |
459 | 0 | const Slice& end_key) { |
460 | 0 | return rep_->DeleteRange(begin_key, end_key); |
461 | 0 | } |
462 | | |
463 | | Status SstFileWriter::DeleteRange(const Slice& begin_key, const Slice& end_key, |
464 | 0 | const Slice& timestamp) { |
465 | 0 | return rep_->DeleteRange(begin_key, end_key, timestamp); |
466 | 0 | } |
467 | | |
468 | 0 | Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { |
469 | 0 | Rep* r = rep_.get(); |
470 | 0 | if (!r->builder) { |
471 | 0 | return Status::InvalidArgument("File is not opened"); |
472 | 0 | } |
473 | 0 | if (r->file_info.num_entries == 0 && |
474 | 0 | r->file_info.num_range_del_entries == 0) { |
475 | 0 | r->builder->status().PermitUncheckedError(); |
476 | 0 | return Status::InvalidArgument("Cannot create sst file with no entries"); |
477 | 0 | } |
478 | | |
479 | 0 | Status s = r->builder->Finish(); |
480 | 0 | r->file_info.file_size = r->builder->FileSize(); |
481 | |
|
482 | 0 | IOOptions opts; |
483 | 0 | if (s.ok()) { |
484 | 0 | s = WritableFileWriter::PrepareIOOptions(r->write_options, opts); |
485 | 0 | } |
486 | 0 | if (s.ok()) { |
487 | 0 | s = r->file_writer->Sync(opts, r->ioptions.use_fsync); |
488 | 0 | r->InvalidatePageCache(true /* closing */).PermitUncheckedError(); |
489 | 0 | if (s.ok()) { |
490 | 0 | s = r->file_writer->Close(opts); |
491 | 0 | } |
492 | 0 | } |
493 | 0 | if (s.ok()) { |
494 | 0 | r->file_info.file_checksum = r->file_writer->GetFileChecksum(); |
495 | 0 | r->file_info.file_checksum_func_name = |
496 | 0 | r->file_writer->GetFileChecksumFuncName(); |
497 | 0 | } |
498 | 0 | if (!s.ok()) { |
499 | 0 | Status status = r->ioptions.env->DeleteFile(r->file_info.file_path); |
500 | | // Silence ASSERT_STATUS_CHECKED warning, since DeleteFile may fail under |
501 | | // some error injection, and we can just ignore the failure |
502 | 0 | status.PermitUncheckedError(); |
503 | 0 | } |
504 | |
|
505 | 0 | if (file_info != nullptr) { |
506 | 0 | *file_info = r->file_info; |
507 | 0 | Slice smallest_key = r->file_info.smallest_key; |
508 | 0 | Slice largest_key = r->file_info.largest_key; |
509 | 0 | Slice smallest_range_del_key = r->file_info.smallest_range_del_key; |
510 | 0 | Slice largest_range_del_key = r->file_info.largest_range_del_key; |
511 | 0 | assert(smallest_key.empty() == largest_key.empty()); |
512 | 0 | assert(smallest_range_del_key.empty() == largest_range_del_key.empty()); |
513 | | // Remove user-defined timestamps from external file metadata too when they |
514 | | // should not be persisted. |
515 | 0 | if (r->strip_timestamp) { |
516 | 0 | if (!smallest_key.empty()) { |
517 | 0 | assert(smallest_key.size() >= r->ts_sz); |
518 | 0 | assert(largest_key.size() >= r->ts_sz); |
519 | 0 | file_info->smallest_key.resize(smallest_key.size() - r->ts_sz); |
520 | 0 | file_info->largest_key.resize(largest_key.size() - r->ts_sz); |
521 | 0 | } |
522 | 0 | if (!smallest_range_del_key.empty()) { |
523 | 0 | assert(smallest_range_del_key.size() >= r->ts_sz); |
524 | 0 | assert(largest_range_del_key.size() >= r->ts_sz); |
525 | 0 | file_info->smallest_range_del_key.resize(smallest_range_del_key.size() - |
526 | 0 | r->ts_sz); |
527 | 0 | file_info->largest_range_del_key.resize(largest_range_del_key.size() - |
528 | 0 | r->ts_sz); |
529 | 0 | } |
530 | 0 | } |
531 | 0 | } |
532 | |
|
533 | 0 | r->builder.reset(); |
534 | 0 | return s; |
535 | 0 | } |
536 | | |
537 | 0 | uint64_t SstFileWriter::FileSize() { return rep_->file_info.file_size; } |
538 | | |
539 | 0 | bool SstFileWriter::CreatedBySstFileWriter(const TableProperties& tp) { |
540 | 0 | const auto& uprops = tp.user_collected_properties; |
541 | 0 | return uprops.find(ExternalSstFilePropertyNames::kVersion) != uprops.end(); |
542 | 0 | } |
543 | | |
544 | | } // namespace ROCKSDB_NAMESPACE |