Coverage Report

Created: 2024-09-08 07:17

/src/rocksdb/db/error_handler.cc
Line    Count    Source
1
//  Copyright (c) 2018-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
#include "db/error_handler.h"
7
8
#include "db/db_impl/db_impl.h"
9
#include "db/event_helpers.h"
10
#include "file/sst_file_manager_impl.h"
11
#include "logging/logging.h"
12
#include "port/lang.h"
13
14
namespace ROCKSDB_NAMESPACE {
15
16
// Maps to help decide the severity of an error based on the
17
// BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks
18
// is set or not. There are 3 maps, going from most specific to least specific
19
// (i.e. from all 4 fields in a tuple to only the BackgroundErrorReason and
20
// paranoid_checks). The less specific maps serve as a catch-all in case we miss
21
// a specific error code or subcode.
22
std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
23
         Status::Severity>
24
    ErrorSeverityMap = {
25
        // Errors during BG compaction
26
        {std::make_tuple(BackgroundErrorReason::kCompaction,
27
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
28
                         true),
29
         Status::Severity::kSoftError},
30
        {std::make_tuple(BackgroundErrorReason::kCompaction,
31
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
32
                         false),
33
         Status::Severity::kNoError},
34
        {std::make_tuple(BackgroundErrorReason::kCompaction,
35
                         Status::Code::kIOError, Status::SubCode::kSpaceLimit,
36
                         true),
37
         Status::Severity::kHardError},
38
        {std::make_tuple(BackgroundErrorReason::kCompaction,
39
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
40
                         true),
41
         Status::Severity::kFatalError},
42
        {std::make_tuple(BackgroundErrorReason::kCompaction,
43
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
44
                         false),
45
         Status::Severity::kFatalError},
46
        // Errors during BG flush
47
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
48
                         Status::SubCode::kNoSpace, true),
49
         Status::Severity::kHardError},
50
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
51
                         Status::SubCode::kNoSpace, false),
52
         Status::Severity::kNoError},
53
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
54
                         Status::SubCode::kSpaceLimit, true),
55
         Status::Severity::kHardError},
56
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
57
                         Status::SubCode::kIOFenced, true),
58
         Status::Severity::kFatalError},
59
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
60
                         Status::SubCode::kIOFenced, false),
61
         Status::Severity::kFatalError},
62
        // Errors during Write
63
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
64
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
65
                         true),
66
         Status::Severity::kHardError},
67
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
68
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
69
                         false),
70
         Status::Severity::kHardError},
71
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
72
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
73
                         true),
74
         Status::Severity::kFatalError},
75
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
76
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
77
                         false),
78
         Status::Severity::kFatalError},
79
        // Errors during MANIFEST write
80
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
81
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
82
                         true),
83
         Status::Severity::kHardError},
84
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
85
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
86
                         false),
87
         Status::Severity::kHardError},
88
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
89
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
90
                         true),
91
         Status::Severity::kFatalError},
92
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
93
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
94
                         false),
95
         Status::Severity::kFatalError},
96
        // Errors during BG flush with WAL disabled
97
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
98
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
99
                         true),
100
         Status::Severity::kHardError},
101
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
102
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
103
                         false),
104
         Status::Severity::kNoError},
105
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
106
                         Status::Code::kIOError, Status::SubCode::kSpaceLimit,
107
                         true),
108
         Status::Severity::kHardError},
109
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
110
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
111
                         true),
112
         Status::Severity::kFatalError},
113
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
114
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
115
                         false),
116
         Status::Severity::kFatalError},
117
        // Errors during MANIFEST write when WAL is disabled
118
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
119
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
120
                         true),
121
         Status::Severity::kHardError},
122
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
123
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
124
                         false),
125
         Status::Severity::kHardError},
126
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
127
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
128
                         true),
129
         Status::Severity::kFatalError},
130
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
131
                         Status::Code::kIOError, Status::SubCode::kIOFenced,
132
                         false),
133
         Status::Severity::kFatalError},
134
135
};
136
137
std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>,
138
         Status::Severity>
139
    DefaultErrorSeverityMap = {
140
        // Errors during BG compaction
141
        {std::make_tuple(BackgroundErrorReason::kCompaction,
142
                         Status::Code::kCorruption, true),
143
         Status::Severity::kUnrecoverableError},
144
        {std::make_tuple(BackgroundErrorReason::kCompaction,
145
                         Status::Code::kCorruption, false),
146
         Status::Severity::kNoError},
147
        {std::make_tuple(BackgroundErrorReason::kCompaction,
148
                         Status::Code::kIOError, true),
149
         Status::Severity::kFatalError},
150
        {std::make_tuple(BackgroundErrorReason::kCompaction,
151
                         Status::Code::kIOError, false),
152
         Status::Severity::kNoError},
153
        // Errors during BG flush
154
        {std::make_tuple(BackgroundErrorReason::kFlush,
155
                         Status::Code::kCorruption, true),
156
         Status::Severity::kUnrecoverableError},
157
        {std::make_tuple(BackgroundErrorReason::kFlush,
158
                         Status::Code::kCorruption, false),
159
         Status::Severity::kNoError},
160
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
161
                         true),
162
         Status::Severity::kFatalError},
163
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
164
                         false),
165
         Status::Severity::kNoError},
166
        // Errors during Write
167
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
168
                         Status::Code::kCorruption, true),
169
         Status::Severity::kUnrecoverableError},
170
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
171
                         Status::Code::kCorruption, false),
172
         Status::Severity::kNoError},
173
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
174
                         Status::Code::kIOError, true),
175
         Status::Severity::kFatalError},
176
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
177
                         Status::Code::kIOError, false),
178
         Status::Severity::kNoError},
179
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
180
                         Status::Code::kIOError, true),
181
         Status::Severity::kFatalError},
182
        {std::make_tuple(BackgroundErrorReason::kManifestWrite,
183
                         Status::Code::kIOError, false),
184
         Status::Severity::kFatalError},
185
        // Errors during BG flush with WAL disabled
186
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
187
                         Status::Code::kCorruption, true),
188
         Status::Severity::kUnrecoverableError},
189
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
190
                         Status::Code::kCorruption, false),
191
         Status::Severity::kNoError},
192
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
193
                         Status::Code::kIOError, true),
194
         Status::Severity::kFatalError},
195
        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
196
                         Status::Code::kIOError, false),
197
         Status::Severity::kNoError},
198
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
199
                         Status::Code::kIOError, true),
200
         Status::Severity::kFatalError},
201
        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
202
                         Status::Code::kIOError, false),
203
         Status::Severity::kFatalError},
204
};
205
206
std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
207
    DefaultReasonMap = {
208
        // Errors during BG compaction
209
        {std::make_tuple(BackgroundErrorReason::kCompaction, true),
210
         Status::Severity::kFatalError},
211
        {std::make_tuple(BackgroundErrorReason::kCompaction, false),
212
         Status::Severity::kNoError},
213
        // Errors during BG flush
214
        {std::make_tuple(BackgroundErrorReason::kFlush, true),
215
         Status::Severity::kFatalError},
216
        {std::make_tuple(BackgroundErrorReason::kFlush, false),
217
         Status::Severity::kNoError},
218
        // Errors during Write
219
        {std::make_tuple(BackgroundErrorReason::kWriteCallback, true),
220
         Status::Severity::kFatalError},
221
        {std::make_tuple(BackgroundErrorReason::kWriteCallback, false),
222
         Status::Severity::kFatalError},
223
        // Errors during Memtable update
224
        {std::make_tuple(BackgroundErrorReason::kMemTable, true),
225
         Status::Severity::kFatalError},
226
        {std::make_tuple(BackgroundErrorReason::kMemTable, false),
227
         Status::Severity::kFatalError},
228
};
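Note: taken together, the three maps above are consulted from most specific to least specific. A minimal sketch of that fall-through, mirroring the lookup performed in HandleKnownErrors further below (the helper name here is hypothetical and not part of this file):

// Hypothetical helper, for illustration only; the real classification is the
// sequence of map lookups in ErrorHandler::HandleKnownErrors below.
static Status::Severity ClassifySeverity(BackgroundErrorReason reason,
                                         const Status& bg_err, bool paranoid) {
  // 1. Most specific: (reason, code, subcode, paranoid_checks)
  auto it = ErrorSeverityMap.find(
      std::make_tuple(reason, bg_err.code(), bg_err.subcode(), paranoid));
  if (it != ErrorSeverityMap.end()) {
    return it->second;
  }
  // 2. Fall back to (reason, code, paranoid_checks)
  auto def_it = DefaultErrorSeverityMap.find(
      std::make_tuple(reason, bg_err.code(), paranoid));
  if (def_it != DefaultErrorSeverityMap.end()) {
    return def_it->second;
  }
  // 3. Catch-all: (reason, paranoid_checks); default to kFatalError if absent
  auto reason_it = DefaultReasonMap.find(std::make_tuple(reason, paranoid));
  return reason_it != DefaultReasonMap.end() ? reason_it->second
                                             : Status::Severity::kFatalError;
}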
229
230
11.0k
void ErrorHandler::CancelErrorRecovery() {
231
11.0k
  db_mutex_->AssertHeld();
232
233
  // We'll release the lock before calling sfm, so make sure no new
234
  // recovery gets scheduled at that point
235
11.0k
  auto_recovery_ = false;
236
11.0k
  SstFileManagerImpl* sfm =
237
11.0k
      static_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
238
11.0k
  if (sfm) {
239
    // This may or may not cancel a pending recovery
240
11.0k
    db_mutex_->Unlock();
241
11.0k
    bool cancelled = sfm->CancelErrorRecovery(this);
242
11.0k
    db_mutex_->Lock();
243
11.0k
    if (cancelled) {
244
0
      recovery_in_prog_ = false;
245
0
    }
246
11.0k
  }
247
248
  // If auto recovery is also running to resume from the retryable error,
249
  // we should wait and end the auto recovery.
250
11.0k
  EndAutoRecovery();
251
11.0k
}
252
253
// This is the main function for looking at an error during a background
254
// operation and deciding the severity and the error recovery strategy. The
255
// high-level algorithm is as follows:
256
// 1. Classify the severity of the error based on the ErrorSeverityMap,
257
//    DefaultErrorSeverityMap and DefaultReasonMap defined earlier
258
// 2. Call a Status code specific override function to adjust the severity
259
//    if needed. The reason for this is that our ability to recover may depend on
260
//    the exact options enabled in DBOptions
261
// 3. Determine if auto recovery is possible. A listener notification callback
262
//    is called, which can disable the auto recovery even if we decide it is
263
//    feasible
264
// 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control
265
//    the actual recovery. If no sst file manager is specified in DBOptions,
266
//    a default one is allocated during DB::Open(), so there will always be
267
//    one.
268
// This can also get called as part of a recovery operation. In that case, we
269
// also track the error separately in recovery_error_ so we can tell in the
270
// end whether recovery succeeded or not
271
void ErrorHandler::HandleKnownErrors(const Status& bg_err,
272
0
                                     BackgroundErrorReason reason) {
273
0
  db_mutex_->AssertHeld();
274
0
  if (bg_err.ok()) {
275
0
    return;
276
0
  }
277
278
0
  ROCKS_LOG_INFO(db_options_.info_log,
279
0
                 "ErrorHandler: Set regular background error\n");
280
281
0
  bool paranoid = db_options_.paranoid_checks;
282
0
  Status::Severity sev = Status::Severity::kFatalError;
283
0
  Status new_bg_err;
284
0
  DBRecoverContext context;
285
0
  bool found = false;
286
287
0
  {
288
0
    auto entry = ErrorSeverityMap.find(
289
0
        std::make_tuple(reason, bg_err.code(), bg_err.subcode(), paranoid));
290
0
    if (entry != ErrorSeverityMap.end()) {
291
0
      sev = entry->second;
292
0
      found = true;
293
0
    }
294
0
  }
295
296
0
  if (!found) {
297
0
    auto entry = DefaultErrorSeverityMap.find(
298
0
        std::make_tuple(reason, bg_err.code(), paranoid));
299
0
    if (entry != DefaultErrorSeverityMap.end()) {
300
0
      sev = entry->second;
301
0
      found = true;
302
0
    }
303
0
  }
304
305
0
  if (!found) {
306
0
    auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid));
307
0
    if (entry != DefaultReasonMap.end()) {
308
0
      sev = entry->second;
309
0
    }
310
0
  }
311
312
0
  new_bg_err = Status(bg_err, sev);
313
314
  // Check if recovery is currently in progress. If it is, we will save this
315
  // error so we can check it at the end to see if recovery succeeded or not
316
0
  if (recovery_in_prog_ && recovery_error_.ok()) {
317
0
    recovery_error_ = status_to_io_status(Status(new_bg_err));
318
0
  }
319
320
0
  bool auto_recovery = auto_recovery_;
321
0
  if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) {
322
0
    auto_recovery = false;
323
0
  }
324
325
  // Allow some error specific overrides
326
0
  if (new_bg_err.subcode() == IOStatus::SubCode::kNoSpace ||
327
0
      new_bg_err.subcode() == IOStatus::SubCode::kSpaceLimit) {
328
0
    new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery);
329
0
  }
330
331
0
  if (!new_bg_err.ok()) {
332
0
    Status s = new_bg_err;
333
0
    EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
334
0
                                          db_mutex_, &auto_recovery);
335
0
    if (!s.ok() && (s.severity() > bg_error_.severity())) {
336
0
      bg_error_ = s;
337
0
    } else {
338
      // This error is less severe than the previously encountered error. Don't
339
      // take any further action
340
0
      return;
341
0
    }
342
0
  }
343
344
0
  recover_context_ = context;
345
0
  if (auto_recovery) {
346
0
    recovery_in_prog_ = true;
347
348
    // Kick-off error specific recovery
349
0
    if (new_bg_err.subcode() == IOStatus::SubCode::kNoSpace ||
350
0
        new_bg_err.subcode() == IOStatus::SubCode::kSpaceLimit) {
351
0
      RecoverFromNoSpace();
352
0
    }
353
0
  }
354
0
  if (bg_error_.severity() >= Status::Severity::kHardError) {
355
0
    is_db_stopped_.store(true, std::memory_order_release);
356
0
  }
357
0
}
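Note: step 3 of the algorithm above notifies registered listeners, which may veto auto recovery. A minimal application-side sketch, assuming the public rocksdb::EventListener hooks OnBackgroundError and OnErrorRecoveryBegin (the class name is hypothetical):

#include <cstdio>
#include "rocksdb/listener.h"

class BGErrorObserver : public rocksdb::EventListener {
 public:
  void OnBackgroundError(rocksdb::BackgroundErrorReason /*reason*/,
                         rocksdb::Status* bg_error) override {
    // Inspect (or even overwrite) the background error being set.
    std::fprintf(stderr, "Background error: %s\n",
                 bg_error->ToString().c_str());
  }
  void OnErrorRecoveryBegin(rocksdb::BackgroundErrorReason /*reason*/,
                            rocksdb::Status /*bg_error*/,
                            bool* auto_recovery) override {
    // Setting *auto_recovery to false here vetoes automatic recovery.
    *auto_recovery = true;
  }
};

Such a listener would be registered through DBOptions::listeners before opening the DB.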
358
359
// This is the main function for looking at IO-related errors during
360
// background operations. The main logic is:
361
// A file scope IO error is treated as a retryable IO error in the write path. In
362
// RocksDB, if a file has a write IO error at file scope, RocksDB never
363
// writes to the same file again. RocksDB will create a new file and rewrite the
364
// whole content. Thus, it is retryable.
365
// There are three main categories of error handling:
366
// 1) if the error is caused by data loss, the error is mapped to
367
//    an unrecoverable error. The application/user must take action to handle
368
//    this situation (the file scope case is excluded).
369
// 2) if the error is a Retryable IO error (i.e., it is a file scope IO error,
370
//     or its retryable flag is set and not a data loss error), auto resume (
371
//     DBImpl::ResumeImpl) may be called and the auto resume can be controlled
372
//     by resume count and resume interval options. There are three sub-cases:
373
//    a) if the error happens during compaction, it is mapped to a soft error.
374
//       The compaction thread will reschedule a new compaction. This doesn't
375
//       call auto resume.
376
//    b) if the error happens during flush and the WAL is empty, it is mapped
377
//       to a soft error. Note that this includes IO errors that happen
378
//       in SST or manifest write during flush. Auto resume will be called.
379
//    c) all other errors are mapped to hard error. Auto resume will be called.
380
// 3) for other cases, HandleKnownErrors(const Status& bg_err,
381
//    BackgroundErrorReason reason) will be called to handle the remaining cases,
382
//    such as delegating to SstFileManager to handle no space error.
383
void ErrorHandler::SetBGError(const Status& bg_status,
384
0
                              BackgroundErrorReason reason) {
385
0
  db_mutex_->AssertHeld();
386
0
  Status tmp_status = bg_status;
387
0
  IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
388
389
0
  if (bg_io_err.ok()) {
390
0
    return;
391
0
  }
392
0
  ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
393
0
                 bg_io_err.ToString().c_str());
394
395
0
  RecordStats({ERROR_HANDLER_BG_ERROR_COUNT, ERROR_HANDLER_BG_IO_ERROR_COUNT},
396
0
              {} /* int_histograms */);
397
398
0
  Status new_bg_io_err = bg_io_err;
399
0
  DBRecoverContext context;
400
0
  if (bg_io_err.GetScope() != IOStatus::IOErrorScope::kIOErrorScopeFile &&
401
0
      bg_io_err.GetDataLoss()) {
402
    // First, data loss (non file scope) is treated as an unrecoverable error. So
403
    // it can directly overwrite any existing bg_error_.
404
0
    bool auto_recovery = false;
405
0
    Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError);
406
0
    CheckAndSetRecoveryAndBGError(bg_err);
407
0
    ROCKS_LOG_INFO(
408
0
        db_options_.info_log,
409
0
        "ErrorHandler: Set background IO error as unrecoverable error\n");
410
0
    EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
411
0
                                          &bg_err, db_mutex_, &auto_recovery);
412
0
    recover_context_ = context;
413
0
    return;
414
0
  }
415
0
  if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
416
0
      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
417
0
       bg_io_err.GetRetryable())) {
418
    // Second, check if the error is a retryable IO error (file scope IO error
419
    // is also treated as a retryable IO error in the RocksDB write path). If it is a
420
    // retryable error and its severity is higher than bg_error_, overwrite the
421
    // bg_error_ with the new error. Currently, for a retryable IO error during
422
    // compaction, treat it as a soft error. In other cases, treat the retryable
423
    // IO error as a hard error. Note that all NoSpace errors should be
424
    // handled by the SstFileManager::StartErrorRecovery(). Therefore, no matter
425
    // whether it is retryable or file scope, this logic is bypassed.
426
427
0
    RecordStats({ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT},
428
0
                {} /* int_histograms */);
429
0
    ROCKS_LOG_INFO(db_options_.info_log,
430
0
                   "ErrorHandler: Set background retryable IO error\n");
431
0
    if (BackgroundErrorReason::kCompaction == reason) {
432
      // We map the retryable IO error during compaction to a soft error, since
433
      // compaction can reschedule itself. We will not set the BG error in
434
      // this case.
435
      // TODO: find a better way to set or clear the retryable IO error which
436
      // happens during compaction SST file write.
437
0
      RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
438
0
      ROCKS_LOG_INFO(
439
0
          db_options_.info_log,
440
0
          "ErrorHandler: Compaction will schedule by itself to resume\n");
441
0
      bool auto_recovery = false;
442
0
      EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
443
0
                                            &new_bg_io_err, db_mutex_,
444
0
                                            &auto_recovery);
445
      // Not used in this code path.
446
0
      new_bg_io_err.PermitUncheckedError();
447
0
      return;
448
0
    }
449
450
0
    Status::Severity severity;
451
0
    if (BackgroundErrorReason::kFlushNoWAL == reason ||
452
0
        BackgroundErrorReason::kManifestWriteNoWAL == reason) {
453
      // When the BG Retryable IO error reason is flush without WAL,
454
      // we map it to a soft error. At the same time, all the background work
455
      // should be stopped except the BG work from recovery. Therefore, we
456
      // set the soft_error_no_bg_work_ to true. Also, since the DB
457
      // continues to receive writes when the BG error is a soft error, to avoid
458
      // too many small memtables being generated during auto resume, the flush
459
      // reason is set to kErrorRecoveryRetryFlush.
460
0
      severity = Status::Severity::kSoftError;
461
0
      soft_error_no_bg_work_ = true;
462
0
      context.flush_reason = FlushReason::kErrorRecoveryRetryFlush;
463
0
    } else {
464
0
      severity = Status::Severity::kHardError;
465
0
    }
466
0
    Status bg_err(new_bg_io_err, severity);
467
0
    CheckAndSetRecoveryAndBGError(bg_err);
468
0
    recover_context_ = context;
469
0
    bool auto_recovery = db_options_.max_bgerror_resume_count > 0;
470
0
    EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
471
0
                                          &new_bg_io_err, db_mutex_,
472
0
                                          &auto_recovery);
473
0
    StartRecoverFromRetryableBGIOError(bg_io_err);
474
0
    return;
475
0
  }
476
0
  HandleKnownErrors(new_bg_io_err, reason);
477
0
}
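Note: the retryable branch above is driven by flags carried on IOStatus. A minimal sketch of constructing such a status, assuming the public IOStatus setters SetRetryable/SetDataLoss (values are illustrative):

#include "rocksdb/io_status.h"

// Illustrative only: build an IOError that the retryable branch above would
// treat as retryable (retryable flag set and not a data loss).
static rocksdb::IOStatus MakeRetryableIOError() {
  rocksdb::IOStatus io_s =
      rocksdb::IOStatus::IOError("simulated write failure");
  io_s.SetRetryable(true);    // retryable flag set, not a data loss error
  // io_s.SetDataLoss(true);  // would instead route to the unrecoverable branch
  return io_s;
}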
478
479
void ErrorHandler::AddFilesToQuarantine(
480
0
    autovector<const autovector<uint64_t>*> files_to_quarantine) {
481
0
  db_mutex_->AssertHeld();
482
0
  std::ostringstream quarantine_files_oss;
483
0
  bool is_first_one = true;
484
0
  for (const auto* files : files_to_quarantine) {
485
0
    assert(files);
486
0
    for (uint64_t file_number : *files) {
487
0
      files_to_quarantine_.push_back(file_number);
488
0
      quarantine_files_oss << (is_first_one ? "" : ", ") << file_number;
489
0
      is_first_one = false;
490
0
    }
491
0
  }
492
0
  ROCKS_LOG_INFO(db_options_.info_log,
493
0
                 "ErrorHandler: added file numbers %s to quarantine.\n",
494
0
                 quarantine_files_oss.str().c_str());
495
0
}
496
497
0
void ErrorHandler::ClearFilesToQuarantine() {
498
0
  db_mutex_->AssertHeld();
499
0
  files_to_quarantine_.clear();
500
0
  ROCKS_LOG_INFO(db_options_.info_log,
501
0
                 "ErrorHandler: cleared files in quarantine.\n");
502
0
}
503
504
Status ErrorHandler::OverrideNoSpaceError(const Status& bg_error,
505
0
                                          bool* auto_recovery) {
506
0
  if (bg_error.severity() >= Status::Severity::kFatalError) {
507
0
    return bg_error;
508
0
  }
509
510
0
  if (db_options_.sst_file_manager.get() == nullptr) {
511
    // We rely on SFM to poll for enough disk space and recover
512
0
    *auto_recovery = false;
513
0
    return bg_error;
514
0
  }
515
516
0
  if (db_options_.allow_2pc &&
517
0
      (bg_error.severity() <= Status::Severity::kSoftError)) {
518
    // Don't know how to recover, as the contents of the current WAL file may
519
    // be inconsistent, and it may be needed for 2PC. If 2PC is not enabled,
520
    // we can just flush the memtable and discard the log
521
0
    *auto_recovery = false;
522
0
    return Status(bg_error, Status::Severity::kFatalError);
523
0
  }
524
525
0
  {
526
0
    uint64_t free_space;
527
0
    if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path,
528
0
                                      &free_space) == Status::NotSupported()) {
529
0
      *auto_recovery = false;
530
0
    }
531
0
  }
532
533
0
  return bg_error;
534
0
}
535
536
0
void ErrorHandler::RecoverFromNoSpace() {
537
0
  SstFileManagerImpl* sfm =
538
0
      static_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
539
540
  // Inform SFM of the error, so it can kick-off the recovery
541
0
  if (sfm) {
542
0
    sfm->StartErrorRecovery(this, bg_error_);
543
0
  }
544
0
}
545
546
0
Status ErrorHandler::ClearBGError() {
547
0
  db_mutex_->AssertHeld();
548
549
  // Signal that recovery succeeded
550
0
  if (recovery_error_.ok()) {
551
0
    assert(files_to_quarantine_.empty());
552
0
    Status old_bg_error = bg_error_;
553
    // old_bg_error is only for notifying listeners, so may not be checked
554
0
    old_bg_error.PermitUncheckedError();
555
    // Clear and check the recovery IO and BG error
556
0
    is_db_stopped_.store(false, std::memory_order_release);
557
0
    bg_error_ = Status::OK();
558
0
    recovery_error_ = IOStatus::OK();
559
0
    bg_error_.PermitUncheckedError();
560
0
    recovery_error_.PermitUncheckedError();
561
0
    recovery_in_prog_ = false;
562
0
    soft_error_no_bg_work_ = false;
563
0
    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error,
564
0
                                           bg_error_, db_mutex_);
565
0
  }
566
0
  return recovery_error_;
567
0
}
568
569
0
Status ErrorHandler::RecoverFromBGError(bool is_manual) {
570
0
  InstrumentedMutexLock l(db_mutex_);
571
0
  bool no_bg_work_original_flag = soft_error_no_bg_work_;
572
0
  if (is_manual) {
573
    // If it's a manual recovery and there's a background recovery in progress,
574
    // return a busy status.
575
0
    if (recovery_in_prog_) {
576
0
      return Status::Busy("Recovery already in progress");
577
0
    }
578
0
    recovery_in_prog_ = true;
579
580
    // In manual resume, we allow the bg work to run. If it is an auto resume,
581
    // the bg work should follow this flag.
582
0
    soft_error_no_bg_work_ = false;
583
584
    // In manual resume, if the bg error is a soft error and also requires
585
    // no bg work, the error must be recovered by calling flush with the
586
    // flush reason kErrorRecoveryRetryFlush. Otherwise, the flush
587
    // reason is set to kErrorRecovery.
588
0
    if (no_bg_work_original_flag) {
589
0
      recover_context_.flush_reason = FlushReason::kErrorRecoveryRetryFlush;
590
0
    } else {
591
0
      recover_context_.flush_reason = FlushReason::kErrorRecovery;
592
0
    }
593
0
  }
594
595
0
  if (bg_error_.severity() == Status::Severity::kSoftError &&
596
0
      recover_context_.flush_reason == FlushReason::kErrorRecovery) {
597
    // Simply clear the background error and return
598
0
    recovery_error_ = IOStatus::OK();
599
0
    return ClearBGError();
600
0
  }
601
602
  // Reset recovery_error_. We will use this to record any errors that happen
603
  // during the recovery process. While recovering, the only operations that
604
  // can generate background errors should be the flush operations
605
0
  recovery_error_ = IOStatus::OK();
606
0
  recovery_error_.PermitUncheckedError();
607
0
  Status s = db_->ResumeImpl(recover_context_);
608
0
  if (s.ok()) {
609
0
    soft_error_no_bg_work_ = false;
610
0
  } else {
611
0
    soft_error_no_bg_work_ = no_bg_work_original_flag;
612
0
  }
613
614
  // For manual recovery, shutdown, and fatal error cases, set
615
  // recovery_in_prog_ to false. For automatic background recovery, leave it
616
  // as is regardless of success or failure as it will be retried
617
0
  if (is_manual || s.IsShutdownInProgress() ||
618
0
      bg_error_.severity() >= Status::Severity::kFatalError) {
619
0
    recovery_in_prog_ = false;
620
0
  }
621
0
  return s;
622
0
}
623
624
void ErrorHandler::StartRecoverFromRetryableBGIOError(
625
0
    const IOStatus& io_error) {
626
0
  db_mutex_->AssertHeld();
627
0
  if (bg_error_.ok() || io_error.ok()) {
628
0
    return;
629
0
  }
630
0
  if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
631
    // Auto resume from BG error is not enabled, or a recovery is already in progress
632
0
    return;
633
0
  }
634
0
  if (end_recovery_) {
635
    // Can temporarily release db mutex
636
0
    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
637
0
                                           Status::ShutdownInProgress(),
638
0
                                           db_mutex_);
639
0
    db_mutex_->AssertHeld();
640
0
    return;
641
0
  }
642
0
  RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
643
0
  ROCKS_LOG_INFO(
644
0
      db_options_.info_log,
645
0
      "ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n");
646
  // Needs to be set in the same lock hold as setting BG error, otherwise
647
  // intervening writes could see a BG error without a recovery and bail out.
648
0
  recovery_in_prog_ = true;
649
650
0
  if (recovery_thread_) {
651
    // Ensure only one thread can execute the join().
652
0
    std::unique_ptr<port::Thread> old_recovery_thread(
653
0
        std::move(recovery_thread_));
654
    // In this case, if recovery_in_prog_ is false, the current thread should
655
    // wait for the previous recovery thread to finish and create a new thread
656
    // to recover from the bg error.
657
0
    db_mutex_->Unlock();
658
0
    TEST_SYNC_POINT(
659
0
        "StartRecoverFromRetryableBGIOError:BeforeWaitingForOtherThread");
660
0
    old_recovery_thread->join();
661
0
    TEST_SYNC_POINT(
662
0
        "StartRecoverFromRetryableBGIOError:AfterWaitingForOtherThread");
663
0
    db_mutex_->Lock();
664
0
  }
665
666
0
  recovery_thread_.reset(
667
0
      new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
668
0
}
669
670
// Automatic recovery from retryable BG IO error. Must be called after the db
671
// mutex is released.
672
0
void ErrorHandler::RecoverFromRetryableBGIOError() {
673
0
  assert(recovery_in_prog_);
674
0
  TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart");
675
0
  TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart2");
676
0
  InstrumentedMutexLock l(db_mutex_);
677
0
  if (end_recovery_) {
678
0
    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
679
0
                                           Status::ShutdownInProgress(),
680
0
                                           db_mutex_);
681
682
0
    recovery_in_prog_ = false;
683
0
    return;
684
0
  }
685
0
  DBRecoverContext context = recover_context_;
686
0
  context.flush_after_recovery = true;
687
0
  int resume_count = db_options_.max_bgerror_resume_count;
688
0
  uint64_t wait_interval = db_options_.bgerror_resume_retry_interval;
689
0
  uint64_t retry_count = 0;
690
  // Recover from the retryable error in this dedicated recovery thread.
691
0
  while (resume_count > 0) {
692
0
    if (end_recovery_) {
693
0
      EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
694
0
                                             Status::ShutdownInProgress(),
695
0
                                             db_mutex_);
696
0
      recovery_in_prog_ = false;
697
0
      return;
698
0
    }
699
0
    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume0");
700
0
    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume1");
701
0
    recovery_error_ = IOStatus::OK();
702
0
    retry_count++;
703
0
    Status s = db_->ResumeImpl(context);
704
0
    RecordStats({ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT},
705
0
                {} /* int_histograms */);
706
0
    if (s.IsShutdownInProgress() ||
707
0
        bg_error_.severity() >= Status::Severity::kFatalError) {
708
      // If DB shutdown is in progress or the error severity is higher than
709
      // hard error, stop auto resume and return.
710
0
      recovery_in_prog_ = false;
711
0
      RecordStats({} /* ticker_types */,
712
0
                  {{ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count}});
713
0
      EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
714
0
                                             bg_error_, db_mutex_);
715
0
      return;
716
0
    }
717
0
    if (!recovery_error_.ok() &&
718
0
        recovery_error_.severity() <= Status::Severity::kHardError &&
719
0
        recovery_error_.GetRetryable()) {
720
      // If a new BG IO error happens during auto recovery and it is retryable
721
      // and its severity is hard error or lower, auto resume sleeps for
722
      // a period of time and retries if it is allowed.
723
0
      TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait0");
724
0
      TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1");
725
0
      int64_t wait_until = db_options_.clock->NowMicros() + wait_interval;
726
0
      cv_.TimedWait(wait_until);
727
0
    } else {
728
      // There are three possibilities: 1) recovery_error_ is set during resume
729
      // and the error is not retryable, 2) recovery is successful, 3) another
730
      // error happens during resume and cannot be handled here.
731
0
      if (recovery_error_.ok() && s.ok()) {
732
        // Recovered from the retryable IO error with no other BG errors. Clear
733
        // the bg_error and notify the user.
734
0
        TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess");
735
0
        RecordStats({ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT},
736
0
                    {{ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count}});
737
0
        return;
738
0
      } else {
739
        // In this case: 1) recovery_error_ is more serious or not retryable, or
740
        // 2) another error happened. The auto recovery stops.
741
0
        recovery_in_prog_ = false;
742
0
        RecordStats({} /* ticker_types */,
743
0
                    {{ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count}});
744
0
        EventHelpers::NotifyOnErrorRecoveryEnd(
745
0
            db_options_.listeners, bg_error_,
746
0
            !recovery_error_.ok() ? recovery_error_ : s, db_mutex_);
747
0
        return;
748
0
      }
749
0
    }
750
0
    resume_count--;
751
0
  }
752
0
  recovery_in_prog_ = false;
753
0
  EventHelpers::NotifyOnErrorRecoveryEnd(
754
0
      db_options_.listeners, bg_error_,
755
0
      Status::Aborted("Exceeded resume retry count"), db_mutex_);
756
0
  TEST_SYNC_POINT("RecoverFromRetryableBGIOError:LoopOut");
757
0
  RecordStats({} /* ticker_types */,
758
0
              {{ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count}});
759
0
}
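Note: the retry budget and wait interval used by the loop above come from DBOptions. A minimal application-side sketch of tuning them, assuming the standard max_bgerror_resume_count and bgerror_resume_retry_interval options (values are illustrative):

#include "rocksdb/options.h"

// Illustrative values only; both fields are DBOptions members that gate the
// auto-resume loop above.
static rocksdb::Options MakeOptionsWithBoundedAutoResume() {
  rocksdb::Options options;
  options.max_bgerror_resume_count = 5;  // at most 5 automatic resume attempts
  options.bgerror_resume_retry_interval = 2 * 1000 * 1000;  // 2s, in microseconds
  return options;
}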
760
761
0
void ErrorHandler::CheckAndSetRecoveryAndBGError(const Status& bg_err) {
762
0
  if (recovery_in_prog_ && recovery_error_.ok()) {
763
0
    recovery_error_ = status_to_io_status(Status(bg_err));
764
0
  }
765
0
  if (bg_err.severity() > bg_error_.severity()) {
766
0
    bg_error_ = bg_err;
767
0
  }
768
0
  if (bg_error_.severity() >= Status::Severity::kHardError) {
769
0
    is_db_stopped_.store(true, std::memory_order_release);
770
0
  }
771
0
}
772
773
11.0k
void ErrorHandler::EndAutoRecovery() {
774
11.0k
  db_mutex_->AssertHeld();
775
11.0k
  if (!end_recovery_) {
776
11.0k
    end_recovery_ = true;
777
11.0k
  }
778
11.0k
  if (recovery_thread_) {
779
    // Ensure only one thread can execute the join().
780
0
    std::unique_ptr<port::Thread> old_recovery_thread(
781
0
        std::move(recovery_thread_));
782
0
    db_mutex_->Unlock();
783
0
    cv_.SignalAll();
784
0
    old_recovery_thread->join();
785
0
    db_mutex_->Lock();
786
0
  }
787
11.0k
  TEST_SYNC_POINT("PostEndAutoRecovery");
788
11.0k
}
789
790
void ErrorHandler::RecordStats(
791
    const std::vector<Tickers>& ticker_types,
792
0
    const std::vector<std::tuple<Histograms, uint64_t>>& int_histograms) {
793
0
  if (bg_error_stats_ == nullptr) {
794
0
    return;
795
0
  }
796
0
  for (const auto& ticker_type : ticker_types) {
797
0
    RecordTick(bg_error_stats_.get(), ticker_type);
798
0
  }
799
800
0
  for (const auto& hist : int_histograms) {
801
0
    RecordInHistogram(bg_error_stats_.get(), std::get<0>(hist),
802
0
                      std::get<1>(hist));
803
0
  }
804
0
}
805
806
}  // namespace ROCKSDB_NAMESPACE