Coverage Report

Created: 2024-09-08 07:17

/src/rocksdb/util/string_util.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
#include "util/string_util.h"
7
8
#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
18
19
#include "port/port.h"
20
#include "port/sys_time.h"
21
#include "rocksdb/slice.h"
22
23
// ROCKSDB_HAS_CPP_ATTRIBUTE(x) forwards to __has_cpp_attribute(x) when the
// compiler provides it, and evaluates to 0 otherwise.
#ifndef __has_cpp_attribute
24
#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) 0
25
#else
26
#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
27
#endif
28
29
// ROCKSDB_MAYBE_UNUSED expands to [[maybe_unused]] when that attribute is
// available and the language level is at least C++17, to [[gnu::unused]] when
// that attribute is available or __GNUC__ is set, and to nothing otherwise.
#if ROCKSDB_HAS_CPP_ATTRIBUTE(maybe_unused) && __cplusplus >= 201703L
30
#define ROCKSDB_MAYBE_UNUSED [[maybe_unused]]
31
#elif ROCKSDB_HAS_CPP_ATTRIBUTE(gnu::unused) || __GNUC__
32
#define ROCKSDB_MAYBE_UNUSED [[gnu::unused]]
33
#else
34
#define ROCKSDB_MAYBE_UNUSED
35
#endif
36
37
namespace ROCKSDB_NAMESPACE {
38
39
// String constant holding the literal text "nullptr".
// NOTE(review): presumably the textual stand-in for a null pointer value in
// serialized output; its users are not visible in this file -- confirm.
const std::string kNullptrString = "nullptr";
40
41
0
// Splits `arg` at every occurrence of `delim` and returns the pieces in
// order. Empty pieces between adjacent delimiters (or before a leading
// delimiter) are kept. A trailing delimiter does not produce a trailing empty
// piece, and an empty input yields an empty vector.
std::vector<std::string> StringSplit(const std::string& arg, char delim) {
  std::vector<std::string> pieces;
  size_t begin = 0;
  while (begin < arg.size()) {
    const size_t hit = arg.find(delim, begin);
    if (hit == std::string::npos) {
      // No more delimiters: the remainder is the last piece.
      pieces.push_back(arg.substr(begin));
      break;
    }
    pieces.push_back(arg.substr(begin, hit - begin));
    begin = hit + 1;
  }
  return pieces;
}
50
51
// Writes a human-readable rendering of a duration given in microseconds into
// `output` (at most `len` bytes, via snprintf) and returns snprintf's result.
//
// When `fixed_format` is false the unit is picked from the magnitude:
//   micros < 10 ms    -> "<n> us"
//   micros < 10 sec   -> "<n.nnn> ms"
//   micros < 1 minute -> "<n.nnn> sec"
//   micros < 1 hour   -> "<mm>:<s.sss> M:S"
//   otherwise         -> "<hh>:<mm>:<s.sss> H:M:S"
// When `fixed_format` is true the H:M:S form is always used.
int AppendHumanMicros(uint64_t micros, char* output, int len,
                      bool fixed_format) {
  constexpr uint64_t kMicrosPerSecond = 1000000;
  constexpr uint64_t kMicrosPerMinute = 60 * kMicrosPerSecond;
  constexpr uint64_t kMicrosPerHour = 60 * kMicrosPerMinute;

  if (!fixed_format) {
    if (micros < 10000) {
      return snprintf(output, len, "%" PRIu64 " us", micros);
    }
    if (micros < 10 * kMicrosPerSecond) {
      const double millis = static_cast<double>(micros) / 1000;
      return snprintf(output, len, "%.3lf ms", millis);
    }
    if (micros < kMicrosPerMinute) {
      const double seconds = static_cast<double>(micros) / 1000000;
      return snprintf(output, len, "%.3lf sec", seconds);
    }
    if (micros < kMicrosPerHour) {
      const uint64_t whole_minutes = micros / 1000000 / 60;
      const double seconds = static_cast<double>(micros % 60000000) / 1000000;
      return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S", whole_minutes,
                      seconds);
    }
  }

  // Fixed format, or a duration of one hour or more.
  const uint64_t whole_hours = micros / 1000000 / 3600;
  const uint64_t minutes_in_hour = (micros / 1000000 / 60) % 60;
  const double seconds = static_cast<double>(micros % 60000000) / 1000000;
  return snprintf(output, len, "%02" PRIu64 ":%02" PRIu64 ":%05.3f H:M:S",
                  whole_hours, minutes_in_hour, seconds);
}
76
77
// Writes a compact size summary of `bytes` into `output` (at most `len`
// bytes, via snprintf) and returns snprintf's result.
//   bytes >= 10 TB -> "<n>TB"
//   bytes >= 10 GB -> "<n>GB"
//   bytes >= 10 MB -> "<n>MB"
//   bytes >= 10 KB -> "<n>KB"
//   otherwise      -> "<n>B"
// Values are truncated (shifted), not rounded.
int AppendHumanBytes(uint64_t bytes, char* output, int len) {
  constexpr uint64_t kTen = 10;
  // Check thresholds from the smallest unit upward and return at the first
  // one the value does not reach.
  if (bytes < (kTen << 10)) {
    return snprintf(output, len, "%" PRIu64 "B", bytes);
  }
  if (bytes < (kTen << 20)) {
    return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10);
  }
  if (bytes < (kTen << 30)) {
    return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20);
  }
  if (bytes < (kTen << 40)) {
    return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30);
  }
  return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40);
}
95
96
0
// Appends the decimal representation of `num` to `*str`.
void AppendNumberTo(std::string* str, uint64_t num) {
  str->append(std::to_string(num));
}
101
102
0
void AppendEscapedStringTo(std::string* str, const Slice& value) {
103
0
  for (size_t i = 0; i < value.size(); i++) {
104
0
    char c = value[i];
105
0
    if (c >= ' ' && c <= '~') {
106
0
      str->push_back(c);
107
0
    } else {
108
0
      char buf[10];
109
0
      snprintf(buf, sizeof(buf), "\\x%02x",
110
0
               static_cast<unsigned int>(c) & 0xff);
111
0
      str->append(buf);
112
0
    }
113
0
  }
114
0
}
115
116
830
// Renders `num` compactly: values whose magnitude is below 10,000 are printed
// in full, otherwise the value is divided (truncating) by 10^3, 10^6 or 10^9
// and suffixed with "K", "M" or "G" respectively.
std::string NumberToHumanString(int64_t num) {
  // -INT64_MIN is not representable in int64_t, so use INT64_MAX as its
  // magnitude; that keeps it out of the "< 10000" bucket.
  const int64_t magnitude =
      (num >= 0) ? num : (num == INT64_MIN ? INT64_MAX : -num);

  char text[21];
  if (magnitude >= 10000000000LL) {
    snprintf(text, sizeof(text), "%" PRIi64 "G", num / 1000000000);
  } else if (magnitude >= 10000000) {
    snprintf(text, sizeof(text), "%" PRIi64 "M", num / 1000000);
  } else if (magnitude >= 10000) {
    snprintf(text, sizeof(text), "%" PRIi64 "K", num / 1000);
  } else {
    snprintf(text, sizeof(text), "%" PRIi64, num);
  }
  return std::string(text);
}
139
140
313
// Formats `bytes` with two decimals in the largest unit among KB, MB, GB and
// TB for which the value is below 1024 (TB is used for anything larger). The
// smallest unit is KB, so small inputs print as fractions of a KB.
std::string BytesToHumanString(uint64_t bytes) {
  static const char* const kUnits[] = {"KB", "MB", "GB", "TB"};
  const size_t kLastUnit = 3;

  // Always start with KB.
  double scaled = static_cast<double>(bytes) / 1024;
  size_t unit = 0;
  for (; unit < kLastUnit && scaled >= 1024; ++unit) {
    scaled /= 1024;
  }

  char text[20];
  snprintf(text, sizeof(text), "%.2f %s", scaled, kUnits[unit]);
  return std::string(text);
}
158
159
0
std::string TimeToHumanString(int unixtime) {
160
0
  char time_buffer[80];
161
0
  time_t rawtime = unixtime;
162
0
  struct tm tInfo;
163
0
  struct tm* timeinfo = port::LocalTimeR(&rawtime, &tInfo);
164
0
  assert(timeinfo == &tInfo);
165
0
  strftime(time_buffer, 80, "%c", timeinfo);
166
0
  return std::string(time_buffer);
167
0
}
168
169
0
std::string EscapeString(const Slice& value) {
170
0
  std::string r;
171
0
  AppendEscapedStringTo(&r, value);
172
0
  return r;
173
0
}
174
175
851k
// Parses a run of leading decimal digits from `*in`, removing each digit from
// the front of the slice as it is consumed.
//
// Returns true and stores the parsed value in `*val` if at least one digit
// was consumed. Returns false if there was no leading digit (then `*val` is
// set to 0), or if the value would overflow uint64_t (then `*val` is left
// untouched and `*in` has been advanced past the digits consumed so far).
bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
  constexpr uint64_t kLimit = ~static_cast<uint64_t>(0);
  constexpr uint64_t kLimitDiv10 = kLimit / 10;
  constexpr uint64_t kLimitMod10 = kLimit % 10;

  uint64_t accumulated = 0;
  bool saw_digit = false;
  for (; !in->empty(); in->remove_prefix(1)) {
    const char ch = (*in)[0];
    if (ch < '0' || ch > '9') {
      break;
    }
    saw_digit = true;
    const unsigned int digit = (ch - '0');
    // Reject before multiplying so the accumulator never wraps.
    if (accumulated > kLimitDiv10 ||
        (accumulated == kLimitDiv10 && digit > kLimitMod10)) {
      return false;
    }
    accumulated = (accumulated * 10) + digit;
  }
  *val = accumulated;
  return saw_digit;
}
198
199
458k
// Returns true for the characters that need escaping in an option string:
// backslash, '#', ':', carriage return and newline.
bool isSpecialChar(const char c) {
  switch (c) {
    case '\\':
    case '#':
    case ':':
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
205
206
namespace {
207
// A (from, to) character pair; the element type of the conversion tables used
// by UnescapeChar() and EscapeChar().
using CharMap = std::pair<char, char>;
208
}
209
210
0
char UnescapeChar(const char c) {
211
0
  static const CharMap convert_map[] = {{'r', '\r'}, {'n', '\n'}};
212
213
0
  auto iter = std::find_if(std::begin(convert_map), std::end(convert_map),
214
0
                           [c](const CharMap& p) { return p.first == c; });
215
216
0
  if (iter == std::end(convert_map)) {
217
0
    return c;
218
0
  }
219
0
  return iter->second;
220
0
}
221
222
0
char EscapeChar(const char c) {
223
0
  static const CharMap convert_map[] = {{'\n', 'n'}, {'\r', 'r'}};
224
225
0
  auto iter = std::find_if(std::begin(convert_map), std::end(convert_map),
226
0
                           [c](const CharMap& p) { return p.first == c; });
227
228
0
  if (iter == std::end(convert_map)) {
229
0
    return c;
230
0
  }
231
0
  return iter->second;
232
0
}
233
234
119k
std::string EscapeOptionString(const std::string& raw_string) {
235
119k
  std::string output;
236
458k
  for (auto c : raw_string) {
237
458k
    if (isSpecialChar(c)) {
238
0
      output += '\\';
239
0
      output += EscapeChar(c);
240
458k
    } else {
241
458k
      output += c;
242
458k
    }
243
458k
  }
244
245
119k
  return output;
246
119k
}
247
248
3.42M
std::string UnescapeOptionString(const std::string& escaped_string) {
249
3.42M
  bool escaped = false;
250
3.42M
  std::string output;
251
252
30.2M
  for (auto c : escaped_string) {
253
30.2M
    if (escaped) {
254
0
      output += UnescapeChar(c);
255
0
      escaped = false;
256
30.2M
    } else {
257
30.2M
      if (c == '\\') {
258
0
        escaped = true;
259
0
        continue;
260
0
      }
261
30.2M
      output += c;
262
30.2M
    }
263
30.2M
  }
264
3.42M
  return output;
265
3.42M
}
266
267
2.13M
// Returns a copy of `str` with leading and trailing whitespace (as classified
// by isspace) removed. An empty or all-whitespace input yields an empty
// string.
std::string trim(const std::string& str) {
  // isspace requires a value representable as unsigned char (or EOF);
  // passing a negative char is undefined behavior, so convert first.
  const auto is_space = [](char ch) {
    return isspace(static_cast<unsigned char>(ch)) != 0;
  };

  // [start, end) is the half-open range of characters to keep.
  size_t start = 0;
  size_t end = str.size();
  while (start < end && is_space(str[start])) {
    ++start;
  }
  while (end > start && is_space(str[end - 1])) {
    --end;
  }
  return str.substr(start, end - start);
}
284
285
200k
// Returns true if `string` ends with `pattern`. An empty pattern matches any
// string.
bool EndsWith(const std::string& string, const std::string& pattern) {
  const size_t pattern_len = pattern.size();
  const size_t total_len = string.size();
  if (pattern_len > total_len) {
    return false;
  }
  const size_t tail_begin = total_len - pattern_len;
  return string.compare(tail_begin, pattern_len, pattern) == 0;
}
294
295
0
// Returns true if `string` begins with `pattern`. An empty pattern matches
// any string.
bool StartsWith(const std::string& string, const std::string& pattern) {
  if (pattern.size() > string.size()) {
    return false;
  }
  return std::equal(pattern.begin(), pattern.end(), string.begin());
}
298
299
300
1.11M
// Parses a boolean option value. "true" and "1" yield true; "false" and "0"
// yield false. Any other text throws std::invalid_argument carrying `type` as
// its message.
bool ParseBoolean(const std::string& type, const std::string& value) {
  const bool is_true = (value == "true") || (value == "1");
  if (is_true) {
    return true;
  }
  const bool is_false = (value == "false") || (value == "0");
  if (!is_false) {
    throw std::invalid_argument(type);
  }
  return false;
}
308
309
12.5k
uint8_t ParseUint8(const std::string& value) {
310
12.5k
  uint64_t num = ParseUint64(value);
311
12.5k
  if ((num >> 8LL) == 0) {
312
12.5k
    return static_cast<uint8_t>(num);
313
12.5k
  } else {
314
0
    throw std::out_of_range(value);
315
0
  }
316
12.5k
}
317
318
223k
uint32_t ParseUint32(const std::string& value) {
319
223k
  uint64_t num = ParseUint64(value);
320
223k
  if ((num >> 32LL) == 0) {
321
223k
    return static_cast<uint32_t>(num);
322
223k
  } else {
323
0
    throw std::out_of_range(value);
324
0
  }
325
223k
}
326
327
0
int32_t ParseInt32(const std::string& value) {
328
0
  int64_t num = ParseInt64(value);
329
0
  if (num <= std::numeric_limits<int32_t>::max() &&
330
0
      num >= std::numeric_limits<int32_t>::min()) {
331
0
    return static_cast<int32_t>(num);
332
0
  } else {
333
0
    throw std::out_of_range(value);
334
0
  }
335
0
}
336
337
338
983k
// Parses an unsigned integer from the start of `value`, optionally followed
// by a binary size suffix: k/K (x2^10), m/M (x2^20), g/G (x2^30) or
// t/T (x2^40). Only the first character after the number is inspected; any
// other trailing text is ignored.
//
// In the non-CYGWIN build, std::stoull throws std::invalid_argument when no
// number can be parsed and std::out_of_range when the number itself does not
// fit. This function additionally throws std::out_of_range (carrying `value`)
// when applying the suffix would overflow uint64_t, instead of silently
// dropping the high bits.
uint64_t ParseUint64(const std::string& value) {
  size_t endchar;
#ifndef CYGWIN
  uint64_t num = std::stoull(value, &endchar);
#else
  // NOTE(review): this branch parses with base 0 via strtoul, unlike the
  // base-10 std::stoull above -- confirm this difference is intended.
  char* endptr;
  uint64_t num = std::strtoul(value.c_str(), &endptr, 0);
  endchar = endptr - value.c_str();
#endif

  if (endchar < value.length()) {
    int shift = 0;
    switch (value[endchar]) {
      case 'k':
      case 'K':
        shift = 10;
        break;
      case 'm':
      case 'M':
        shift = 20;
        break;
      case 'g':
      case 'G':
        shift = 30;
        break;
      case 't':
      case 'T':
        shift = 40;
        break;
      default:
        break;
    }
    if (shift != 0) {
      // Bits shifted out of a uint64_t are lost; reject instead of wrapping.
      if (num > (std::numeric_limits<uint64_t>::max() >> shift)) {
        throw std::out_of_range(value);
      }
      num <<= shift;
    }
  }

  return num;
}
363
364
12.5k
// Parses a signed integer from the start of `value`, optionally followed by a
// binary size suffix: k/K (x2^10), m/M (x2^20), g/G (x2^30) or t/T (x2^40).
// Only the first character after the number is inspected; any other trailing
// text is ignored.
//
// In the non-CYGWIN build, std::stoll throws std::invalid_argument when no
// number can be parsed and std::out_of_range when the number itself does not
// fit. This function additionally throws std::out_of_range (carrying `value`)
// when applying the suffix would leave the int64_t range.
int64_t ParseInt64(const std::string& value) {
  size_t endchar;
#ifndef CYGWIN
  int64_t num = std::stoll(value, &endchar);
#else
  // NOTE(review): this branch parses with base 0, unlike the base-10
  // std::stoll above -- confirm this difference is intended.
  char* endptr;
  int64_t num = std::strtoll(value.c_str(), &endptr, 0);
  endchar = endptr - value.c_str();
#endif

  if (endchar < value.length()) {
    int shift = 0;
    switch (value[endchar]) {
      case 'k':
      case 'K':
        shift = 10;
        break;
      case 'm':
      case 'M':
        shift = 20;
        break;
      case 'g':
      case 'G':
        shift = 30;
        break;
      case 't':
      case 'T':
        shift = 40;
        break;
      default:
        break;
    }
    if (shift != 0) {
      // Scale by multiplication with an explicit range check: left-shifting
      // a negative or overflowing signed value is not well defined.
      const int64_t factor = static_cast<int64_t>(1) << shift;
      if (num > std::numeric_limits<int64_t>::max() / factor ||
          num < std::numeric_limits<int64_t>::min() / factor) {
        throw std::out_of_range(value);
      }
      num *= factor;
    }
  }

  return num;
}
389
390
509k
// Parses an int from the start of `value`, optionally followed by a binary
// size suffix: k/K (x2^10), m/M (x2^20) or g/G (x2^30). Only the first
// character after the number is inspected; any other trailing text is
// ignored.
//
// In the non-CYGWIN build, std::stoi throws std::invalid_argument when no
// number can be parsed and std::out_of_range when the number itself does not
// fit. This function additionally throws std::out_of_range (carrying `value`)
// when applying the suffix would leave the int range.
int ParseInt(const std::string& value) {
  size_t endchar;
#ifndef CYGWIN
  int num = std::stoi(value, &endchar);
#else
  // NOTE(review): this branch parses with strtoul (unsigned, base 0) and
  // narrows to int without a range check -- confirm this is intended.
  char* endptr;
  int num = std::strtoul(value.c_str(), &endptr, 0);
  endchar = endptr - value.c_str();
#endif

  if (endchar < value.length()) {
    int shift = 0;
    switch (value[endchar]) {
      case 'k':
      case 'K':
        shift = 10;
        break;
      case 'm':
      case 'M':
        shift = 20;
        break;
      case 'g':
      case 'G':
        shift = 30;
        break;
      default:
        break;
    }
    if (shift != 0) {
      // Scale by multiplication with an explicit range check: left-shifting
      // a negative or overflowing signed value is not well defined.
      const int factor = 1 << shift;
      if (num > std::numeric_limits<int>::max() / factor ||
          num < std::numeric_limits<int>::min() / factor) {
        throw std::out_of_range(value);
      }
      num *= factor;
    }
  }

  return num;
}
413
414
87.9k
// Parses a floating-point number from the start of `value`. The non-CYGWIN
// build uses std::stod; the CYGWIN build uses std::strtod.
double ParseDouble(const std::string& value) {
#ifndef CYGWIN
  const double parsed = std::stod(value);
#else
  const double parsed = std::strtod(value.c_str(), nullptr);
#endif
  return parsed;
}
421
422
268k
size_t ParseSizeT(const std::string& value) {
423
268k
  return static_cast<size_t>(ParseUint64(value));
424
268k
}
425
426
0
std::vector<int> ParseVectorInt(const std::string& value) {
427
0
  std::vector<int> result;
428
0
  size_t start = 0;
429
0
  while (start < value.size()) {
430
0
    size_t end = value.find(':', start);
431
0
    if (end == std::string::npos) {
432
0
      result.push_back(ParseInt(value.substr(start)));
433
0
      break;
434
0
    } else {
435
0
      result.push_back(ParseInt(value.substr(start, end - start)));
436
0
      start = end + 1;
437
0
    }
438
0
  }
439
0
  return result;
440
0
}
441
442
0
// Overwrites `*value` with the elements of `vec` in decimal, joined by ':'.
// Always returns true.
bool SerializeIntVector(const std::vector<int>& vec, std::string* value) {
  value->clear();
  bool first = true;
  for (const int element : vec) {
    if (!first) {
      value->append(":");
    }
    first = false;
    value->append(std::to_string(element));
  }
  return true;
}
452
453
0
// Parses a time of day written as "<hours>:<minutes>" and returns it as
// seconds since midnight. Returns -1 if the text is not exactly two integers
// separated by ':' (trailing characters are rejected), or if hours is outside
// [0, 23] or minutes is outside [0, 59].
int ParseTimeStringToSeconds(const std::string& value) {
  std::istringstream parser(value);
  int hh = 0;
  int mm = 0;
  char separator = '\0';
  parser >> hh >> separator >> mm;

  // Require a successful extraction that consumed the whole input.
  const bool parsed_cleanly =
      !parser.fail() && parser.eof() && separator == ':';
  if (!parsed_cleanly) {
    return -1;
  }

  const bool in_range = (hh >= 0 && hh <= 23) && (mm >= 0 && mm <= 59);
  return in_range ? hh * 3600 + mm * 60 : -1;
}
469
470
bool TryParseTimeRangeString(const std::string& value, int& start_time,
471
11.0k
                             int& end_time) {
472
11.0k
  if (value.empty()) {
473
11.0k
    start_time = 0;
474
11.0k
    end_time = 0;
475
11.0k
    return true;
476
11.0k
  }
477
0
  auto split = StringSplit(value, '-');
478
0
  if (split.size() != 2) {
479
0
    return false;
480
0
  }
481
0
  start_time = ParseTimeStringToSeconds(split[0]);
482
0
  if (start_time < 0) {
483
0
    return false;
484
0
  }
485
0
  end_time = ParseTimeStringToSeconds(split[1]);
486
0
  if (end_time < 0) {
487
0
    return false;
488
0
  }
489
0
  return true;
490
0
}
491
492
// Copied from folly/string.cpp:
493
// https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457
494
// There are two variants of `strerror_r` function, one returns
495
// `int`, and another returns `char*`. Selecting proper version using
496
// preprocessor macros portably is extremely hard.
497
//
498
// For example, on Android function signature depends on `__USE_GNU` and
499
// `__ANDROID_API__` macros (https://git.io/fjBBE).
500
//
501
// So we are using C++ overloading trick: we pass a pointer of
502
// `strerror_r` to `invoke_strerror_r` function, and C++ compiler
503
// selects proper function.
504
505
#if !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)))
506
ROCKSDB_MAYBE_UNUSED
// Overload selected when strerror_r has the XSI signature (returns int).
// Returns the message written into `buf`, or a fallback "Unknown error" text
// when the call reports failure.
static std::string invoke_strerror_r(int (*strerror_r)(int, char*, size_t),
                                     int err, char* buf, size_t buflen) {
  // Using XSI-compatible strerror_r
  const int rc = strerror_r(err, buf, buflen);

  // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero
  if (rc == 0) {
    return buf;
  }
  snprintf(buf, buflen, "Unknown error %d (strerror_r failed with error %d)",
           err, errno);
  return buf;
}
519
520
ROCKSDB_MAYBE_UNUSED
// Overload selected when strerror_r has the GNU signature (returns char*).
// The returned pointer, not necessarily `buf`, holds the message.
static std::string invoke_strerror_r(char* (*strerror_r)(int, char*, size_t),
                                     int err, char* buf, size_t buflen) {
  // Using GNU strerror_r
  const char* message = strerror_r(err, buf, buflen);
  return message;
}
526
#endif  // !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)))
527
528
0
std::string errnoStr(int err) {
529
0
  char buf[1024];
530
0
  buf[0] = '\0';
531
532
0
  std::string result;
533
534
  // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html
535
  // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html
536
#if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))
537
  // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added
538
  // as well. So maybe we should use this across all platforms (together
539
  // with strerrorlen_s). Note strerror_r and _s have swapped args.
540
  int r = strerror_s(buf, sizeof(buf), err);
541
  if (r != 0) {
542
    snprintf(buf, sizeof(buf),
543
             "Unknown error %d (strerror_r failed with error %d)", err, errno);
544
  }
545
  result.assign(buf);
546
#else
547
  // Using any strerror_r
548
0
  result.assign(invoke_strerror_r(strerror_r, err, buf, sizeof(buf)));
549
0
#endif
550
551
0
  return result;
552
0
}
553
554
}  // namespace ROCKSDB_NAMESPACE