Coverage Report

Created: 2026-06-22 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/sentencepiece/third_party/darts_clone/darts.h
Line
Count
Source
1
#ifndef DARTS_H_
2
#define DARTS_H_
3
4
#include <cstdio>
5
#include <cstring>
6
#include <exception>
7
#include <new>
8
9
#define DARTS_VERSION "0.32"
10
11
// DARTS_THROW() throws a <Darts::Exception> whose message starts with the
12
// file name and the line number. For example, DARTS_THROW("error message") at
13
// line 123 of "darts.h" throws a <Darts::Exception> which has a pointer to
14
// "darts.h:123: exception: error message". The message is available by using
15
// what() as well as that of <std::exception>.
16
#define DARTS_INT_TO_STR(value) #value
17
#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line)
18
#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__)
19
0
#define DARTS_THROW(msg) throw Darts::Details::Exception( \
20
0
  __FILE__ ":" DARTS_LINE_STR ": exception: " msg)
21
22
namespace Darts {
23
24
// The following namespace hides the internal types and classes.
25
namespace Details {
26
27
// This header assumes that <int> and <unsigned int> are 32-bit integer types.
28
//
29
// Darts-clone keeps values associated with keys. The type of the values is
30
// <value_type>. Note that the values must be positive integers because the
31
// most significant bit (MSB) of each value is used to represent whether the
32
// corresponding unit is a leaf or not. Also, the keys are represented by
33
// sequences of <char_type>s. <uchar_type> is the unsigned type of <char_type>.
34
typedef char char_type;
35
typedef unsigned char uchar_type;
36
typedef int value_type;
37
38
// The main structure of Darts-clone is an array of <DoubleArrayUnit>s, and the
39
// unit type is actually a wrapper of <id_type>.
40
typedef unsigned int id_type;
41
42
// <progress_func_type> is the type of callback functions for reporting the
43
// progress of building a dictionary. See also build() of <DoubleArray>.
44
// The 1st argument receives the progress value and the 2nd argument receives
45
// the maximum progress value. A usage example is to show the progress
46
// percentage, 100.0 * (the 1st argument) / (the 2nd argument).
47
typedef int (*progress_func_type)(std::size_t, std::size_t);
48
49
// <DoubleArrayUnit> is the type of double-array units and it is a wrapper of
50
// <id_type> in practice.
51
class DoubleArrayUnit {
52
 public:
53
44.5M
  DoubleArrayUnit() : unit_() {}
54
55
  // has_leaf() returns whether a leaf unit is immediately derived from the
56
  // unit (true) or not (false).
57
652M
  bool has_leaf() const {
58
652M
    return ((unit_ >> 8) & 1) == 1;
59
652M
  }
60
  // value() returns the value stored in the unit, and thus value() is
61
  // available when and only when the unit is a leaf unit.
62
498M
  value_type value() const {
63
498M
    return static_cast<value_type>(unit_ & ((1U << 31) - 1));
64
498M
  }
65
66
  // label() returns the label associted with the unit. Note that a leaf unit
67
  // always returns an invalid label. For this feature, leaf unit's label()
68
  // returns an <id_type> that has the MSB of 1.
69
1.60G
  id_type label() const {
70
1.60G
    return unit_ & ((1U << 31) | 0xFF);
71
1.60G
  }
72
  // offset() returns the offset from the unit to its derived units.
73
1.37G
  id_type offset() const {
74
1.37G
    return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6);
75
1.37G
  }
76
77
 private:
78
  id_type unit_;
79
80
  // Copyable.
81
};
82
83
// Darts-clone throws an <Exception> for memory allocation failure, invalid
84
// arguments or a too large offset. The last case means that there are too many
85
// keys in the given set of keys. Note that the `msg' of <Exception> must be a
86
// constant or static string because an <Exception> keeps only a pointer to
87
// that string.
88
class Exception : public std::exception {
89
 public:
90
0
  explicit Exception(const char *msg = NULL) throw() : msg_(msg) {}
91
0
  Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {}
92
0
  virtual ~Exception() throw() {}
93
94
  // <Exception> overrides what() of <std::exception>.
95
0
  virtual const char *what() const throw() {
96
0
    return (msg_ != NULL) ? msg_ : "";
97
0
  }
98
99
 private:
100
  const char *msg_;
101
102
  // Disallows operator=.
103
  Exception &operator=(const Exception &);
104
};
105
106
}  // namespace Details
107
108
// <DoubleArrayImpl> is the interface of Darts-clone. Note that other
109
// classes should not be accessed from outside.
110
//
111
// <DoubleArrayImpl> has 4 template arguments but only the 3rd one is used as
112
// the type of values. Note that the given <T> is used only from outside, and
113
// the internal value type is not changed from <Darts::Details::value_type>.
114
// In build(), given values are casted from <T> to <Darts::Details::value_type>
115
// by using static_cast. On the other hand, values are casted from
116
// <Darts::Details::value_type> to <T> in searching dictionaries.
117
template <typename, typename, typename T, typename>
118
class DoubleArrayImpl {
119
 public:
120
  // Even if this <value_type> is changed, the internal value type is still
121
  // <Darts::Details::value_type>. Other types, such as 64-bit integer types
122
  // and floating-point number types, should not be used.
123
  typedef T value_type;
124
  // A key is reprenseted by a sequence of <key_type>s. For example,
125
  // exactMatchSearch() takes a <const key_type *>.
126
  typedef Details::char_type key_type;
127
  // In searching dictionaries, the values associated with the matched keys are
128
  // stored into or returned as <result_type>s.
129
  typedef value_type result_type;
130
131
  // <result_pair_type> enables applications to get the lengths of the matched
132
  // keys in addition to the values.
133
  struct result_pair_type {
134
    value_type value;
135
    std::size_t length;
136
  };
137
138
  // The constructor initializes member variables with 0 and NULLs.
139
74.7k
  DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {}
140
  // The destructor frees memory allocated for units and then initializes
141
  // member variables with 0 and NULLs.
142
74.7k
  virtual ~DoubleArrayImpl() {
143
74.7k
    clear();
144
74.7k
  }
145
146
  // <DoubleArrayImpl> has 2 kinds of set_result()s. The 1st set_result() is to
147
  // set a value to a <value_type>. The 2nd set_result() is to set a value and
148
  // a length to a <result_pair_type>. By using set_result()s, search methods
149
  // can return the 2 kinds of results in the same way.
150
  // Why the set_result()s are non-static? It is for compatibility.
151
  //
152
  // The 1st set_result() takes a length as the 3rd argument but it is not
153
  // used. If a compiler does a good job, codes for getting the length may be
154
  // removed.
155
5.16M
  void set_result(value_type *result, value_type value, std::size_t) const {
156
5.16M
    *result = value;
157
5.16M
  }
158
  // The 2nd set_result() uses both `value' and `length'.
159
  void set_result(result_pair_type *result,
160
234M
      value_type value, std::size_t length) const {
161
234M
    result->value = value;
162
234M
    result->length = length;
163
234M
  }
164
165
  // set_array() calls clear() in order to free memory allocated to the old
166
  // array and then sets a new array. This function is useful to set a memory-
167
  // mapped array. Note that the array set by set_array() is not freed in
168
  // clear() and the destructor of <DoubleArrayImpl>.
169
  // set_array() can also set the size of the new array but the size is not
170
  // used in search methods. So it works well even if the 2nd argument is 0 or
171
  // omitted. Remember that size() and total_size() returns 0 in such a case.
172
17.0k
  void set_array(const void *ptr, std::size_t size = 0) {
173
17.0k
    clear();
174
17.0k
    array_ = static_cast<const unit_type *>(ptr);
175
17.0k
    size_ = size;
176
17.0k
  }
177
0
  void copy_array(const void* ptr, std::size_t num_bytes) {
178
0
    clear();
179
0
    const std::size_t extra_padding = num_bytes % unit_size() == 0 ? 0 : 1;
180
0
    size_ = num_bytes / unit_size() + extra_padding;
181
0
    buf_ = new unit_type[size_];
182
0
    std::memcpy(buf_, ptr, num_bytes);
183
0
    array_ = buf_;
184
0
  }
185
  // array() returns a pointer to the array of units.
186
0
  const void *array() const {
187
0
    return array_;
188
0
  }
189
190
  // clear() frees memory allocated to units and then initializes member
191
  // variables with 0 and NULLs. Note that clear() does not free memory if the
192
  // array of units was set by set_array(). In such a case, `array_' is not
193
  // NULL and `buf_' is NULL.
194
149k
  void clear() {
195
149k
    size_ = 0;
196
149k
    array_ = NULL;
197
149k
    if (buf_ != NULL) {
198
57.7k
      delete[] buf_;
199
57.7k
      buf_ = NULL;
200
57.7k
    }
201
149k
  }
202
203
  // unit_size() returns the size of each unit. The size must be 4 bytes.
204
17.0k
  std::size_t unit_size() const {
205
17.0k
    return sizeof(unit_type);
206
17.0k
  }
207
  // size() returns the number of units. It can be 0 if set_array() is used.
208
0
  std::size_t size() const {
209
0
    return size_;
210
0
  }
211
  // total_size() returns the number of bytes allocated to the array of units.
212
  // It can be 0 if set_array() is used.
213
  std::size_t total_size() const {
214
    return unit_size() * size();
215
  }
216
  // nonzero_size() exists for compatibility. It always returns the number of
217
  // units because it takes long time to count the number of non-zero units.
218
  std::size_t nonzero_size() const {
219
    return size();
220
  }
221
222
  // build() constructs a dictionary from given key-value pairs. If `lengths'
223
  // is NULL, `keys' is handled as an array of zero-terminated strings. If
224
  // `values' is NULL, the index in `keys' is associated with each key, i.e.
225
  // the ith key has (i - 1) as its value.
226
  // Note that the key-value pairs must be arranged in key order and the values
227
  // must not be negative. Also, if there are duplicate keys, only the first
228
  // pair will be stored in the resultant dictionary.
229
  // `progress_func' is a pointer to a callback function. If it is not NULL,
230
  // it will be called in build() so that the caller can check the progress of
231
  // dictionary construction. For details, please see the definition of
232
  // <Darts::Details::progress_func_type>.
233
  // The return value of build() is 0, and it indicates the success of the
234
  // operation. Otherwise, build() throws a <Darts::Exception>, which is a
235
  // derived class of <std::exception>.
236
  // build() uses another construction algorithm if `values' is not NULL. In
237
  // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead
238
  // of a trie because a DAWG is likely to be more compact than a trie.
239
  int build(std::size_t num_keys, const key_type * const *keys,
240
      const std::size_t *lengths = NULL, const value_type *values = NULL,
241
      Details::progress_func_type progress_func = NULL);
242
243
  // open() reads an array of units from the specified file. And if it goes
244
  // well, the old array will be freed and replaced with the new array read
245
  // from the file. `offset' specifies the number of bytes to be skipped before
246
  // reading an array. `size' specifies the number of bytes to be read from the
247
  // file. If the `size' is 0, the whole file will be read.
248
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
249
  // non-zero value or throws a <Darts::Exception>. The exception is thrown
250
  // when and only when a memory allocation fails.
251
  int open(const char *file_name, const char *mode = "rb",
252
      std::size_t offset = 0, std::size_t size = 0);
253
  // save() writes the array of units into the specified file. `offset'
254
  // specifies the number of bytes to be skipped before writing the array.
255
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
256
  // non-zero value.
257
  int save(const char *file_name, const char *mode = "wb",
258
      std::size_t offset = 0) const;
259
260
  // The 1st exactMatchSearch() tests whether the given key exists or not, and
261
  // if it exists, its value and length are set to `result'. Otherwise, the
262
  // value and the length of `result' are set to -1 and 0 respectively.
263
  // Note that if `length' is 0, `key' is handled as a zero-terminated string.
264
  // `node_pos' specifies the start position of matching. This argument enables
265
  // the combination of exactMatchSearch() and traverse(). For example, if you
266
  // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get
267
  // the node position corresponding to "xyz" and then you can use
268
  // exactMatchSearch() to test "A", "BC", and "DE" from that position.
269
  // Note that the length of `result' indicates the length from the `node_pos'.
270
  // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }.
271
  template <class U>
272
  void exactMatchSearch(const key_type *key, U &result,
273
2.74M
      std::size_t length = 0, std::size_t node_pos = 0) const {
274
2.74M
    result = exactMatchSearch<U>(key, length, node_pos);
275
2.74M
  }
276
  // The 2nd exactMatchSearch() returns a result instead of updating the 2nd
277
  // argument. So, the following exactMatchSearch() has only 3 arguments.
278
  template <class U>
279
  inline U exactMatchSearch(const key_type *key, std::size_t length = 0,
280
      std::size_t node_pos = 0) const;
281
282
  // commonPrefixSearch() searches for keys which match a prefix of the given
283
  // string. If `length' is 0, `key' is handled as a zero-terminated string.
284
  // The values and the lengths of at most `max_num_results' matched keys are
285
  // stored in `results'. commonPrefixSearch() returns the number of matched
286
  // keys. Note that the return value can be larger than `max_num_results' if
287
  // there are more than `max_num_results' matches. If you want to get all the
288
  // results, allocate more spaces and call commonPrefixSearch() again.
289
  // `node_pos' works as well as in exactMatchSearch().
290
  template <class U>
291
  inline std::size_t commonPrefixSearch(const key_type *key, U *results,
292
      std::size_t max_num_results, std::size_t length = 0,
293
      std::size_t node_pos = 0) const;
294
295
  // In Darts-clone, a dictionary is a deterministic finite-state automaton
296
  // (DFA) and traverse() tests transitions on the DFA. The initial state is
297
  // `node_pos' and traverse() chooses transitions labeled key[key_pos],
298
  // key[key_pos + 1], ... in order. If there is not a transition labeled
299
  // key[key_pos + i], traverse() terminates the transitions at that state and
300
  // returns -2. Otherwise, traverse() ends without a termination and returns
301
  // -1 or a nonnegative value, -1 indicates that the final state was not an
302
  // accept state. When a nonnegative value is returned, it is the value
303
  // associated with the final accept state. That is, traverse() returns the
304
  // value associated with the given key if it exists. Note that traverse()
305
  // updates `node_pos' and `key_pos' after each transition.
306
  inline value_type traverse(const key_type *key, std::size_t &node_pos,
307
      std::size_t &key_pos, std::size_t length = 0) const;
308
309
  bool validate(value_type max_value_limit = -1) const;
310
311
 private:
312
  typedef Details::uchar_type uchar_type;
313
  typedef Details::id_type id_type;
314
  typedef Details::DoubleArrayUnit unit_type;
315
316
  std::size_t size_;
317
  const unit_type *array_;
318
  unit_type *buf_;
319
320
  // Disallows copy and assignment.
321
  DoubleArrayImpl(const DoubleArrayImpl &);
322
  DoubleArrayImpl &operator=(const DoubleArrayImpl &);
323
};
324
325
// <DoubleArray> is the typical instance of <DoubleArrayImpl>. It uses <int>
326
// as the type of values and it is suitable for most cases.
327
typedef DoubleArrayImpl<void, void, int, void> DoubleArray;
328
329
// The interface section ends here. For using Darts-clone, there is no need
330
// to read the remaining section, which gives the implementation of
331
// Darts-clone.
332
333
//
334
// Member functions of DoubleArrayImpl (except build()).
335
//
336
337
template <typename A, typename B, typename T, typename C>
338
int DoubleArrayImpl<A, B, T, C>::open(const char *file_name,
339
    const char *mode, std::size_t offset, std::size_t size) {
340
#ifdef _MSC_VER
341
  std::FILE *file;
342
  if (::fopen_s(&file, file_name, mode) != 0) {
343
    return -1;
344
  }
345
#else
346
  std::FILE *file = std::fopen(file_name, mode);
347
  if (file == NULL) {
348
    return -1;
349
  }
350
#endif
351
352
  if (size == 0) {
353
    if (std::fseek(file, 0, SEEK_END) != 0) {
354
      std::fclose(file);
355
      return -1;
356
    }
357
    size = std::ftell(file) - offset;
358
  }
359
360
  size /= unit_size();
361
  if (size < 256 || (size & 0xFF) != 0) {
362
    std::fclose(file);
363
    return -1;
364
  }
365
366
  if (std::fseek(file, offset, SEEK_SET) != 0) {
367
    std::fclose(file);
368
    return -1;
369
  }
370
371
  unit_type units[256];
372
  if (std::fread(units, unit_size(), 256, file) != 256) {
373
    std::fclose(file);
374
    return -1;
375
  }
376
377
  if (units[0].label() != '\0' || units[0].has_leaf() ||
378
      units[0].offset() == 0 || units[0].offset() >= 512) {
379
    std::fclose(file);
380
    return -1;
381
  }
382
  for (id_type i = 1; i < 256; ++i) {
383
    if (units[i].label() <= 0xFF && units[i].offset() >= size) {
384
      std::fclose(file);
385
      return -1;
386
    }
387
  }
388
389
  unit_type *buf;
390
  try {
391
    buf = new unit_type[size];
392
    for (id_type i = 0; i < 256; ++i) {
393
      buf[i] = units[i];
394
    }
395
  } catch (const std::bad_alloc &) {
396
    std::fclose(file);
397
    DARTS_THROW("failed to open double-array: std::bad_alloc");
398
  }
399
400
  if (size > 256) {
401
    if (std::fread(buf + 256, unit_size(), size - 256, file) != size - 256) {
402
      std::fclose(file);
403
      delete[] buf;
404
      return -1;
405
    }
406
  }
407
  std::fclose(file);
408
409
  clear();
410
411
  size_ = size;
412
  array_ = buf;
413
  buf_ = buf;
414
  return 0;
415
}
416
417
template <typename A, typename B, typename T, typename C>
418
int DoubleArrayImpl<A, B, T, C>::save(const char *file_name,
419
    const char *mode, std::size_t) const {
420
  if (size() == 0) {
421
    return -1;
422
  }
423
424
#ifdef _MSC_VER
425
  std::FILE *file;
426
  if (::fopen_s(&file, file_name, mode) != 0) {
427
    return -1;
428
  }
429
#else
430
  std::FILE *file = std::fopen(file_name, mode);
431
  if (file == NULL) {
432
    return -1;
433
  }
434
#endif
435
436
  if (std::fwrite(array_, unit_size(), size(), file) != size()) {
437
    std::fclose(file);
438
    return -1;
439
  }
440
  std::fclose(file);
441
  return 0;
442
}
443
444
template <typename A, typename B, typename T, typename C>
445
17.0k
bool DoubleArrayImpl<A, B, T, C>::validate(value_type max_value_limit) const {
446
17.0k
  if (size_ == 0 || array_ == NULL) return false;
447
17.0k
  if (array_[0].label() != '\0' || array_[0].has_leaf() ||
448
17.0k
      array_[0].offset() == 0 || ((0 ^ array_[0].offset()) | 0xFF) >= size_) {
449
0
    return false;
450
0
  }
451
765M
  for (std::size_t i = 1; i < size_; ++i) {
452
765M
    if (array_[i].label() <= 0xFF) {
453
510M
      if (((i ^ array_[i].offset()) | 0xFF) >= size_) {
454
0
        return false;
455
0
      }
456
510M
    } else {
457
255M
      if (max_value_limit >= 0) {
458
255M
        if (array_[i].value() >= max_value_limit) {
459
0
          return false;
460
0
        }
461
255M
      }
462
255M
    }
463
765M
  }
464
17.0k
  return true;
465
17.0k
}
466
467
template <typename A, typename B, typename T, typename C>
468
template <typename U>
469
inline U DoubleArrayImpl<A, B, T, C>::exactMatchSearch(const key_type *key,
470
2.74M
    std::size_t length, std::size_t node_pos) const {
471
2.74M
  U result;
472
2.74M
  set_result(&result, static_cast<value_type>(-1), 0);
473
474
2.74M
  unit_type unit = array_[node_pos];
475
2.74M
  if (length != 0) {
476
7.08M
    for (std::size_t i = 0; i < length; ++i) {
477
4.66M
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[i]);
478
4.66M
      unit = array_[node_pos];
479
4.66M
      if (unit.label() != static_cast<uchar_type>(key[i])) {
480
327k
        return result;
481
327k
      }
482
4.66M
    }
483
2.74M
  } else {
484
375
    for ( ; key[length] != '\0'; ++length) {
485
2
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[length]);
486
2
      unit = array_[node_pos];
487
2
      if (unit.label() != static_cast<uchar_type>(key[length])) {
488
2
        return result;
489
2
      }
490
2
    }
491
375
  }
492
493
2.41M
  if (!unit.has_leaf()) {
494
871
    return result;
495
871
  }
496
2.41M
  unit = array_[node_pos ^ unit.offset()];
497
2.41M
  set_result(&result, static_cast<value_type>(unit.value()), length);
498
2.41M
  return result;
499
2.41M
}
500
501
template <typename A, typename B, typename T, typename C>
502
template <typename U>
503
inline std::size_t DoubleArrayImpl<A, B, T, C>::commonPrefixSearch(
504
    const key_type *key, U *results, std::size_t max_num_results,
505
155M
    std::size_t length, std::size_t node_pos) const {
506
155M
  std::size_t num_results = 0;
507
508
155M
  unit_type unit = array_[node_pos];
509
155M
  node_pos ^= unit.offset();
510
155M
  if (length != 0) {
511
793M
    for (std::size_t i = 0; i < length; ++i) {
512
774M
      node_pos ^= static_cast<uchar_type>(key[i]);
513
774M
      unit = array_[node_pos];
514
774M
      if (unit.label() != static_cast<uchar_type>(key[i])) {
515
135M
        return num_results;
516
135M
      }
517
518
638M
      node_pos ^= unit.offset();
519
638M
      if (unit.has_leaf()) {
520
234M
        if (num_results < max_num_results) {
521
234M
          set_result(&results[num_results], static_cast<value_type>(
522
234M
              array_[node_pos].value()), i + 1);
523
234M
        }
524
234M
        ++num_results;
525
234M
      }
526
638M
    }
527
155M
  } else {
528
0
    for ( ; key[length] != '\0'; ++length) {
529
0
      node_pos ^= static_cast<uchar_type>(key[length]);
530
0
      unit = array_[node_pos];
531
0
      if (unit.label() != static_cast<uchar_type>(key[length])) {
532
0
        return num_results;
533
0
      }
534
535
0
      node_pos ^= unit.offset();
536
0
      if (unit.has_leaf()) {
537
0
        if (num_results < max_num_results) {
538
0
          set_result(&results[num_results], static_cast<value_type>(
539
0
              array_[node_pos].value()), length + 1);
540
0
        }
541
0
        ++num_results;
542
0
      }
543
0
    }
544
0
  }
545
546
19.4M
  return num_results;
547
155M
}
548
549
template <typename A, typename B, typename T, typename C>
550
inline typename DoubleArrayImpl<A, B, T, C>::value_type
551
DoubleArrayImpl<A, B, T, C>::traverse(const key_type *key,
552
61.2M
    std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const {
553
61.2M
  id_type id = static_cast<id_type>(node_pos);
554
61.2M
  unit_type unit = array_[id];
555
556
61.2M
  if (length != 0) {
557
73.1M
    for ( ; key_pos < length; ++key_pos) {
558
61.2M
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
559
61.2M
      unit = array_[id];
560
61.2M
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
561
49.3M
        return static_cast<value_type>(-2);
562
49.3M
      }
563
11.9M
      node_pos = id;
564
11.9M
    }
565
61.2M
  } else {
566
0
    for ( ; key[key_pos] != '\0'; ++key_pos) {
567
0
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
568
0
      unit = array_[id];
569
0
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
570
0
        return static_cast<value_type>(-2);
571
0
      }
572
0
      node_pos = id;
573
0
    }
574
0
  }
575
576
11.9M
  if (!unit.has_leaf()) {
577
5.79M
    return static_cast<value_type>(-1);
578
5.79M
  }
579
6.12M
  unit = array_[id ^ unit.offset()];
580
6.12M
  return static_cast<value_type>(unit.value());
581
11.9M
}
582
583
namespace Details {
584
585
//
586
// Memory management of array.
587
//
588
589
template <typename T>
590
class AutoArray {
591
 public:
592
5.96M
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<char>::AutoArray(char*)
Line
Count
Source
592
5.64M
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::AutoArray(Darts::Details::DoubleArrayBuilderExtraUnit*)
Line
Count
Source
592
115k
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<unsigned int>::AutoArray(unsigned int*)
Line
Count
Source
592
203k
  explicit AutoArray(T *array = NULL) : array_(array) {}
593
5.96M
  ~AutoArray() {
594
5.96M
    clear();
595
5.96M
  }
Darts::Details::AutoArray<char>::~AutoArray()
Line
Count
Source
593
5.64M
  ~AutoArray() {
594
5.64M
    clear();
595
5.64M
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::~AutoArray()
Line
Count
Source
593
115k
  ~AutoArray() {
594
115k
    clear();
595
115k
  }
Darts::Details::AutoArray<unsigned int>::~AutoArray()
Line
Count
Source
593
203k
  ~AutoArray() {
594
203k
    clear();
595
203k
  }
596
597
1.41G
  const T &operator[](std::size_t id) const {
598
1.41G
    return array_[id];
599
1.41G
  }
Darts::Details::AutoArray<char>::operator[](unsigned long) const
Line
Count
Source
597
974M
  const T &operator[](std::size_t id) const {
598
974M
    return array_[id];
599
974M
  }
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::operator[](unsigned long) const
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long) const
Line
Count
Source
597
436M
  const T &operator[](std::size_t id) const {
598
436M
    return array_[id];
599
436M
  }
600
1.96G
  T &operator[](std::size_t id) {
601
1.96G
    return array_[id];
602
1.96G
  }
Darts::Details::AutoArray<char>::operator[](unsigned long)
Line
Count
Source
600
1.46G
  T &operator[](std::size_t id) {
601
1.46G
    return array_[id];
602
1.46G
  }
Darts::Details::AutoArray<unsigned int>::operator[](unsigned long)
Line
Count
Source
600
1.02M
  T &operator[](std::size_t id) {
601
1.02M
    return array_[id];
602
1.02M
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long)
Line
Count
Source
600
495M
  T &operator[](std::size_t id) {
601
495M
    return array_[id];
602
495M
  }
603
604
  bool empty() const {
605
    return array_ == NULL;
606
  }
607
608
8.00M
  void clear() {
609
8.00M
    if (array_ != NULL) {
610
2.74M
      delete[] array_;
611
2.74M
      array_ = NULL;
612
2.74M
    }
613
8.00M
  }
Darts::Details::AutoArray<char>::clear()
Line
Count
Source
608
7.31M
  void clear() {
609
7.31M
    if (array_ != NULL) {
610
2.59M
      delete[] array_;
611
      array_ = NULL;
612
2.59M
    }
613
7.31M
  }
Darts::Details::AutoArray<unsigned int>::clear()
Line
Count
Source
608
456k
  void clear() {
609
456k
    if (array_ != NULL) {
610
97.4k
      delete[] array_;
611
      array_ = NULL;
612
97.4k
    }
613
456k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::clear()
Line
Count
Source
608
230k
  void clear() {
609
230k
    if (array_ != NULL) {
610
57.7k
      delete[] array_;
611
      array_ = NULL;
612
57.7k
    }
613
230k
  }
614
5.34M
  void swap(AutoArray *array) {
615
5.34M
    T *temp = array_;
616
5.34M
    array_ = array->array_;
617
5.34M
    array->array_ = temp;
618
5.34M
  }
Darts::Details::AutoArray<char>::swap(Darts::Details::AutoArray<char>*)
Line
Count
Source
614
5.18M
  void swap(AutoArray *array) {
615
5.18M
    T *temp = array_;
616
5.18M
    array_ = array->array_;
617
5.18M
    array->array_ = temp;
618
5.18M
  }
Darts::Details::AutoArray<unsigned int>::swap(Darts::Details::AutoArray<unsigned int>*)
Line
Count
Source
614
97.4k
  void swap(AutoArray *array) {
615
97.4k
    T *temp = array_;
616
97.4k
    array_ = array->array_;
617
97.4k
    array->array_ = temp;
618
97.4k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::swap(Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>*)
Line
Count
Source
614
57.7k
  void swap(AutoArray *array) {
615
57.7k
    T *temp = array_;
616
57.7k
    array_ = array->array_;
617
57.7k
    array->array_ = temp;
618
57.7k
  }
619
2.74M
  void reset(T *array = NULL) {
620
2.74M
    AutoArray(array).swap(this);
621
2.74M
  }
Darts::Details::AutoArray<char>::reset(char*)
Line
Count
Source
619
2.59M
  void reset(T *array = NULL) {
620
2.59M
    AutoArray(array).swap(this);
621
2.59M
  }
Darts::Details::AutoArray<unsigned int>::reset(unsigned int*)
Line
Count
Source
619
97.4k
  void reset(T *array = NULL) {
620
97.4k
    AutoArray(array).swap(this);
621
97.4k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::reset(Darts::Details::DoubleArrayBuilderExtraUnit*)
Line
Count
Source
619
57.7k
  void reset(T *array = NULL) {
620
57.7k
    AutoArray(array).swap(this);
621
57.7k
  }
622
623
 private:
624
  T *array_;
625
626
  // Disallows copy and assignment.
627
  AutoArray(const AutoArray &);
628
  AutoArray &operator=(const AutoArray &);
629
};
630
631
//
632
// Memory management of resizable array.
633
//
634
635
template <typename T>
636
class AutoPool {
637
 public:
638
456k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::AutoPool()
Line
Count
Source
638
57.7k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<unsigned char>::AutoPool()
Line
Count
Source
638
106k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DawgNode>::AutoPool()
Line
Count
Source
638
48.7k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DawgUnit>::AutoPool()
Line
Count
Source
638
48.7k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<unsigned int>::AutoPool()
Line
Count
Source
638
194k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
639
456k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<unsigned char>::~AutoPool()
Line
Count
Source
639
106k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::~AutoPool()
Line
Count
Source
639
57.7k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<unsigned int>::~AutoPool()
Line
Count
Source
639
194k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::~AutoPool()
Line
Count
Source
639
48.7k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DawgNode>::~AutoPool()
Line
Count
Source
639
48.7k
  ~AutoPool() { clear(); }
640
641
974M
  const T &operator[](std::size_t id) const {
642
974M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
974M
  }
Darts::Details::AutoPool<unsigned int>::operator[](unsigned long) const
Line
Count
Source
641
127M
  const T &operator[](std::size_t id) const {
642
127M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
127M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long) const
Line
Count
Source
641
276M
  const T &operator[](std::size_t id) const {
642
276M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
276M
  }
Darts::Details::AutoPool<unsigned char>::operator[](unsigned long) const
Line
Count
Source
641
370M
  const T &operator[](std::size_t id) const {
642
370M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
370M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long) const
Line
Count
Source
641
155M
  const T &operator[](std::size_t id) const {
642
155M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
155M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long) const
Line
Count
Source
641
44.5M
  const T &operator[](std::size_t id) const {
642
44.5M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
643
44.5M
  }
644
1.46G
  T &operator[](std::size_t id) {
645
1.46G
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
1.46G
  }
Darts::Details::AutoPool<unsigned int>::operator[](unsigned long)
Line
Count
Source
644
468M
  T &operator[](std::size_t id) {
645
468M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
468M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long)
Line
Count
Source
644
479M
  T &operator[](std::size_t id) {
645
479M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
479M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long)
Line
Count
Source
644
111M
  T &operator[](std::size_t id) {
645
111M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
111M
  }
Darts::Details::AutoPool<unsigned char>::operator[](unsigned long)
Line
Count
Source
644
236M
  T &operator[](std::size_t id) {
645
236M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
236M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long)
Line
Count
Source
644
165M
  T &operator[](std::size_t id) {
645
165M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
646
165M
  }
647
648
32.1M
  bool empty() const {
649
32.1M
    return size_ == 0;
650
32.1M
  }
Darts::Details::AutoPool<unsigned int>::empty() const
Line
Count
Source
648
31.8M
  bool empty() const {
649
31.8M
    return size_ == 0;
650
31.8M
  }
Darts::Details::AutoPool<unsigned char>::empty() const
Line
Count
Source
648
252k
  bool empty() const {
649
252k
    return size_ == 0;
650
252k
  }
651
506M
  std::size_t size() const {
652
506M
    return size_;
653
506M
  }
Darts::Details::AutoPool<unsigned int>::size() const
Line
Count
Source
651
238M
  std::size_t size() const {
652
238M
    return size_;
653
238M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::size() const
Line
Count
Source
651
20.8M
  std::size_t size() const {
652
20.8M
    return size_;
653
20.8M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::size() const
Line
Count
Source
651
3.15M
  std::size_t size() const {
652
3.15M
    return size_;
653
3.15M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::size() const
Line
Count
Source
651
114M
  std::size_t size() const {
652
114M
    return size_;
653
114M
  }
Darts::Details::AutoPool<unsigned char>::size() const
Line
Count
Source
651
129M
  std::size_t size() const {
652
129M
    return size_;
653
129M
  }
654
655
1.66M
  void clear() {
656
1.66M
    resize(0);
657
1.66M
    buf_.clear();
658
1.66M
    size_ = 0;
659
1.66M
    capacity_ = 0;
660
1.66M
  }
Darts::Details::AutoPool<unsigned int>::clear()
Line
Count
Source
655
891k
  void clear() {
656
891k
    resize(0);
657
891k
    buf_.clear();
658
891k
    size_ = 0;
659
891k
    capacity_ = 0;
660
891k
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::clear()
Line
Count
Source
655
194k
  void clear() {
656
194k
    resize(0);
657
194k
    buf_.clear();
658
194k
    size_ = 0;
659
194k
    capacity_ = 0;
660
194k
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::clear()
Line
Count
Source
655
146k
  void clear() {
656
146k
    resize(0);
657
146k
    buf_.clear();
658
146k
    size_ = 0;
659
146k
    capacity_ = 0;
660
146k
  }
Darts::Details::AutoPool<unsigned char>::clear()
Line
Count
Source
655
319k
  void clear() {
656
319k
    resize(0);
657
319k
    buf_.clear();
658
319k
    size_ = 0;
659
319k
    capacity_ = 0;
660
319k
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::clear()
Line
Count
Source
655
115k
  void clear() {
656
115k
    resize(0);
657
115k
    buf_.clear();
658
115k
    size_ = 0;
659
115k
    capacity_ = 0;
660
115k
  }
661
662
63.7M
  void push_back(const T &value) {
663
63.7M
    append(value);
664
63.7M
  }
665
60.6M
  void pop_back() {
666
60.6M
    (*this)[--size_].~T();
667
60.6M
  }
668
669
66.9M
  void append() {
670
66.9M
    if (size_ == capacity_)
671
1.27M
      resize_buf(size_ + 1);
672
66.9M
    new(&(*this)[size_++]) T;
673
66.9M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::append()
Line
Count
Source
669
31.8M
  void append() {
670
31.8M
    if (size_ == capacity_)
671
465k
      resize_buf(size_ + 1);
672
31.8M
    new(&(*this)[size_++]) T;
673
31.8M
  }
Darts::Details::AutoPool<unsigned char>::append()
Line
Count
Source
669
31.8M
  void append() {
670
31.8M
    if (size_ == capacity_)
671
465k
      resize_buf(size_ + 1);
672
31.8M
    new(&(*this)[size_++]) T;
673
31.8M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::append()
Line
Count
Source
669
3.15M
  void append() {
670
3.15M
    if (size_ == capacity_)
671
344k
      resize_buf(size_ + 1);
672
3.15M
    new(&(*this)[size_++]) T;
673
3.15M
  }
674
96.7M
  void append(const T &value) {
675
96.7M
    if (size_ == capacity_)
676
1.16M
      resize_buf(size_ + 1);
677
96.7M
    new(&(*this)[size_++]) T(value);
678
96.7M
  }
Darts::Details::AutoPool<unsigned int>::append(unsigned int const&)
Line
Count
Source
674
64.7M
  void append(const T &value) {
675
64.7M
    if (size_ == capacity_)
676
860k
      resize_buf(size_ + 1);
677
64.7M
    new(&(*this)[size_++]) T(value);
678
64.7M
  }
Darts::Details::AutoPool<unsigned char>::append(unsigned char const&)
Line
Count
Source
674
32.0M
  void append(const T &value) {
675
32.0M
    if (size_ == capacity_)
676
305k
      resize_buf(size_ + 1);
677
32.0M
    new(&(*this)[size_++]) T(value);
678
32.0M
  }
679
680
26.4M
  void resize(std::size_t size) {
681
267M
    while (size_ > size) {
682
241M
      (*this)[--size_].~T();
683
241M
    }
684
26.4M
    if (size > capacity_) {
685
32.1k
      resize_buf(size);
686
32.1k
    }
687
70.9M
    while (size_ < size) {
688
44.5M
      new(&(*this)[size_++]) T;
689
44.5M
    }
690
26.4M
  }
Darts::Details::AutoPool<unsigned int>::resize(unsigned long)
Line
Count
Source
680
891k
  void resize(std::size_t size) {
681
98.6M
    while (size_ > size) {
682
97.8M
      (*this)[--size_].~T();
683
97.8M
    }
684
891k
    if (size > capacity_) {
685
0
      resize_buf(size);
686
0
    }
687
891k
    while (size_ < size) {
688
0
      new(&(*this)[size_++]) T;
689
0
    }
690
891k
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::resize(unsigned long)
Line
Count
Source
680
194k
  void resize(std::size_t size) {
681
3.35M
    while (size_ > size) {
682
3.15M
      (*this)[--size_].~T();
683
3.15M
    }
684
194k
    if (size > capacity_) {
685
0
      resize_buf(size);
686
0
    }
687
194k
    while (size_ < size) {
688
0
      new(&(*this)[size_++]) T;
689
0
    }
690
194k
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize(unsigned long)
Line
Count
Source
680
146k
  void resize(std::size_t size) {
681
32.0M
    while (size_ > size) {
682
31.8M
      (*this)[--size_].~T();
683
31.8M
    }
684
146k
    if (size > capacity_) {
685
0
      resize_buf(size);
686
0
    }
687
146k
    while (size_ < size) {
688
0
      new(&(*this)[size_++]) T;
689
0
    }
690
146k
  }
Darts::Details::AutoPool<unsigned char>::resize(unsigned long)
Line
Count
Source
680
24.9M
  void resize(std::size_t size) {
681
88.7M
    while (size_ > size) {
682
63.8M
      (*this)[--size_].~T();
683
63.8M
    }
684
24.9M
    if (size > capacity_) {
685
0
      resize_buf(size);
686
0
    }
687
24.9M
    while (size_ < size) {
688
0
      new(&(*this)[size_++]) T;
689
0
    }
690
24.9M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize(unsigned long)
Line
Count
Source
680
289k
  void resize(std::size_t size) {
681
44.8M
    while (size_ > size) {
682
44.5M
      (*this)[--size_].~T();
683
44.5M
    }
684
289k
    if (size > capacity_) {
685
32.1k
      resize_buf(size);
686
32.1k
    }
687
44.8M
    while (size_ < size) {
688
44.5M
      new(&(*this)[size_++]) T;
689
44.5M
    }
690
289k
  }
691
62.4k
  void resize(std::size_t size, const T &value) {
692
62.4k
    while (size_ > size) {
693
0
      (*this)[--size_].~T();
694
0
    }
695
62.4k
    if (size > capacity_) {
696
62.4k
      resize_buf(size);
697
62.4k
    }
698
93.7M
    while (size_ < size) {
699
93.6M
      new(&(*this)[size_++]) T(value);
700
93.6M
    }
701
62.4k
  }
702
703
57.7k
  void reserve(std::size_t size) {
704
57.7k
    if (size > capacity_) {
705
57.7k
      resize_buf(size);
706
57.7k
    }
707
57.7k
  }
708
709
 private:
710
  AutoArray<char> buf_;
711
  std::size_t size_;
712
  std::size_t capacity_;
713
714
  // Disallows copy and assignment.
715
  AutoPool(const AutoPool &);
716
  AutoPool &operator=(const AutoPool &);
717
718
  void resize_buf(std::size_t size);
719
};
720
721
template <typename T>
722
2.59M
void AutoPool<T>::resize_buf(std::size_t size) {
723
2.59M
  std::size_t capacity;
724
2.59M
  if (size >= capacity_ * 2) {
725
843k
    capacity = size;
726
1.75M
  } else {
727
1.75M
    capacity = 1;
728
9.82M
    while (capacity < size) {
729
8.07M
      capacity <<= 1;
730
8.07M
    }
731
1.75M
  }
732
733
2.59M
  AutoArray<char> buf;
734
2.59M
  try {
735
2.59M
    buf.reset(new char[sizeof(T) * capacity]);
736
2.59M
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
2.59M
  if (size_ > 0) {
741
2.10M
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
2.10M
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
112M
    for (std::size_t i = 0; i < size_; ++i) {
744
110M
      new(&dest[i]) T(src[i]);
745
110M
      src[i].~T();
746
110M
    }
747
2.10M
  }
748
749
2.59M
  buf_.swap(&buf);
750
2.59M
  capacity_ = capacity;
751
2.59M
}
Darts::Details::AutoPool<unsigned int>::resize_buf(unsigned long)
Line
Count
Source
722
922k
void AutoPool<T>::resize_buf(std::size_t size) {
723
922k
  std::size_t capacity;
724
922k
  if (size >= capacity_ * 2) {
725
350k
    capacity = size;
726
572k
  } else {
727
572k
    capacity = 1;
728
2.76M
    while (capacity < size) {
729
2.19M
      capacity <<= 1;
730
2.19M
    }
731
572k
  }
732
733
922k
  AutoArray<char> buf;
734
922k
  try {
735
922k
    buf.reset(new char[sizeof(T) * capacity]);
736
922k
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
922k
  if (size_ > 0) {
741
714k
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
714k
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
8.34M
    for (std::size_t i = 0; i < size_; ++i) {
744
7.63M
      new(&dest[i]) T(src[i]);
745
7.63M
      src[i].~T();
746
7.63M
    }
747
714k
  }
748
749
922k
  buf_.swap(&buf);
750
922k
  capacity_ = capacity;
751
922k
}
Darts::Details::AutoPool<Darts::Details::DawgNode>::resize_buf(unsigned long)
Line
Count
Source
722
344k
void AutoPool<T>::resize_buf(std::size_t size) {
723
344k
  std::size_t capacity;
724
344k
  if (size >= capacity_ * 2) {
725
97.4k
    capacity = size;
726
247k
  } else {
727
247k
    capacity = 1;
728
1.28M
    while (capacity < size) {
729
1.03M
      capacity <<= 1;
730
1.03M
    }
731
247k
  }
732
733
344k
  AutoArray<char> buf;
734
344k
  try {
735
344k
    buf.reset(new char[sizeof(T) * capacity]);
736
344k
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
344k
  if (size_ > 0) {
741
296k
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
296k
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
4.70M
    for (std::size_t i = 0; i < size_; ++i) {
744
4.40M
      new(&dest[i]) T(src[i]);
745
4.40M
      src[i].~T();
746
4.40M
    }
747
296k
  }
748
749
344k
  buf_.swap(&buf);
750
344k
  capacity_ = capacity;
751
344k
}
Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize_buf(unsigned long)
Line
Count
Source
722
465k
void AutoPool<T>::resize_buf(std::size_t size) {
723
465k
  std::size_t capacity;
724
465k
  if (size >= capacity_ * 2) {
725
97.4k
    capacity = size;
726
368k
  } else {
727
368k
    capacity = 1;
728
2.42M
    while (capacity < size) {
729
2.05M
      capacity <<= 1;
730
2.05M
    }
731
368k
  }
732
733
465k
  AutoArray<char> buf;
734
465k
  try {
735
465k
    buf.reset(new char[sizeof(T) * capacity]);
736
465k
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
465k
  if (size_ > 0) {
741
417k
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
417k
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
46.2M
    for (std::size_t i = 0; i < size_; ++i) {
744
45.8M
      new(&dest[i]) T(src[i]);
745
45.8M
      src[i].~T();
746
45.8M
    }
747
417k
  }
748
749
465k
  buf_.swap(&buf);
750
465k
  capacity_ = capacity;
751
465k
}
Darts::Details::AutoPool<unsigned char>::resize_buf(unsigned long)
Line
Count
Source
722
771k
void AutoPool<T>::resize_buf(std::size_t size) {
723
771k
  std::size_t capacity;
724
771k
  if (size >= capacity_ * 2) {
725
211k
    capacity = size;
726
559k
  } else {
727
559k
    capacity = 1;
728
3.30M
    while (capacity < size) {
729
2.74M
      capacity <<= 1;
730
2.74M
    }
731
559k
  }
732
733
771k
  AutoArray<char> buf;
734
771k
  try {
735
771k
    buf.reset(new char[sizeof(T) * capacity]);
736
771k
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
771k
  if (size_ > 0) {
741
664k
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
664k
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
48.4M
    for (std::size_t i = 0; i < size_; ++i) {
744
47.8M
      new(&dest[i]) T(src[i]);
745
47.8M
      src[i].~T();
746
47.8M
    }
747
664k
  }
748
749
771k
  buf_.swap(&buf);
750
771k
  capacity_ = capacity;
751
771k
}
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize_buf(unsigned long)
Line
Count
Source
722
89.8k
void AutoPool<T>::resize_buf(std::size_t size) {
723
89.8k
  std::size_t capacity;
724
89.8k
  if (size >= capacity_ * 2) {
725
85.8k
    capacity = size;
726
85.8k
  } else {
727
4.08k
    capacity = 1;
728
46.7k
    while (capacity < size) {
729
42.6k
      capacity <<= 1;
730
42.6k
    }
731
4.08k
  }
732
733
89.8k
  AutoArray<char> buf;
734
89.8k
  try {
735
89.8k
    buf.reset(new char[sizeof(T) * capacity]);
736
89.8k
  } catch (const std::bad_alloc &) {
737
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
738
0
  }
739
740
89.8k
  if (size_ > 0) {
741
8.94k
    T *src = reinterpret_cast<T *>(&buf_[0]);
742
8.94k
    T *dest = reinterpret_cast<T *>(&buf[0]);
743
4.59M
    for (std::size_t i = 0; i < size_; ++i) {
744
4.58M
      new(&dest[i]) T(src[i]);
745
4.58M
      src[i].~T();
746
4.58M
    }
747
8.94k
  }
748
749
89.8k
  buf_.swap(&buf);
750
89.8k
  capacity_ = capacity;
751
89.8k
}
752
753
//
754
// Memory management of stack.
755
//
756
757
template <typename T>
758
class AutoStack {
759
 public:
760
97.4k
  AutoStack() : pool_() {}
761
97.4k
  ~AutoStack() {
762
97.4k
    clear();
763
97.4k
  }
764
765
  const T &top() const {
766
    return pool_[size() - 1];
767
  }
768
109M
  T &top() {
769
109M
    return pool_[size() - 1];
770
109M
  }
771
772
31.8M
  bool empty() const {
773
31.8M
    return pool_.empty();
774
31.8M
  }
775
109M
  std::size_t size() const {
776
109M
    return pool_.size();
777
109M
  }
778
779
63.7M
  void push(const T &value) {
780
63.7M
    pool_.push_back(value);
781
63.7M
  }
782
60.6M
  void pop() {
783
60.6M
    pool_.pop_back();
784
60.6M
  }
785
786
389k
  void clear() {
787
389k
    pool_.clear();
788
389k
  }
789
790
 private:
791
  AutoPool<T> pool_;
792
793
  // Disallows copy and assignment.
794
  AutoStack(const AutoStack &);
795
  AutoStack &operator=(const AutoStack &);
796
};
797
798
//
799
// Succinct bit vector.
800
//
801
802
class BitVector {
803
 public:
804
48.7k
  BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {}
805
48.7k
  ~BitVector() {
806
48.7k
    clear();
807
48.7k
  }
808
809
48.9M
  bool operator[](std::size_t id) const {
810
48.9M
    return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1;
811
48.9M
  }
812
813
0
  id_type rank(std::size_t id) const {
814
0
    std::size_t unit_id = id / UNIT_SIZE;
815
0
    return ranks_[unit_id] + pop_count(units_[unit_id]
816
0
        & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1)));
817
0
  }
818
819
0
  void set(std::size_t id, bool bit) {
820
0
    if (bit) {
821
0
      units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE);
822
0
    } else {
823
0
      units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE));
824
0
    }
825
0
  }
826
827
0
  bool empty() const {
828
0
    return units_.empty();
829
0
  }
830
97.4k
  std::size_t num_ones() const {
831
97.4k
    return num_ones_;
832
97.4k
  }
833
31.8M
  std::size_t size() const {
834
31.8M
    return size_;
835
31.8M
  }
836
837
31.8M
  void append() {
838
31.8M
    if ((size_ % UNIT_SIZE) == 0) {
839
1.02M
      units_.append(0);
840
1.02M
    }
841
31.8M
    ++size_;
842
31.8M
  }
843
  void build();
844
845
146k
  void clear() {
846
146k
    units_.clear();
847
146k
    ranks_.clear();
848
146k
  }
849
850
 private:
851
  enum { UNIT_SIZE = sizeof(id_type) * 8 };
852
853
  AutoPool<id_type> units_;
854
  AutoArray<id_type> ranks_;
855
  std::size_t num_ones_;
856
  std::size_t size_;
857
858
  // Disallows copy and assignment.
859
  BitVector(const BitVector &);
860
  BitVector &operator=(const BitVector &);
861
862
1.02M
  static id_type pop_count(id_type unit) {
863
1.02M
    unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555);
864
1.02M
    unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333);
865
1.02M
    unit = ((unit >> 4) + unit) & 0x0F0F0F0F;
866
1.02M
    unit += unit >> 8;
867
1.02M
    unit += unit >> 16;
868
1.02M
    return unit & 0xFF;
869
1.02M
  }
870
};
871
872
48.7k
inline void BitVector::build() {
873
48.7k
  try {
874
48.7k
    ranks_.reset(new id_type[units_.size()]);
875
48.7k
  } catch (const std::bad_alloc &) {
876
0
    DARTS_THROW("failed to build rank index: std::bad_alloc");
877
0
  }
878
879
48.7k
  num_ones_ = 0;
880
1.06M
  for (std::size_t i = 0; i < units_.size(); ++i) {
881
1.02M
    ranks_[i] = num_ones_;
882
1.02M
    num_ones_ += pop_count(units_[i]);
883
1.02M
  }
884
48.7k
}
885
886
//
887
// Keyset.
888
//
889
890
template <typename T>
891
class Keyset {
892
 public:
893
  Keyset(std::size_t num_keys, const char_type * const *keys,
894
      const std::size_t *lengths, const T *values) :
895
57.7k
      num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {}
896
897
7.51M
  std::size_t num_keys() const {
898
7.51M
    return num_keys_;
899
7.51M
  }
900
7.42M
  const char_type *keys(std::size_t id) const {
901
7.42M
    return keys_[id];
902
7.42M
  }
903
628k
  uchar_type keys(std::size_t key_id, std::size_t char_id) const {
904
628k
    if (has_lengths() && char_id >= lengths_[key_id])
905
87.1k
      return '\0';
906
541k
    return keys_[key_id][char_id];
907
628k
  }
908
909
8.13M
  bool has_lengths() const {
910
8.13M
    return lengths_ != NULL;
911
8.13M
  }
912
7.46M
  std::size_t lengths(std::size_t id) const {
913
7.46M
    if (has_lengths()) {
914
7.46M
      return lengths_[id];
915
7.46M
    }
916
0
    std::size_t length = 0;
917
0
    while (keys_[id][length] != '\0') {
918
0
      ++length;
919
0
    }
920
0
    return length;
921
7.46M
  }
922
923
7.56M
  bool has_values() const {
924
7.56M
    return values_ != NULL;
925
7.56M
  }
926
7.50M
  const value_type values(std::size_t id) const {
927
7.50M
    if (has_values()) {
928
7.42M
      return static_cast<value_type>(values_[id]);
929
7.42M
    }
930
87.1k
    return static_cast<value_type>(id);
931
7.50M
  }
932
933
 private:
934
  std::size_t num_keys_;
935
  const char_type * const * keys_;
936
  const std::size_t *lengths_;
937
  const T *values_;
938
939
  // Disallows copy and assignment.
940
  Keyset(const Keyset &);
941
  Keyset &operator=(const Keyset &);
942
};
943
944
//
945
// Node of Directed Acyclic Word Graph (DAWG).
946
//
947
948
class DawgNode {
949
 public:
950
31.8M
  DawgNode() : child_(0), sibling_(0), label_('\0'),
951
31.8M
    is_state_(false), has_sibling_(false) {}
952
953
56.2M
  void set_child(id_type child) {
954
56.2M
    child_ = child;
955
56.2M
  }
956
31.8M
  void set_sibling(id_type sibling) {
957
31.8M
    sibling_ = sibling;
958
31.8M
  }
959
7.42M
  void set_value(value_type value) {
960
7.42M
    child_ = value;
961
7.42M
  }
962
31.8M
  void set_label(uchar_type label) {
963
31.8M
    label_ = label;
964
31.8M
  }
965
24.4M
  void set_is_state(bool is_state) {
966
24.4M
    is_state_ = is_state;
967
24.4M
  }
968
7.37M
  void set_has_sibling(bool has_sibling) {
969
7.37M
    has_sibling_ = has_sibling;
970
7.37M
  }
971
972
94.7M
  id_type child() const {
973
94.7M
    return child_;
974
94.7M
  }
975
160M
  id_type sibling() const {
976
160M
    return sibling_;
977
160M
  }
978
0
  value_type value() const {
979
0
    return static_cast<value_type>(child_);
980
0
  }
981
94.7M
  uchar_type label() const {
982
94.7M
    return label_;
983
94.7M
  }
984
0
  bool is_state() const {
985
0
    return is_state_;
986
0
  }
987
0
  bool has_sibling() const {
988
0
    return has_sibling_;
989
0
  }
990
991
89.9M
  id_type unit() const {
992
89.9M
    if (label_ == '\0') {
993
19.8M
      return (child_ << 1) | (has_sibling_ ? 1 : 0);
994
19.8M
    }
995
70.1M
    return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0);
996
89.9M
  }
997
998
 private:
999
  id_type child_;
1000
  id_type sibling_;
1001
  uchar_type label_;
1002
  bool is_state_;
1003
  bool has_sibling_;
1004
1005
  // Copyable.
1006
};
1007
1008
//
1009
// Fixed unit of Directed Acyclic Word Graph (DAWG).
1010
//
1011
1012
class DawgUnit {
1013
 public:
1014
31.8M
  explicit DawgUnit(id_type unit = 0) : unit_(unit) {}
1015
45.8M
  DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {}
1016
1017
31.8M
  DawgUnit &operator=(id_type unit) {
1018
31.8M
    unit_ = unit;
1019
31.8M
    return *this;
1020
31.8M
  }
1021
1022
46.6M
  id_type unit() const {
1023
46.6M
    return unit_;
1024
46.6M
  }
1025
1026
73.4M
  id_type child() const {
1027
73.4M
    return unit_ >> 2;
1028
73.4M
  }
1029
149M
  bool has_sibling() const {
1030
149M
    return (unit_ & 1) == 1;
1031
149M
  }
1032
7.42M
  value_type value() const {
1033
7.42M
    return static_cast<value_type>(unit_ >> 1);
1034
7.42M
  }
1035
15.7M
  bool is_state() const {
1036
15.7M
    return (unit_ & 2) == 2;
1037
15.7M
  }
1038
1039
 private:
1040
  id_type unit_;
1041
1042
  // Copyable.
1043
};
1044
1045
//
1046
// Directed Acyclic Word Graph (DAWG) builder.
1047
//
1048
1049
class DawgBuilder {
1050
 public:
1051
48.7k
  DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(),
1052
48.7k
    table_(), node_stack_(), recycle_bin_(), num_states_(0) {}
1053
48.7k
  ~DawgBuilder() {
1054
48.7k
    clear();
1055
48.7k
  }
1056
1057
97.4k
  id_type root() const {
1058
97.4k
    return 0;
1059
97.4k
  }
1060
1061
73.4M
  id_type child(id_type id) const {
1062
73.4M
    return units_[id].child();
1063
73.4M
  }
1064
95.5M
  id_type sibling(id_type id) const {
1065
95.5M
    return units_[id].has_sibling() ? (id + 1) : 0;
1066
95.5M
  }
1067
7.42M
  int value(id_type id) const {
1068
7.42M
    return units_[id].value();
1069
7.42M
  }
1070
1071
31.8M
  bool is_leaf(id_type id) const {
1072
31.8M
    return label(id) == '\0';
1073
31.8M
  }
1074
95.5M
  uchar_type label(id_type id) const {
1075
95.5M
    return labels_[id];
1076
95.5M
  }
1077
1078
48.9M
  bool is_intersection(id_type id) const {
1079
48.9M
    return is_intersections_[id];
1080
48.9M
  }
1081
0
  id_type intersection_id(id_type id) const {
1082
0
    return is_intersections_.rank(id) - 1;
1083
0
  }
1084
1085
97.4k
  std::size_t num_intersections() const {
1086
97.4k
    return is_intersections_.num_ones();
1087
97.4k
  }
1088
1089
465k
  std::size_t size() const {
1090
465k
    return units_.size();
1091
465k
  }
1092
1093
  void init();
1094
  void finish();
1095
1096
  void insert(const char *key, std::size_t length, value_type value);
1097
1098
  void clear();
1099
1100
 private:
1101
  enum { INITIAL_TABLE_SIZE = 1 << 10 };
1102
1103
  AutoPool<DawgNode> nodes_;
1104
  AutoPool<DawgUnit> units_;
1105
  AutoPool<uchar_type> labels_;
1106
  BitVector is_intersections_;
1107
  AutoPool<id_type> table_;
1108
  AutoStack<id_type> node_stack_;
1109
  AutoStack<id_type> recycle_bin_;
1110
  std::size_t num_states_;
1111
1112
  // Disallows copy and assignment.
1113
  DawgBuilder(const DawgBuilder &);
1114
  DawgBuilder &operator=(const DawgBuilder &);
1115
1116
  void flush(id_type id);
1117
1118
  void expand_table();
1119
1120
  id_type find_unit(id_type id, id_type *hash_id) const;
1121
  id_type find_node(id_type node_id, id_type *hash_id) const;
1122
1123
  bool are_equal(id_type node_id, id_type unit_id) const;
1124
1125
  id_type hash_unit(id_type id) const;
1126
  id_type hash_node(id_type id) const;
1127
1128
  id_type append_node();
1129
  id_type append_unit();
1130
1131
31.8M
  void free_node(id_type id) {
1132
31.8M
    recycle_bin_.push(id);
1133
31.8M
  }
1134
1135
52.2M
  static id_type hash(id_type key) {
1136
52.2M
    key = ~key + (key << 15);  // key = (key << 15) - key - 1;
1137
52.2M
    key = key ^ (key >> 12);
1138
52.2M
    key = key + (key << 2);
1139
52.2M
    key = key ^ (key >> 4);
1140
52.2M
    key = key * 2057;  // key = (key + (key << 3)) + (key << 11);
1141
52.2M
    key = key ^ (key >> 16);
1142
52.2M
    return key;
1143
52.2M
  }
1144
};
1145
1146
48.7k
inline void DawgBuilder::init() {
1147
48.7k
  table_.resize(INITIAL_TABLE_SIZE, 0);
1148
1149
48.7k
  append_node();
1150
48.7k
  append_unit();
1151
1152
48.7k
  num_states_ = 1;
1153
1154
48.7k
  nodes_[0].set_label(0xFF);
1155
48.7k
  node_stack_.push(0);
1156
48.7k
}
1157
1158
48.7k
inline void DawgBuilder::finish() {
1159
48.7k
  flush(0);
1160
1161
48.7k
  units_[0] = nodes_[0].unit();
1162
48.7k
  labels_[0] = nodes_[0].label();
1163
1164
48.7k
  nodes_.clear();
1165
48.7k
  table_.clear();
1166
48.7k
  node_stack_.clear();
1167
48.7k
  recycle_bin_.clear();
1168
1169
48.7k
  is_intersections_.build();
1170
48.7k
}
1171
1172
inline void DawgBuilder::insert(const char *key, std::size_t length,
1173
7.42M
    value_type value) {
1174
7.42M
  if (value < 0) {
1175
0
    DARTS_THROW("failed to insert key: negative value");
1176
7.42M
  } else if (length == 0) {
1177
0
    DARTS_THROW("failed to insert key: zero-length key");
1178
0
  }
1179
1180
7.42M
  id_type id = 0;
1181
7.42M
  std::size_t key_pos = 0;
1182
1183
31.0M
  for ( ; key_pos <= length; ++key_pos) {
1184
31.0M
    id_type child_id = nodes_[id].child();
1185
31.0M
    if (child_id == 0) {
1186
48.7k
      break;
1187
48.7k
    }
1188
1189
30.9M
    uchar_type key_label = static_cast<uchar_type>(key[key_pos]);
1190
30.9M
    if (key_pos < length && key_label == '\0') {
1191
0
      DARTS_THROW("failed to insert key: invalid null character");
1192
0
    }
1193
1194
30.9M
    uchar_type unit_label = nodes_[child_id].label();
1195
30.9M
    if (key_label < unit_label) {
1196
0
      DARTS_THROW("failed to insert key: wrong key order");
1197
30.9M
    } else if (key_label > unit_label) {
1198
7.37M
      nodes_[child_id].set_has_sibling(true);
1199
7.37M
      flush(child_id);
1200
7.37M
      break;
1201
7.37M
    }
1202
23.6M
    id = child_id;
1203
23.6M
  }
1204
1205
7.42M
  if (key_pos > length) {
1206
0
    return;
1207
0
  }
1208
1209
39.2M
  for ( ; key_pos <= length; ++key_pos) {
1210
31.8M
    uchar_type key_label = static_cast<uchar_type>(
1211
31.8M
        (key_pos < length) ? key[key_pos] : '\0');
1212
31.8M
    id_type child_id = append_node();
1213
1214
31.8M
    if (nodes_[id].child() == 0) {
1215
24.4M
      nodes_[child_id].set_is_state(true);
1216
24.4M
    }
1217
31.8M
    nodes_[child_id].set_sibling(nodes_[id].child());
1218
31.8M
    nodes_[child_id].set_label(key_label);
1219
31.8M
    nodes_[id].set_child(child_id);
1220
31.8M
    node_stack_.push(child_id);
1221
1222
31.8M
    id = child_id;
1223
31.8M
  }
1224
7.42M
  nodes_[id].set_value(value);
1225
7.42M
}
1226
1227
97.4k
inline void DawgBuilder::clear() {
1228
97.4k
  nodes_.clear();
1229
97.4k
  units_.clear();
1230
97.4k
  labels_.clear();
1231
97.4k
  is_intersections_.clear();
1232
97.4k
  table_.clear();
1233
97.4k
  node_stack_.clear();
1234
97.4k
  recycle_bin_.clear();
1235
97.4k
  num_states_ = 0;
1236
97.4k
}
1237
1238
7.42M
inline void DawgBuilder::flush(id_type id) {
1239
31.8M
  while (node_stack_.top() != id) {
1240
24.4M
    id_type node_id = node_stack_.top();
1241
24.4M
    node_stack_.pop();
1242
1243
24.4M
    if (num_states_ >= table_.size() - (table_.size() >> 2)) {
1244
13.7k
      expand_table();
1245
13.7k
    }
1246
1247
24.4M
    id_type num_siblings = 0;
1248
56.2M
    for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1249
31.8M
      ++num_siblings;
1250
31.8M
    }
1251
1252
24.4M
    id_type hash_id;
1253
24.4M
    id_type match_id = find_node(node_id, &hash_id);
1254
24.4M
    if (match_id != 0) {
1255
0
      is_intersections_.set(match_id, true);
1256
24.4M
    } else {
1257
24.4M
      id_type unit_id = 0;
1258
56.2M
      for (id_type i = 0; i < num_siblings; ++i) {
1259
31.8M
        unit_id = append_unit();
1260
31.8M
      }
1261
56.2M
      for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1262
31.8M
        units_[unit_id] = nodes_[i].unit();
1263
31.8M
        labels_[unit_id] = nodes_[i].label();
1264
31.8M
        --unit_id;
1265
31.8M
      }
1266
24.4M
      match_id = unit_id + 1;
1267
24.4M
      table_[hash_id] = match_id;
1268
24.4M
      ++num_states_;
1269
24.4M
    }
1270
1271
56.2M
    for (id_type i = node_id, next; i != 0; i = next) {
1272
31.8M
      next = nodes_[i].sibling();
1273
31.8M
      free_node(i);
1274
31.8M
    }
1275
1276
24.4M
    nodes_[node_stack_.top()].set_child(match_id);
1277
24.4M
  }
1278
7.42M
  node_stack_.pop();
1279
7.42M
}
1280
1281
13.7k
inline void DawgBuilder::expand_table() {
1282
13.7k
  std::size_t table_size = table_.size() << 1;
1283
13.7k
  table_.clear();
1284
13.7k
  table_.resize(table_size, 0);
1285
1286
20.4M
  for (std::size_t i = 1; i < units_.size(); ++i) {
1287
20.3M
    id_type id = static_cast<id_type>(i);
1288
20.3M
    if (labels_[id] == '\0' || units_[id].is_state()) {
1289
16.3M
      id_type hash_id;
1290
16.3M
      find_unit(id, &hash_id);
1291
16.3M
      table_[hash_id] = id;
1292
16.3M
    }
1293
20.3M
  }
1294
13.7k
}
1295
1296
16.3M
inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const {
1297
16.3M
  *hash_id = hash_unit(id) % table_.size();
1298
21.3M
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1299
21.3M
    id_type unit_id = table_[*hash_id];
1300
21.3M
    if (unit_id == 0) {
1301
16.3M
      break;
1302
16.3M
    }
1303
1304
    // There must not be the same unit.
1305
21.3M
  }
1306
16.3M
  return 0;
1307
16.3M
}
1308
1309
inline id_type DawgBuilder::find_node(id_type node_id,
1310
24.4M
    id_type *hash_id) const {
1311
24.4M
  *hash_id = hash_node(node_id) % table_.size();
1312
57.2M
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1313
57.2M
    id_type unit_id = table_[*hash_id];
1314
57.2M
    if (unit_id == 0) {
1315
24.4M
      break;
1316
24.4M
    }
1317
1318
32.8M
    if (are_equal(node_id, unit_id)) {
1319
0
      return unit_id;
1320
0
    }
1321
32.8M
  }
1322
24.4M
  return 0;
1323
24.4M
}
1324
1325
32.8M
inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const {
1326
33.6M
  for (id_type i = nodes_[node_id].sibling(); i != 0;
1327
32.8M
      i = nodes_[i].sibling()) {
1328
4.06M
    if (units_[unit_id].has_sibling() == false) {
1329
3.24M
      return false;
1330
3.24M
    }
1331
822k
    ++unit_id;
1332
822k
  }
1333
29.5M
  if (units_[unit_id].has_sibling() == true) {
1334
3.34M
    return false;
1335
3.34M
  }
1336
1337
26.2M
  for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) {
1338
26.2M
    if (nodes_[i].unit() != units_[unit_id].unit() ||
1339
26.2M
        nodes_[i].label() != labels_[unit_id]) {
1340
26.2M
      return false;
1341
26.2M
    }
1342
26.2M
  }
1343
0
  return true;
1344
26.2M
}
1345
1346
16.3M
inline id_type DawgBuilder::hash_unit(id_type id) const {
1347
16.3M
  id_type hash_value = 0;
1348
20.3M
  for ( ; id != 0; ++id) {
1349
20.3M
    id_type unit = units_[id].unit();
1350
20.3M
    uchar_type label = labels_[id];
1351
20.3M
    hash_value ^= hash((label << 24) ^ unit);
1352
1353
20.3M
    if (units_[id].has_sibling() == false) {
1354
16.3M
      break;
1355
16.3M
    }
1356
20.3M
  }
1357
16.3M
  return hash_value;
1358
16.3M
}
1359
1360
24.4M
inline id_type DawgBuilder::hash_node(id_type id) const {
1361
24.4M
  id_type hash_value = 0;
1362
56.2M
  for ( ; id != 0; id = nodes_[id].sibling()) {
1363
31.8M
    id_type unit = nodes_[id].unit();
1364
31.8M
    uchar_type label = nodes_[id].label();
1365
31.8M
    hash_value ^= hash((label << 24) ^ unit);
1366
31.8M
  }
1367
24.4M
  return hash_value;
1368
24.4M
}
1369
1370
31.8M
inline id_type DawgBuilder::append_unit() {
1371
31.8M
  is_intersections_.append();
1372
31.8M
  units_.append();
1373
31.8M
  labels_.append();
1374
1375
31.8M
  return static_cast<id_type>(is_intersections_.size() - 1);
1376
31.8M
}
1377
1378
31.8M
inline id_type DawgBuilder::append_node() {
1379
31.8M
  id_type id;
1380
31.8M
  if (recycle_bin_.empty()) {
1381
3.15M
    id = static_cast<id_type>(nodes_.size());
1382
3.15M
    nodes_.append();
1383
28.7M
  } else {
1384
28.7M
    id = recycle_bin_.top();
1385
28.7M
    nodes_[id] = DawgNode();
1386
28.7M
    recycle_bin_.pop();
1387
28.7M
  }
1388
31.8M
  return id;
1389
31.8M
}
1390
1391
//
1392
// Unit of double-array builder.
1393
//
1394
1395
class DoubleArrayBuilderUnit {
1396
 public:
1397
44.5M
  DoubleArrayBuilderUnit() : unit_(0) {}
1398
1399
7.46M
  void set_has_leaf(bool has_leaf) {
1400
7.46M
    if (has_leaf) {
1401
7.46M
      unit_ |= 1U << 8;
1402
7.46M
    } else {
1403
0
      unit_ &= ~(1U << 8);
1404
0
    }
1405
7.46M
  }
1406
7.46M
  void set_value(value_type value) {
1407
7.46M
    unit_ = value | (1U << 31);
1408
7.46M
  }
1409
37.0M
  void set_label(uchar_type label) {
1410
37.0M
    unit_ = (unit_ & ~0xFFU) | label;
1411
37.0M
  }
1412
24.6M
  void set_offset(id_type offset) {
1413
24.6M
    if (offset >= 1U << 29) {
1414
0
      DARTS_THROW("failed to modify unit: too large offset");
1415
0
    }
1416
24.6M
    unit_ &= (1U << 31) | (1U << 8) | 0xFF;
1417
24.6M
    if (offset < 1U << 21) {
1418
24.6M
      unit_ |= (offset << 10);
1419
24.6M
    } else {
1420
0
      unit_ |= (offset << 2) | (1U << 9);
1421
0
    }
1422
24.6M
  }
1423
1424
 private:
1425
  id_type unit_;
1426
1427
  // Copyable.
1428
};
1429
1430
//
1431
// Extra unit of double-array builder.
1432
//
1433
1434
class DoubleArrayBuilderExtraUnit {
1435
 public:
1436
236M
  DoubleArrayBuilderExtraUnit() : prev_(0), next_(0),
1437
236M
      is_fixed_(false), is_used_(false) {}
1438
1439
89.4M
  void set_prev(id_type prev) {
1440
89.4M
    prev_ = prev;
1441
89.4M
  }
1442
89.4M
  void set_next(id_type next) {
1443
89.4M
    next_ = next;
1444
89.4M
  }
1445
46.8M
  void set_is_fixed(bool is_fixed) {
1446
46.8M
    is_fixed_ = is_fixed;
1447
46.8M
  }
1448
26.9M
  void set_is_used(bool is_used) {
1449
26.9M
    is_used_ = is_used;
1450
26.9M
  }
1451
1452
89.4M
  id_type prev() const {
1453
89.4M
    return prev_;
1454
89.4M
  }
1455
288M
  id_type next() const {
1456
288M
    return next_;
1457
288M
  }
1458
92.8M
  bool is_fixed() const {
1459
92.8M
    return is_fixed_;
1460
92.8M
  }
1461
208M
  bool is_used() const {
1462
208M
    return is_used_;
1463
208M
  }
1464
1465
 private:
1466
  id_type prev_;
1467
  id_type next_;
1468
  bool is_fixed_;
1469
  bool is_used_;
1470
1471
  // Copyable.
1472
};
1473
1474
//
1475
// DAWG -> double-array converter.
1476
//
1477
1478
class DoubleArrayBuilder {
1479
 public:
1480
  explicit DoubleArrayBuilder(progress_func_type progress_func)
1481
57.7k
      : progress_func_(progress_func), units_(), extras_(), labels_(),
1482
57.7k
        table_(), extras_head_(0) {}
1483
57.7k
  ~DoubleArrayBuilder() {
1484
57.7k
    clear();
1485
57.7k
  }
1486
1487
  template <typename T>
1488
  void build(const Keyset<T> &keyset);
1489
  void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const;
1490
1491
  void clear();
1492
1493
 private:
1494
  static constexpr int BLOCK_SIZE = 256;
1495
  static constexpr int NUM_EXTRA_BLOCKS = 16;
1496
  static constexpr int NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS;
1497
1498
  static constexpr int  UPPER_MASK = 0xFF << 21;
1499
  static constexpr int  LOWER_MASK = 0xFF;
1500
1501
  typedef DoubleArrayBuilderUnit unit_type;
1502
  typedef DoubleArrayBuilderExtraUnit extra_type;
1503
1504
  progress_func_type progress_func_;
1505
  AutoPool<unit_type> units_;
1506
  AutoArray<extra_type> extras_;
1507
  AutoPool<uchar_type> labels_;
1508
  AutoArray<id_type> table_;
1509
  id_type extras_head_;
1510
1511
  // Disallows copy and assignment.
1512
  DoubleArrayBuilder(const DoubleArrayBuilder &);
1513
  DoubleArrayBuilder &operator=(const DoubleArrayBuilder &);
1514
1515
290k
  std::size_t num_blocks() const {
1516
290k
    return units_.size() / BLOCK_SIZE;
1517
290k
  }
1518
1519
436M
  const extra_type &extras(id_type id) const {
1520
436M
    return extras_[id % NUM_EXTRAS];
1521
436M
  }
1522
495M
  extra_type &extras(id_type id) {
1523
495M
    return extras_[id % NUM_EXTRAS];
1524
495M
  }
1525
1526
  template <typename T>
1527
  void build_dawg(const Keyset<T> &keyset, DawgBuilder *dawg_builder);
1528
  void build_from_dawg(const DawgBuilder &dawg);
1529
  void build_from_dawg(const DawgBuilder &dawg,
1530
      id_type dawg_id, id_type dic_id);
1531
  id_type arrange_from_dawg(const DawgBuilder &dawg,
1532
      id_type dawg_id, id_type dic_id);
1533
1534
  template <typename T>
1535
  void build_from_keyset(const Keyset<T> &keyset);
1536
  template <typename T>
1537
  void build_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1538
      std::size_t end, std::size_t depth, id_type dic_id);
1539
  template <typename T>
1540
  id_type arrange_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1541
      std::size_t end, std::size_t depth, id_type dic_id);
1542
1543
  id_type find_valid_offset(id_type id) const;
1544
  bool is_valid_offset(id_type id, id_type offset) const;
1545
1546
  void reserve_id(id_type id);
1547
  void expand_units();
1548
1549
  void fix_all_blocks();
1550
  void fix_block(id_type block_id);
1551
};
1552
1553
template <typename T>
1554
57.7k
void DoubleArrayBuilder::build(const Keyset<T> &keyset) {
1555
57.7k
  if (keyset.has_values()) {
1556
48.7k
    Details::DawgBuilder dawg_builder;
1557
48.7k
    build_dawg(keyset, &dawg_builder);
1558
48.7k
    build_from_dawg(dawg_builder);
1559
48.7k
    dawg_builder.clear();
1560
48.7k
  } else {
1561
8.96k
    build_from_keyset(keyset);
1562
8.96k
  }
1563
57.7k
}
1564
1565
inline void DoubleArrayBuilder::copy(std::size_t *size_ptr,
1566
57.7k
    DoubleArrayUnit **buf_ptr) const {
1567
57.7k
  if (size_ptr != NULL) {
1568
57.7k
    *size_ptr = units_.size();
1569
57.7k
  }
1570
57.7k
  if (buf_ptr != NULL) {
1571
57.7k
    *buf_ptr = new DoubleArrayUnit[units_.size()];
1572
57.7k
    unit_type *units = reinterpret_cast<unit_type *>(*buf_ptr);
1573
44.6M
    for (std::size_t i = 0; i < units_.size(); ++i) {
1574
44.5M
      units[i] = units_[i];
1575
44.5M
    }
1576
57.7k
  }
1577
57.7k
}
1578
1579
57.7k
inline void DoubleArrayBuilder::clear() {
1580
57.7k
  units_.clear();
1581
57.7k
  extras_.clear();
1582
57.7k
  labels_.clear();
1583
57.7k
  table_.clear();
1584
57.7k
  extras_head_ = 0;
1585
57.7k
}
1586
1587
template <typename T>
1588
void DoubleArrayBuilder::build_dawg(const Keyset<T> &keyset,
1589
48.7k
    DawgBuilder *dawg_builder) {
1590
48.7k
  dawg_builder->init();
1591
7.47M
  for (std::size_t i = 0; i < keyset.num_keys(); ++i) {
1592
7.42M
    dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i));
1593
7.42M
    if (progress_func_ != NULL) {
1594
0
      progress_func_(i + 1, keyset.num_keys() + 1);
1595
0
    }
1596
7.42M
  }
1597
48.7k
  dawg_builder->finish();
1598
48.7k
}
1599
1600
48.7k
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) {
1601
48.7k
  std::size_t num_units = 1;
1602
465k
  while (num_units < dawg.size()) {
1603
417k
    num_units <<= 1;
1604
417k
  }
1605
48.7k
  units_.reserve(num_units);
1606
1607
48.7k
  table_.reset(new id_type[dawg.num_intersections()]);
1608
48.7k
  for (std::size_t i = 0; i < dawg.num_intersections(); ++i) {
1609
0
    table_[i] = 0;
1610
0
  }
1611
1612
48.7k
  extras_.reset(new extra_type[NUM_EXTRAS]);
1613
1614
48.7k
  reserve_id(0);
1615
48.7k
  extras(0).set_is_used(true);
1616
48.7k
  units_[0].set_offset(1);
1617
48.7k
  units_[0].set_label('\0');
1618
1619
48.7k
  if (dawg.child(dawg.root()) != 0) {
1620
48.7k
    build_from_dawg(dawg, dawg.root(), 0);
1621
48.7k
  }
1622
1623
48.7k
  fix_all_blocks();
1624
1625
48.7k
  extras_.clear();
1626
48.7k
  labels_.clear();
1627
48.7k
  table_.clear();
1628
48.7k
}
1629
1630
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg,
1631
24.4M
    id_type dawg_id, id_type dic_id) {
1632
24.4M
  id_type dawg_child_id = dawg.child(dawg_id);
1633
24.4M
  if (dawg.is_intersection(dawg_child_id)) {
1634
0
    id_type intersection_id = dawg.intersection_id(dawg_child_id);
1635
0
    id_type offset = table_[intersection_id];
1636
0
    if (offset != 0) {
1637
0
      offset ^= dic_id;
1638
0
      if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) {
1639
0
        if (dawg.is_leaf(dawg_child_id)) {
1640
0
          units_[dic_id].set_has_leaf(true);
1641
0
        }
1642
0
        units_[dic_id].set_offset(offset);
1643
0
        return;
1644
0
      }
1645
0
    }
1646
0
  }
1647
1648
24.4M
  id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id);
1649
24.4M
  if (dawg.is_intersection(dawg_child_id)) {
1650
0
    table_[dawg.intersection_id(dawg_child_id)] = offset;
1651
0
  }
1652
1653
31.8M
  do {
1654
31.8M
    uchar_type child_label = dawg.label(dawg_child_id);
1655
31.8M
    id_type dic_child_id = offset ^ child_label;
1656
31.8M
    if (child_label != '\0') {
1657
24.4M
      build_from_dawg(dawg, dawg_child_id, dic_child_id);
1658
24.4M
    }
1659
31.8M
    dawg_child_id = dawg.sibling(dawg_child_id);
1660
31.8M
  } while (dawg_child_id != 0);
1661
24.4M
}
1662
1663
inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg,
1664
24.4M
    id_type dawg_id, id_type dic_id) {
1665
24.4M
  labels_.resize(0);
1666
1667
24.4M
  id_type dawg_child_id = dawg.child(dawg_id);
1668
56.2M
  while (dawg_child_id != 0) {
1669
31.8M
    labels_.append(dawg.label(dawg_child_id));
1670
31.8M
    dawg_child_id = dawg.sibling(dawg_child_id);
1671
31.8M
  }
1672
1673
24.4M
  id_type offset = find_valid_offset(dic_id);
1674
24.4M
  units_[dic_id].set_offset(dic_id ^ offset);
1675
1676
24.4M
  dawg_child_id = dawg.child(dawg_id);
1677
56.2M
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1678
31.8M
    id_type dic_child_id = offset ^ labels_[i];
1679
31.8M
    reserve_id(dic_child_id);
1680
1681
31.8M
    if (dawg.is_leaf(dawg_child_id)) {
1682
7.42M
      units_[dic_id].set_has_leaf(true);
1683
7.42M
      units_[dic_child_id].set_value(dawg.value(dawg_child_id));
1684
24.4M
    } else {
1685
24.4M
      units_[dic_child_id].set_label(labels_[i]);
1686
24.4M
    }
1687
1688
31.8M
    dawg_child_id = dawg.sibling(dawg_child_id);
1689
31.8M
  }
1690
24.4M
  extras(offset).set_is_used(true);
1691
1692
24.4M
  return offset;
1693
24.4M
}
1694
1695
template <typename T>
1696
8.96k
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset) {
1697
8.96k
  std::size_t num_units = 1;
1698
26.5k
  while (num_units < keyset.num_keys()) {
1699
17.6k
    num_units <<= 1;
1700
17.6k
  }
1701
8.96k
  units_.reserve(num_units);
1702
1703
8.96k
  extras_.reset(new extra_type[NUM_EXTRAS]);
1704
1705
8.96k
  reserve_id(0);
1706
8.96k
  extras(0).set_is_used(true);
1707
8.96k
  units_[0].set_offset(1);
1708
8.96k
  units_[0].set_label('\0');
1709
1710
8.96k
  if (keyset.num_keys() > 0) {
1711
8.96k
    build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0);
1712
8.96k
  }
1713
1714
8.96k
  fix_all_blocks();
1715
1716
8.96k
  extras_.clear();
1717
8.96k
  labels_.clear();
1718
8.96k
}
1719
1720
template <typename T>
1721
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset,
1722
132k
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1723
132k
  id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id);
1724
1725
175k
  while (begin < end) {
1726
132k
    if (keyset.keys(begin, depth) != '\0') {
1727
88.7k
      break;
1728
88.7k
    }
1729
43.5k
    ++begin;
1730
43.5k
  }
1731
132k
  if (begin == end) {
1732
43.5k
    return;
1733
43.5k
  }
1734
1735
88.7k
  std::size_t last_begin = begin;
1736
88.7k
  uchar_type last_label = keyset.keys(begin, depth);
1737
208k
  while (++begin < end) {
1738
120k
    uchar_type label = keyset.keys(begin, depth);
1739
120k
    if (label != last_label) {
1740
34.5k
      build_from_keyset(keyset, last_begin, begin,
1741
34.5k
          depth + 1, offset ^ last_label);
1742
34.5k
      last_begin = begin;
1743
34.5k
      last_label = keyset.keys(begin, depth);
1744
34.5k
    }
1745
120k
  }
1746
88.7k
  build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label);
1747
88.7k
}
1748
1749
template <typename T>
1750
id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset<T> &keyset,
1751
132k
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1752
132k
  labels_.resize(0);
1753
1754
132k
  value_type value = -1;
1755
384k
  for (std::size_t i = begin; i < end; ++i) {
1756
252k
    uchar_type label = keyset.keys(i, depth);
1757
252k
    if (label == '\0') {
1758
43.5k
      if (keyset.has_lengths() && depth < keyset.lengths(i)) {
1759
0
        DARTS_THROW("failed to build double-array: "
1760
0
            "invalid null character");
1761
43.5k
      } else if (keyset.values(i) < 0) {
1762
0
        DARTS_THROW("failed to build double-array: negative value");
1763
0
      }
1764
1765
43.5k
      if (value == -1) {
1766
43.5k
        value = keyset.values(i);
1767
43.5k
      }
1768
43.5k
      if (progress_func_ != NULL) {
1769
0
        progress_func_(i + 1, keyset.num_keys() + 1);
1770
0
      }
1771
43.5k
    }
1772
1773
252k
    if (labels_.empty()) {
1774
132k
      labels_.append(label);
1775
132k
    } else if (label != labels_[labels_.size() - 1]) {
1776
34.5k
      if (label < labels_[labels_.size() - 1]) {
1777
0
        DARTS_THROW("failed to build double-array: wrong key order");
1778
0
      }
1779
34.5k
      labels_.append(label);
1780
34.5k
    }
1781
252k
  }
1782
1783
132k
  id_type offset = find_valid_offset(dic_id);
1784
132k
  units_[dic_id].set_offset(dic_id ^ offset);
1785
1786
299k
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1787
166k
    id_type dic_child_id = offset ^ labels_[i];
1788
166k
    reserve_id(dic_child_id);
1789
166k
    if (labels_[i] == '\0') {
1790
43.5k
      units_[dic_id].set_has_leaf(true);
1791
43.5k
      units_[dic_child_id].set_value(value);
1792
123k
    } else {
1793
123k
      units_[dic_child_id].set_label(labels_[i]);
1794
123k
    }
1795
166k
  }
1796
132k
  extras(offset).set_is_used(true);
1797
1798
132k
  return offset;
1799
132k
}
1800
1801
24.5M
inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const {
1802
24.5M
  if (extras_head_ >= units_.size()) {
1803
90
    return units_.size() | (id & LOWER_MASK);
1804
90
  }
1805
1806
24.5M
  id_type unfixed_id = extras_head_;
1807
206M
  do {
1808
206M
    id_type offset = unfixed_id ^ labels_[0];
1809
206M
    if (is_valid_offset(id, offset)) {
1810
24.4M
      return offset;
1811
24.4M
    }
1812
181M
    unfixed_id = extras(unfixed_id).next();
1813
181M
  } while (unfixed_id != extras_head_);
1814
1815
116k
  return units_.size() | (id & LOWER_MASK);
1816
24.5M
}
1817
1818
inline bool DoubleArrayBuilder::is_valid_offset(id_type id,
1819
206M
    id_type offset) const {
1820
206M
  if (extras(offset).is_used()) {
1821
156M
    return false;
1822
156M
  }
1823
1824
49.7M
  id_type rel_offset = id ^ offset;
1825
49.7M
  if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) {
1826
0
    return false;
1827
0
  }
1828
1829
72.7M
  for (std::size_t i = 1; i < labels_.size(); ++i) {
1830
48.2M
    if (extras(offset ^ labels_[i]).is_fixed()) {
1831
25.2M
      return false;
1832
25.2M
    }
1833
48.2M
  }
1834
1835
24.4M
  return true;
1836
49.7M
}
1837
1838
44.5M
inline void DoubleArrayBuilder::reserve_id(id_type id) {
1839
44.5M
  if (id >= units_.size()) {
1840
174k
    expand_units();
1841
174k
  }
1842
1843
44.5M
  if (id == extras_head_) {
1844
17.7M
    extras_head_ = extras(id).next();
1845
17.7M
    if (extras_head_ == id) {
1846
57.8k
      extras_head_ = units_.size();
1847
57.8k
    }
1848
17.7M
  }
1849
44.5M
  extras(extras(id).prev()).set_next(extras(id).next());
1850
44.5M
  extras(extras(id).next()).set_prev(extras(id).prev());
1851
44.5M
  extras(id).set_is_fixed(true);
1852
44.5M
}
1853
1854
174k
inline void DoubleArrayBuilder::expand_units() {
1855
174k
  id_type src_num_units = units_.size();
1856
174k
  id_type src_num_blocks = num_blocks();
1857
1858
174k
  id_type dest_num_units = src_num_units + BLOCK_SIZE;
1859
174k
  id_type dest_num_blocks = src_num_blocks + 1;
1860
1861
174k
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1862
9.08k
    fix_block(src_num_blocks - NUM_EXTRA_BLOCKS);
1863
9.08k
  }
1864
1865
174k
  units_.resize(dest_num_units);
1866
1867
174k
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1868
2.33M
    for (std::size_t id = src_num_units; id < dest_num_units; ++id) {
1869
2.32M
      extras(id).set_is_used(false);
1870
2.32M
      extras(id).set_is_fixed(false);
1871
2.32M
    }
1872
9.08k
  }
1873
1874
44.5M
  for (id_type i = src_num_units + 1; i < dest_num_units; ++i) {
1875
44.3M
    extras(i - 1).set_next(i);
1876
44.3M
    extras(i).set_prev(i - 1);
1877
44.3M
  }
1878
1879
174k
  extras(src_num_units).set_prev(dest_num_units - 1);
1880
174k
  extras(dest_num_units - 1).set_next(src_num_units);
1881
1882
174k
  extras(src_num_units).set_prev(extras(extras_head_).prev());
1883
174k
  extras(dest_num_units - 1).set_next(extras_head_);
1884
1885
174k
  extras(extras(extras_head_).prev()).set_next(src_num_units);
1886
174k
  extras(extras_head_).set_prev(dest_num_units - 1);
1887
174k
}
1888
1889
57.7k
inline void DoubleArrayBuilder::fix_all_blocks() {
1890
57.7k
  id_type begin = 0;
1891
57.7k
  if (num_blocks() > NUM_EXTRA_BLOCKS) {
1892
705
    begin = num_blocks() - NUM_EXTRA_BLOCKS;
1893
705
  }
1894
57.7k
  id_type end = num_blocks();
1895
1896
222k
  for (id_type block_id = begin; block_id != end; ++block_id) {
1897
164k
    fix_block(block_id);
1898
164k
  }
1899
57.7k
}
1900
1901
174k
inline void DoubleArrayBuilder::fix_block(id_type block_id) {
1902
174k
  id_type begin = block_id * BLOCK_SIZE;
1903
174k
  id_type end = begin + BLOCK_SIZE;
1904
1905
174k
  id_type unused_offset = 0;
1906
2.20M
  for (id_type offset = begin; offset != end; ++offset) {
1907
2.20M
    if (!extras(offset).is_used()) {
1908
174k
      unused_offset = offset;
1909
174k
      break;
1910
174k
    }
1911
2.20M
  }
1912
1913
44.7M
  for (id_type id = begin; id != end; ++id) {
1914
44.5M
    if (!extras(id).is_fixed()) {
1915
12.5M
      reserve_id(id);
1916
12.5M
      units_[id].set_label(static_cast<uchar_type>(id ^ unused_offset));
1917
12.5M
    }
1918
44.5M
  }
1919
174k
}
1920
1921
}  // namespace Details
1922
1923
//
1924
// Member function build() of DoubleArrayImpl.
1925
//
1926
1927
template <typename A, typename B, typename T, typename C>
1928
int DoubleArrayImpl<A, B, T, C>::build(std::size_t num_keys,
1929
    const key_type * const *keys, const std::size_t *lengths,
1930
57.7k
    const value_type *values, Details::progress_func_type progress_func) {
1931
57.7k
  Details::Keyset<value_type> keyset(num_keys, keys, lengths, values);
1932
1933
57.7k
  Details::DoubleArrayBuilder builder(progress_func);
1934
57.7k
  builder.build(keyset);
1935
1936
57.7k
  std::size_t size = 0;
1937
57.7k
  unit_type *buf = NULL;
1938
57.7k
  builder.copy(&size, &buf);
1939
1940
57.7k
  clear();
1941
1942
57.7k
  size_ = size;
1943
57.7k
  array_ = buf;
1944
57.7k
  buf_ = buf;
1945
1946
57.7k
  if (progress_func != NULL) {
1947
0
    progress_func(num_keys + 1, num_keys + 1);
1948
0
  }
1949
1950
57.7k
  return 0;
1951
57.7k
}
1952
1953
}  // namespace Darts
1954
1955
#undef DARTS_INT_TO_STR
1956
#undef DARTS_LINE_TO_STR
1957
#undef DARTS_LINE_STR
1958
#undef DARTS_THROW
1959
1960
#endif  // DARTS_H_