Coverage Report

Created: 2026-06-09 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/sentencepiece/third_party/darts_clone/darts.h
Line
Count
Source
1
#ifndef DARTS_H_
2
#define DARTS_H_
3
4
#include <cstdio>
5
#include <exception>
6
#include <new>
7
8
#define DARTS_VERSION "0.32"
9
10
// DARTS_THROW() throws a <Darts::Exception> whose message starts with the
11
// file name and the line number. For example, DARTS_THROW("error message") at
12
// line 123 of "darts.h" throws a <Darts::Exception> which has a pointer to
13
// "darts.h:123: exception: error message". The message is available by using
14
// what() as well as that of <std::exception>.
15
#define DARTS_INT_TO_STR(value) #value
16
#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line)
17
#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__)
18
0
#define DARTS_THROW(msg) throw Darts::Details::Exception( \
19
0
  __FILE__ ":" DARTS_LINE_STR ": exception: " msg)
20
21
namespace Darts {
22
23
// The following namespace hides the internal types and classes.
24
namespace Details {
25
26
// This header assumes that <int> and <unsigned int> are 32-bit integer types.
27
//
28
// Darts-clone keeps values associated with keys. The type of the values is
29
// <value_type>. Note that the values must be positive integers because the
30
// most significant bit (MSB) of each value is used to represent whether the
31
// corresponding unit is a leaf or not. Also, the keys are represented by
32
// sequences of <char_type>s. <uchar_type> is the unsigned type of <char_type>.
33
typedef char char_type;
34
typedef unsigned char uchar_type;
35
typedef int value_type;
36
37
// The main structure of Darts-clone is an array of <DoubleArrayUnit>s, and the
38
// unit type is actually a wrapper of <id_type>.
39
typedef unsigned int id_type;
40
41
// <progress_func_type> is the type of callback functions for reporting the
42
// progress of building a dictionary. See also build() of <DoubleArray>.
43
// The 1st argument receives the progress value and the 2nd argument receives
44
// the maximum progress value. A usage example is to show the progress
45
// percentage, 100.0 * (the 1st argument) / (the 2nd argument).
46
typedef int (*progress_func_type)(std::size_t, std::size_t);
47
48
// <DoubleArrayUnit> is the type of double-array units and it is a wrapper of
49
// <id_type> in practice.
50
class DoubleArrayUnit {
51
 public:
52
49.8M
  DoubleArrayUnit() : unit_() {}
53
54
  // has_leaf() returns whether a leaf unit is immediately derived from the
55
  // unit (true) or not (false).
56
712M
  bool has_leaf() const {
57
712M
    return ((unit_ >> 8) & 1) == 1;
58
712M
  }
59
  // value() returns the value stored in the unit, and thus value() is
60
  // available when and only when the unit is a leaf unit.
61
262M
  value_type value() const {
62
262M
    return static_cast<value_type>(unit_ & ((1U << 31) - 1));
63
262M
  }
64
65
  // label() returns the label associted with the unit. Note that a leaf unit
66
  // always returns an invalid label. For this feature, leaf unit's label()
67
  // returns an <id_type> that has the MSB of 1.
68
1.82G
  id_type label() const {
69
1.82G
    return unit_ & ((1U << 31) | 0xFF);
70
1.82G
  }
71
  // offset() returns the offset from the unit to its derived units.
72
1.54G
  id_type offset() const {
73
1.54G
    return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6);
74
1.54G
  }
75
76
 private:
77
  id_type unit_;
78
79
  // Copyable.
80
};
81
82
// Darts-clone throws an <Exception> for memory allocation failure, invalid
83
// arguments or a too large offset. The last case means that there are too many
84
// keys in the given set of keys. Note that the `msg' of <Exception> must be a
85
// constant or static string because an <Exception> keeps only a pointer to
86
// that string.
87
class Exception : public std::exception {
88
 public:
89
0
  explicit Exception(const char *msg = NULL) throw() : msg_(msg) {}
90
0
  Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {}
91
0
  virtual ~Exception() throw() {}
92
93
  // <Exception> overrides what() of <std::exception>.
94
0
  virtual const char *what() const throw() {
95
0
    return (msg_ != NULL) ? msg_ : "";
96
0
  }
97
98
 private:
99
  const char *msg_;
100
101
  // Disallows operator=.
102
  Exception &operator=(const Exception &);
103
};
104
105
}  // namespace Details
106
107
// <DoubleArrayImpl> is the interface of Darts-clone. Note that other
108
// classes should not be accessed from outside.
109
//
110
// <DoubleArrayImpl> has 4 template arguments but only the 3rd one is used as
111
// the type of values. Note that the given <T> is used only from outside, and
112
// the internal value type is not changed from <Darts::Details::value_type>.
113
// In build(), given values are casted from <T> to <Darts::Details::value_type>
114
// by using static_cast. On the other hand, values are casted from
115
// <Darts::Details::value_type> to <T> in searching dictionaries.
116
template <typename, typename, typename T, typename>
117
class DoubleArrayImpl {
118
 public:
119
  // Even if this <value_type> is changed, the internal value type is still
120
  // <Darts::Details::value_type>. Other types, such as 64-bit integer types
121
  // and floating-point number types, should not be used.
122
  typedef T value_type;
123
  // A key is reprenseted by a sequence of <key_type>s. For example,
124
  // exactMatchSearch() takes a <const key_type *>.
125
  typedef Details::char_type key_type;
126
  // In searching dictionaries, the values associated with the matched keys are
127
  // stored into or returned as <result_type>s.
128
  typedef value_type result_type;
129
130
  // <result_pair_type> enables applications to get the lengths of the matched
131
  // keys in addition to the values.
132
  struct result_pair_type {
133
    value_type value;
134
    std::size_t length;
135
  };
136
137
  // The constructor initializes member variables with 0 and NULLs.
138
86.4k
  DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {}
139
  // The destructor frees memory allocated for units and then initializes
140
  // member variables with 0 and NULLs.
141
86.4k
  virtual ~DoubleArrayImpl() {
142
86.4k
    clear();
143
86.4k
  }
144
145
  // <DoubleArrayImpl> has 2 kinds of set_result()s. The 1st set_result() is to
146
  // set a value to a <value_type>. The 2nd set_result() is to set a value and
147
  // a length to a <result_pair_type>. By using set_result()s, search methods
148
  // can return the 2 kinds of results in the same way.
149
  // Why the set_result()s are non-static? It is for compatibility.
150
  //
151
  // The 1st set_result() takes a length as the 3rd argument but it is not
152
  // used. If a compiler does a good job, codes for getting the length may be
153
  // removed.
154
5.16M
  void set_result(value_type *result, value_type value, std::size_t) const {
155
5.16M
    *result = value;
156
5.16M
  }
157
  // The 2nd set_result() uses both `value' and `length'.
158
  void set_result(result_pair_type *result,
159
253M
      value_type value, std::size_t length) const {
160
253M
    result->value = value;
161
253M
    result->length = length;
162
253M
  }
163
164
  // set_array() calls clear() in order to free memory allocated to the old
165
  // array and then sets a new array. This function is useful to set a memory-
166
  // mapped array. Note that the array set by set_array() is not freed in
167
  // clear() and the destructor of <DoubleArrayImpl>.
168
  // set_array() can also set the size of the new array but the size is not
169
  // used in search methods. So it works well even if the 2nd argument is 0 or
170
  // omitted. Remember that size() and total_size() returns 0 in such a case.
171
20.2k
  void set_array(const void *ptr, std::size_t size = 0) {
172
20.2k
    clear();
173
20.2k
    array_ = static_cast<const unit_type *>(ptr);
174
20.2k
    size_ = size;
175
20.2k
  }
176
  // array() returns a pointer to the array of units.
177
0
  const void *array() const {
178
0
    return array_;
179
0
  }
180
181
  // clear() frees memory allocated to units and then initializes member
182
  // variables with 0 and NULLs. Note that clear() does not free memory if the
183
  // array of units was set by set_array(). In such a case, `array_' is not
184
  // NULL and `buf_' is NULL.
185
172k
  void clear() {
186
172k
    size_ = 0;
187
172k
    array_ = NULL;
188
172k
    if (buf_ != NULL) {
189
66.2k
      delete[] buf_;
190
66.2k
      buf_ = NULL;
191
66.2k
    }
192
172k
  }
193
194
  // unit_size() returns the size of each unit. The size must be 4 bytes.
195
20.2k
  std::size_t unit_size() const {
196
20.2k
    return sizeof(unit_type);
197
20.2k
  }
198
  // size() returns the number of units. It can be 0 if set_array() is used.
199
0
  std::size_t size() const {
200
0
    return size_;
201
0
  }
202
  // total_size() returns the number of bytes allocated to the array of units.
203
  // It can be 0 if set_array() is used.
204
  std::size_t total_size() const {
205
    return unit_size() * size();
206
  }
207
  // nonzero_size() exists for compatibility. It always returns the number of
208
  // units because it takes long time to count the number of non-zero units.
209
  std::size_t nonzero_size() const {
210
    return size();
211
  }
212
213
  // build() constructs a dictionary from given key-value pairs. If `lengths'
214
  // is NULL, `keys' is handled as an array of zero-terminated strings. If
215
  // `values' is NULL, the index in `keys' is associated with each key, i.e.
216
  // the ith key has (i - 1) as its value.
217
  // Note that the key-value pairs must be arranged in key order and the values
218
  // must not be negative. Also, if there are duplicate keys, only the first
219
  // pair will be stored in the resultant dictionary.
220
  // `progress_func' is a pointer to a callback function. If it is not NULL,
221
  // it will be called in build() so that the caller can check the progress of
222
  // dictionary construction. For details, please see the definition of
223
  // <Darts::Details::progress_func_type>.
224
  // The return value of build() is 0, and it indicates the success of the
225
  // operation. Otherwise, build() throws a <Darts::Exception>, which is a
226
  // derived class of <std::exception>.
227
  // build() uses another construction algorithm if `values' is not NULL. In
228
  // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead
229
  // of a trie because a DAWG is likely to be more compact than a trie.
230
  int build(std::size_t num_keys, const key_type * const *keys,
231
      const std::size_t *lengths = NULL, const value_type *values = NULL,
232
      Details::progress_func_type progress_func = NULL);
233
234
  // open() reads an array of units from the specified file. And if it goes
235
  // well, the old array will be freed and replaced with the new array read
236
  // from the file. `offset' specifies the number of bytes to be skipped before
237
  // reading an array. `size' specifies the number of bytes to be read from the
238
  // file. If the `size' is 0, the whole file will be read.
239
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
240
  // non-zero value or throws a <Darts::Exception>. The exception is thrown
241
  // when and only when a memory allocation fails.
242
  int open(const char *file_name, const char *mode = "rb",
243
      std::size_t offset = 0, std::size_t size = 0);
244
  // save() writes the array of units into the specified file. `offset'
245
  // specifies the number of bytes to be skipped before writing the array.
246
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
247
  // non-zero value.
248
  int save(const char *file_name, const char *mode = "wb",
249
      std::size_t offset = 0) const;
250
251
  // The 1st exactMatchSearch() tests whether the given key exists or not, and
252
  // if it exists, its value and length are set to `result'. Otherwise, the
253
  // value and the length of `result' are set to -1 and 0 respectively.
254
  // Note that if `length' is 0, `key' is handled as a zero-terminated string.
255
  // `node_pos' specifies the start position of matching. This argument enables
256
  // the combination of exactMatchSearch() and traverse(). For example, if you
257
  // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get
258
  // the node position corresponding to "xyz" and then you can use
259
  // exactMatchSearch() to test "A", "BC", and "DE" from that position.
260
  // Note that the length of `result' indicates the length from the `node_pos'.
261
  // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }.
262
  template <class U>
263
  void exactMatchSearch(const key_type *key, U &result,
264
2.74M
      std::size_t length = 0, std::size_t node_pos = 0) const {
265
2.74M
    result = exactMatchSearch<U>(key, length, node_pos);
266
2.74M
  }
267
  // The 2nd exactMatchSearch() returns a result instead of updating the 2nd
268
  // argument. So, the following exactMatchSearch() has only 3 arguments.
269
  template <class U>
270
  inline U exactMatchSearch(const key_type *key, std::size_t length = 0,
271
      std::size_t node_pos = 0) const;
272
273
  // commonPrefixSearch() searches for keys which match a prefix of the given
274
  // string. If `length' is 0, `key' is handled as a zero-terminated string.
275
  // The values and the lengths of at most `max_num_results' matched keys are
276
  // stored in `results'. commonPrefixSearch() returns the number of matched
277
  // keys. Note that the return value can be larger than `max_num_results' if
278
  // there are more than `max_num_results' matches. If you want to get all the
279
  // results, allocate more spaces and call commonPrefixSearch() again.
280
  // `node_pos' works as well as in exactMatchSearch().
281
  template <class U>
282
  inline std::size_t commonPrefixSearch(const key_type *key, U *results,
283
      std::size_t max_num_results, std::size_t length = 0,
284
      std::size_t node_pos = 0) const;
285
286
  // In Darts-clone, a dictionary is a deterministic finite-state automaton
287
  // (DFA) and traverse() tests transitions on the DFA. The initial state is
288
  // `node_pos' and traverse() chooses transitions labeled key[key_pos],
289
  // key[key_pos + 1], ... in order. If there is not a transition labeled
290
  // key[key_pos + i], traverse() terminates the transitions at that state and
291
  // returns -2. Otherwise, traverse() ends without a termination and returns
292
  // -1 or a nonnegative value, -1 indicates that the final state was not an
293
  // accept state. When a nonnegative value is returned, it is the value
294
  // associated with the final accept state. That is, traverse() returns the
295
  // value associated with the given key if it exists. Note that traverse()
296
  // updates `node_pos' and `key_pos' after each transition.
297
  inline value_type traverse(const key_type *key, std::size_t &node_pos,
298
      std::size_t &key_pos, std::size_t length = 0) const;
299
300
  bool validate() const;
301
302
 private:
303
  typedef Details::uchar_type uchar_type;
304
  typedef Details::id_type id_type;
305
  typedef Details::DoubleArrayUnit unit_type;
306
307
  std::size_t size_;
308
  const unit_type *array_;
309
  unit_type *buf_;
310
311
  // Disallows copy and assignment.
312
  DoubleArrayImpl(const DoubleArrayImpl &);
313
  DoubleArrayImpl &operator=(const DoubleArrayImpl &);
314
};
315
316
// <DoubleArray> is the typical instance of <DoubleArrayImpl>. It uses <int>
317
// as the type of values and it is suitable for most cases.
318
typedef DoubleArrayImpl<void, void, int, void> DoubleArray;
319
320
// The interface section ends here. For using Darts-clone, there is no need
321
// to read the remaining section, which gives the implementation of
322
// Darts-clone.
323
324
//
325
// Member functions of DoubleArrayImpl (except build()).
326
//
327
328
template <typename A, typename B, typename T, typename C>
329
int DoubleArrayImpl<A, B, T, C>::open(const char *file_name,
330
    const char *mode, std::size_t offset, std::size_t size) {
331
#ifdef _MSC_VER
332
  std::FILE *file;
333
  if (::fopen_s(&file, file_name, mode) != 0) {
334
    return -1;
335
  }
336
#else
337
  std::FILE *file = std::fopen(file_name, mode);
338
  if (file == NULL) {
339
    return -1;
340
  }
341
#endif
342
343
  if (size == 0) {
344
    if (std::fseek(file, 0, SEEK_END) != 0) {
345
      std::fclose(file);
346
      return -1;
347
    }
348
    size = std::ftell(file) - offset;
349
  }
350
351
  size /= unit_size();
352
  if (size < 256 || (size & 0xFF) != 0) {
353
    std::fclose(file);
354
    return -1;
355
  }
356
357
  if (std::fseek(file, offset, SEEK_SET) != 0) {
358
    std::fclose(file);
359
    return -1;
360
  }
361
362
  unit_type units[256];
363
  if (std::fread(units, unit_size(), 256, file) != 256) {
364
    std::fclose(file);
365
    return -1;
366
  }
367
368
  if (units[0].label() != '\0' || units[0].has_leaf() ||
369
      units[0].offset() == 0 || units[0].offset() >= 512) {
370
    std::fclose(file);
371
    return -1;
372
  }
373
  for (id_type i = 1; i < 256; ++i) {
374
    if (units[i].label() <= 0xFF && units[i].offset() >= size) {
375
      std::fclose(file);
376
      return -1;
377
    }
378
  }
379
380
  unit_type *buf;
381
  try {
382
    buf = new unit_type[size];
383
    for (id_type i = 0; i < 256; ++i) {
384
      buf[i] = units[i];
385
    }
386
  } catch (const std::bad_alloc &) {
387
    std::fclose(file);
388
    DARTS_THROW("failed to open double-array: std::bad_alloc");
389
  }
390
391
  if (size > 256) {
392
    if (std::fread(buf + 256, unit_size(), size - 256, file) != size - 256) {
393
      std::fclose(file);
394
      delete[] buf;
395
      return -1;
396
    }
397
  }
398
  std::fclose(file);
399
400
  clear();
401
402
  size_ = size;
403
  array_ = buf;
404
  buf_ = buf;
405
  return 0;
406
}
407
408
template <typename A, typename B, typename T, typename C>
409
int DoubleArrayImpl<A, B, T, C>::save(const char *file_name,
410
    const char *mode, std::size_t) const {
411
  if (size() == 0) {
412
    return -1;
413
  }
414
415
#ifdef _MSC_VER
416
  std::FILE *file;
417
  if (::fopen_s(&file, file_name, mode) != 0) {
418
    return -1;
419
  }
420
#else
421
  std::FILE *file = std::fopen(file_name, mode);
422
  if (file == NULL) {
423
    return -1;
424
  }
425
#endif
426
427
  if (std::fwrite(array_, unit_size(), size(), file) != size()) {
428
    std::fclose(file);
429
    return -1;
430
  }
431
  std::fclose(file);
432
  return 0;
433
}
434
435
template <typename A, typename B, typename T, typename C>
436
20.2k
bool DoubleArrayImpl<A, B, T, C>::validate() const {
437
20.2k
  if (size_ == 0) {
438
0
    return true;
439
0
  }
440
907M
  for (std::size_t i = 0; i < size_; ++i) {
441
    // Leaf/value units store data in the offset field, not child pointers.
442
    // label() has MSB set (> 0xFF) for these units. Skip them, matching
443
    // the same pattern used in open().
444
907M
    if (array_[i].label() > 0xFF) {
445
302M
      continue;
446
302M
    }
447
605M
    const id_type offset = array_[i].offset();
448
605M
    if (offset == 0) {
449
3.88M
      continue;
450
3.88M
    }
451
601M
    const std::size_t base = i ^ offset;
452
601M
    if ((base | 0xFF) >= size_) {
453
0
      return false;
454
0
    }
455
601M
  }
456
20.2k
  return true;
457
20.2k
}
458
459
template <typename A, typename B, typename T, typename C>
460
template <typename U>
461
inline U DoubleArrayImpl<A, B, T, C>::exactMatchSearch(const key_type *key,
462
2.74M
    std::size_t length, std::size_t node_pos) const {
463
2.74M
  U result;
464
2.74M
  set_result(&result, static_cast<value_type>(-1), 0);
465
466
2.74M
  unit_type unit = array_[node_pos];
467
2.74M
  if (length != 0) {
468
7.08M
    for (std::size_t i = 0; i < length; ++i) {
469
4.66M
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[i]);
470
4.66M
      unit = array_[node_pos];
471
4.66M
      if (unit.label() != static_cast<uchar_type>(key[i])) {
472
327k
        return result;
473
327k
      }
474
4.66M
    }
475
2.74M
  } else {
476
375
    for ( ; key[length] != '\0'; ++length) {
477
2
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[length]);
478
2
      unit = array_[node_pos];
479
2
      if (unit.label() != static_cast<uchar_type>(key[length])) {
480
2
        return result;
481
2
      }
482
2
    }
483
375
  }
484
485
2.41M
  if (!unit.has_leaf()) {
486
871
    return result;
487
871
  }
488
2.41M
  unit = array_[node_pos ^ unit.offset()];
489
2.41M
  set_result(&result, static_cast<value_type>(unit.value()), length);
490
2.41M
  return result;
491
2.41M
}
492
493
template <typename A, typename B, typename T, typename C>
494
template <typename U>
495
inline std::size_t DoubleArrayImpl<A, B, T, C>::commonPrefixSearch(
496
    const key_type *key, U *results, std::size_t max_num_results,
497
170M
    std::size_t length, std::size_t node_pos) const {
498
170M
  std::size_t num_results = 0;
499
500
170M
  unit_type unit = array_[node_pos];
501
170M
  node_pos ^= unit.offset();
502
170M
  if (length != 0) {
503
868M
    for (std::size_t i = 0; i < length; ++i) {
504
847M
      node_pos ^= static_cast<uchar_type>(key[i]);
505
847M
      unit = array_[node_pos];
506
847M
      if (unit.label() != static_cast<uchar_type>(key[i])) {
507
149M
        return num_results;
508
149M
      }
509
510
698M
      node_pos ^= unit.offset();
511
698M
      if (unit.has_leaf()) {
512
253M
        if (num_results < max_num_results) {
513
253M
          set_result(&results[num_results], static_cast<value_type>(
514
253M
              array_[node_pos].value()), i + 1);
515
253M
        }
516
253M
        ++num_results;
517
253M
      }
518
698M
    }
519
170M
  } else {
520
0
    for ( ; key[length] != '\0'; ++length) {
521
0
      node_pos ^= static_cast<uchar_type>(key[length]);
522
0
      unit = array_[node_pos];
523
0
      if (unit.label() != static_cast<uchar_type>(key[length])) {
524
0
        return num_results;
525
0
      }
526
527
0
      node_pos ^= unit.offset();
528
0
      if (unit.has_leaf()) {
529
0
        if (num_results < max_num_results) {
530
0
          set_result(&results[num_results], static_cast<value_type>(
531
0
              array_[node_pos].value()), length + 1);
532
0
        }
533
0
        ++num_results;
534
0
      }
535
0
    }
536
0
  }
537
538
21.6M
  return num_results;
539
170M
}
540
541
template <typename A, typename B, typename T, typename C>
542
inline typename DoubleArrayImpl<A, B, T, C>::value_type
543
DoubleArrayImpl<A, B, T, C>::traverse(const key_type *key,
544
61.2M
    std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const {
545
61.2M
  id_type id = static_cast<id_type>(node_pos);
546
61.2M
  unit_type unit = array_[id];
547
548
61.2M
  if (length != 0) {
549
73.1M
    for ( ; key_pos < length; ++key_pos) {
550
61.2M
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
551
61.2M
      unit = array_[id];
552
61.2M
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
553
49.3M
        return static_cast<value_type>(-2);
554
49.3M
      }
555
11.9M
      node_pos = id;
556
11.9M
    }
557
61.2M
  } else {
558
0
    for ( ; key[key_pos] != '\0'; ++key_pos) {
559
0
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
560
0
      unit = array_[id];
561
0
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
562
0
        return static_cast<value_type>(-2);
563
0
      }
564
0
      node_pos = id;
565
0
    }
566
0
  }
567
568
11.9M
  if (!unit.has_leaf()) {
569
5.79M
    return static_cast<value_type>(-1);
570
5.79M
  }
571
6.12M
  unit = array_[id ^ unit.offset()];
572
6.12M
  return static_cast<value_type>(unit.value());
573
11.9M
}
574
575
namespace Details {
576
577
//
578
// Memory management of array.
579
//
580
581
template <typename T>
582
class AutoArray {
583
 public:
584
6.79M
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<char>::AutoArray(char*)
Line
Count
Source
584
6.43M
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::AutoArray(Darts::Details::DoubleArrayBuilderExtraUnit*)
Line
Count
Source
584
132k
  explicit AutoArray(T *array = NULL) : array_(array) {}
Darts::Details::AutoArray<unsigned int>::AutoArray(unsigned int*)
Line
Count
Source
584
233k
  explicit AutoArray(T *array = NULL) : array_(array) {}
585
6.79M
  ~AutoArray() {
586
6.79M
    clear();
587
6.79M
  }
Darts::Details::AutoArray<char>::~AutoArray()
Line
Count
Source
585
6.43M
  ~AutoArray() {
586
6.43M
    clear();
587
6.43M
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::~AutoArray()
Line
Count
Source
585
132k
  ~AutoArray() {
586
132k
    clear();
587
132k
  }
Darts::Details::AutoArray<unsigned int>::~AutoArray()
Line
Count
Source
585
233k
  ~AutoArray() {
586
233k
    clear();
587
233k
  }
588
589
1.56G
  const T &operator[](std::size_t id) const {
590
1.56G
    return array_[id];
591
1.56G
  }
Darts::Details::AutoArray<char>::operator[](unsigned long) const
Line
Count
Source
589
1.07G
  const T &operator[](std::size_t id) const {
590
1.07G
    return array_[id];
591
1.07G
  }
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::operator[](unsigned long) const
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long) const
Line
Count
Source
589
489M
  const T &operator[](std::size_t id) const {
590
489M
    return array_[id];
591
489M
  }
592
2.17G
  T &operator[](std::size_t id) {
593
2.17G
    return array_[id];
594
2.17G
  }
Darts::Details::AutoArray<char>::operator[](unsigned long)
Line
Count
Source
592
1.62G
  T &operator[](std::size_t id) {
593
1.62G
    return array_[id];
594
1.62G
  }
Darts::Details::AutoArray<unsigned int>::operator[](unsigned long)
Line
Count
Source
592
1.12M
  T &operator[](std::size_t id) {
593
1.12M
    return array_[id];
594
1.12M
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long)
Line
Count
Source
592
554M
  T &operator[](std::size_t id) {
593
554M
    return array_[id];
594
554M
  }
595
596
  bool empty() const {
597
    return array_ == NULL;
598
  }
599
600
9.12M
  void clear() {
601
9.12M
    if (array_ != NULL) {
602
3.13M
      delete[] array_;
603
3.13M
      array_ = NULL;
604
3.13M
    }
605
9.12M
  }
Darts::Details::AutoArray<char>::clear()
Line
Count
Source
600
8.33M
  void clear() {
601
8.33M
    if (array_ != NULL) {
602
2.95M
      delete[] array_;
603
      array_ = NULL;
604
2.95M
    }
605
8.33M
  }
Darts::Details::AutoArray<unsigned int>::clear()
Line
Count
Source
600
522k
  void clear() {
601
522k
    if (array_ != NULL) {
602
111k
      delete[] array_;
603
      array_ = NULL;
604
111k
    }
605
522k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::clear()
Line
Count
Source
600
264k
  void clear() {
601
264k
    if (array_ != NULL) {
602
66.2k
      delete[] array_;
603
      array_ = NULL;
604
66.2k
    }
605
264k
  }
606
6.08M
  void swap(AutoArray *array) {
607
6.08M
    T *temp = array_;
608
6.08M
    array_ = array->array_;
609
6.08M
    array->array_ = temp;
610
6.08M
  }
Darts::Details::AutoArray<char>::swap(Darts::Details::AutoArray<char>*)
Line
Count
Source
606
5.90M
  void swap(AutoArray *array) {
607
5.90M
    T *temp = array_;
608
5.90M
    array_ = array->array_;
609
5.90M
    array->array_ = temp;
610
5.90M
  }
Darts::Details::AutoArray<unsigned int>::swap(Darts::Details::AutoArray<unsigned int>*)
Line
Count
Source
606
111k
  void swap(AutoArray *array) {
607
111k
    T *temp = array_;
608
111k
    array_ = array->array_;
609
111k
    array->array_ = temp;
610
111k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::swap(Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>*)
Line
Count
Source
606
66.2k
  void swap(AutoArray *array) {
607
66.2k
    T *temp = array_;
608
66.2k
    array_ = array->array_;
609
66.2k
    array->array_ = temp;
610
66.2k
  }
611
3.13M
  void reset(T *array = NULL) {
612
3.13M
    AutoArray(array).swap(this);
613
3.13M
  }
Darts::Details::AutoArray<char>::reset(char*)
Line
Count
Source
611
2.95M
  void reset(T *array = NULL) {
612
2.95M
    AutoArray(array).swap(this);
613
2.95M
  }
Darts::Details::AutoArray<unsigned int>::reset(unsigned int*)
Line
Count
Source
611
111k
  void reset(T *array = NULL) {
612
111k
    AutoArray(array).swap(this);
613
111k
  }
Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::reset(Darts::Details::DoubleArrayBuilderExtraUnit*)
Line
Count
Source
611
66.2k
  void reset(T *array = NULL) {
612
66.2k
    AutoArray(array).swap(this);
613
66.2k
  }
614
615
 private:
616
  T *array_;
617
618
  // Disallows copy and assignment.
619
  AutoArray(const AutoArray &);
620
  AutoArray &operator=(const AutoArray &);
621
};
622
623
//
624
// Memory management of resizable array.
625
//
626
627
template <typename T>
628
class AutoPool {
629
 public:
630
522k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::AutoPool()
Line
Count
Source
630
66.2k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<unsigned char>::AutoPool()
Line
Count
Source
630
121k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DawgNode>::AutoPool()
Line
Count
Source
630
55.6k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<Darts::Details::DawgUnit>::AutoPool()
Line
Count
Source
630
55.6k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Darts::Details::AutoPool<unsigned int>::AutoPool()
Line
Count
Source
630
222k
  AutoPool() : buf_(), size_(0), capacity_(0) {}
631
522k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<unsigned char>::~AutoPool()
Line
Count
Source
631
121k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::~AutoPool()
Line
Count
Source
631
66.2k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<unsigned int>::~AutoPool()
Line
Count
Source
631
222k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::~AutoPool()
Line
Count
Source
631
55.6k
  ~AutoPool() { clear(); }
Darts::Details::AutoPool<Darts::Details::DawgNode>::~AutoPool()
Line
Count
Source
631
55.6k
  ~AutoPool() { clear(); }
632
633
1.07G
  const T &operator[](std::size_t id) const {
634
1.07G
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
1.07G
  }
Darts::Details::AutoPool<unsigned int>::operator[](unsigned long) const
Line
Count
Source
633
139M
  const T &operator[](std::size_t id) const {
634
139M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
139M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long) const
Line
Count
Source
633
304M
  const T &operator[](std::size_t id) const {
634
304M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
304M
  }
Darts::Details::AutoPool<unsigned char>::operator[](unsigned long) const
Line
Count
Source
633
413M
  const T &operator[](std::size_t id) const {
634
413M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
413M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long) const
Line
Count
Source
633
170M
  const T &operator[](std::size_t id) const {
634
170M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
170M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long) const
Line
Count
Source
633
49.8M
  const T &operator[](std::size_t id) const {
634
49.8M
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
635
49.8M
  }
636
1.61G
  T &operator[](std::size_t id) {
637
1.61G
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
1.61G
  }
Darts::Details::AutoPool<unsigned int>::operator[](unsigned long)
Line
Count
Source
636
518M
  T &operator[](std::size_t id) {
637
518M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
518M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long)
Line
Count
Source
636
529M
  T &operator[](std::size_t id) {
637
529M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
529M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long)
Line
Count
Source
636
122M
  T &operator[](std::size_t id) {
637
122M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
122M
  }
Darts::Details::AutoPool<unsigned char>::operator[](unsigned long)
Line
Count
Source
636
261M
  T &operator[](std::size_t id) {
637
261M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
261M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long)
Line
Count
Source
636
185M
  T &operator[](std::size_t id) {
637
185M
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
638
185M
  }
639
640
35.5M
  bool empty() const {
641
35.5M
    return size_ == 0;
642
35.5M
  }
Darts::Details::AutoPool<unsigned int>::empty() const
Line
Count
Source
640
35.2M
  bool empty() const {
641
35.2M
    return size_ == 0;
642
35.2M
  }
Darts::Details::AutoPool<unsigned char>::empty() const
Line
Count
Source
640
279k
  bool empty() const {
641
279k
    return size_ == 0;
642
279k
  }
643
559M
  std::size_t size() const {
644
559M
    return size_;
645
559M
  }
Darts::Details::AutoPool<unsigned int>::size() const
Line
Count
Source
643
261M
  std::size_t size() const {
644
261M
    return size_;
645
261M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::size() const
Line
Count
Source
643
22.7M
  std::size_t size() const {
644
22.7M
    return size_;
645
22.7M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::size() const
Line
Count
Source
643
3.57M
  std::size_t size() const {
644
3.57M
    return size_;
645
3.57M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::size() const
Line
Count
Source
643
127M
  std::size_t size() const {
644
127M
    return size_;
645
127M
  }
Darts::Details::AutoPool<unsigned char>::size() const
Line
Count
Source
643
143M
  std::size_t size() const {
644
143M
    return size_;
645
143M
  }
646
647
1.90M
  void clear() {
648
1.90M
    resize(0);
649
1.90M
    buf_.clear();
650
1.90M
    size_ = 0;
651
1.90M
    capacity_ = 0;
652
1.90M
  }
Darts::Details::AutoPool<unsigned int>::clear()
Line
Count
Source
647
1.01M
  void clear() {
648
1.01M
    resize(0);
649
1.01M
    buf_.clear();
650
1.01M
    size_ = 0;
651
1.01M
    capacity_ = 0;
652
1.01M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::clear()
Line
Count
Source
647
222k
  void clear() {
648
222k
    resize(0);
649
222k
    buf_.clear();
650
222k
    size_ = 0;
651
222k
    capacity_ = 0;
652
222k
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::clear()
Line
Count
Source
647
167k
  void clear() {
648
167k
    resize(0);
649
167k
    buf_.clear();
650
167k
    size_ = 0;
651
167k
    capacity_ = 0;
652
167k
  }
Darts::Details::AutoPool<unsigned char>::clear()
Line
Count
Source
647
365k
  void clear() {
648
365k
    resize(0);
649
365k
    buf_.clear();
650
365k
    size_ = 0;
651
365k
    capacity_ = 0;
652
365k
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::clear()
Line
Count
Source
647
132k
  void clear() {
648
132k
    resize(0);
649
132k
    buf_.clear();
650
132k
    size_ = 0;
651
132k
    capacity_ = 0;
652
132k
  }
653
654
70.4M
  void push_back(const T &value) {
655
70.4M
    append(value);
656
70.4M
  }
657
66.9M
  void pop_back() {
658
66.9M
    (*this)[--size_].~T();
659
66.9M
  }
660
661
74.0M
  void append() {
662
74.0M
    if (size_ == capacity_)
663
1.45M
      resize_buf(size_ + 1);
664
74.0M
    new(&(*this)[size_++]) T;
665
74.0M
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::append()
Line
Count
Source
661
35.2M
  void append() {
662
35.2M
    if (size_ == capacity_)
663
529k
      resize_buf(size_ + 1);
664
35.2M
    new(&(*this)[size_++]) T;
665
35.2M
  }
Darts::Details::AutoPool<unsigned char>::append()
Line
Count
Source
661
35.2M
  void append() {
662
35.2M
    if (size_ == capacity_)
663
529k
      resize_buf(size_ + 1);
664
35.2M
    new(&(*this)[size_++]) T;
665
35.2M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::append()
Line
Count
Source
661
3.57M
  void append() {
662
3.57M
    if (size_ == capacity_)
663
393k
      resize_buf(size_ + 1);
664
3.57M
    new(&(*this)[size_++]) T;
665
3.57M
  }
666
106M
  void append(const T &value) {
667
106M
    if (size_ == capacity_)
668
1.32M
      resize_buf(size_ + 1);
669
106M
    new(&(*this)[size_++]) T(value);
670
106M
  }
Darts::Details::AutoPool<unsigned int>::append(unsigned int const&)
Line
Count
Source
666
71.5M
  void append(const T &value) {
667
71.5M
    if (size_ == capacity_)
668
978k
      resize_buf(size_ + 1);
669
71.5M
    new(&(*this)[size_++]) T(value);
670
71.5M
  }
Darts::Details::AutoPool<unsigned char>::append(unsigned char const&)
Line
Count
Source
666
35.3M
  void append(const T &value) {
667
35.3M
    if (size_ == capacity_)
668
349k
      resize_buf(size_ + 1);
669
35.3M
    new(&(*this)[size_++]) T(value);
670
35.3M
  }
671
672
29.2M
  void resize(std::size_t size) {
673
297M
    while (size_ > size) {
674
268M
      (*this)[--size_].~T();
675
268M
    }
676
29.2M
    if (size > capacity_) {
677
38.2k
      resize_buf(size);
678
38.2k
    }
679
79.0M
    while (size_ < size) {
680
49.8M
      new(&(*this)[size_++]) T;
681
49.8M
    }
682
29.2M
  }
Darts::Details::AutoPool<unsigned int>::resize(unsigned long)
Line
Count
Source
672
1.01M
  void resize(std::size_t size) {
673
110M
    while (size_ > size) {
674
109M
      (*this)[--size_].~T();
675
109M
    }
676
1.01M
    if (size > capacity_) {
677
0
      resize_buf(size);
678
0
    }
679
1.01M
    while (size_ < size) {
680
0
      new(&(*this)[size_++]) T;
681
0
    }
682
1.01M
  }
Darts::Details::AutoPool<Darts::Details::DawgNode>::resize(unsigned long)
Line
Count
Source
672
222k
  void resize(std::size_t size) {
673
3.79M
    while (size_ > size) {
674
3.57M
      (*this)[--size_].~T();
675
3.57M
    }
676
222k
    if (size > capacity_) {
677
0
      resize_buf(size);
678
0
    }
679
222k
    while (size_ < size) {
680
0
      new(&(*this)[size_++]) T;
681
0
    }
682
222k
  }
Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize(unsigned long)
Line
Count
Source
672
167k
  void resize(std::size_t size) {
673
35.4M
    while (size_ > size) {
674
35.2M
      (*this)[--size_].~T();
675
35.2M
    }
676
167k
    if (size > capacity_) {
677
0
      resize_buf(size);
678
0
    }
679
167k
    while (size_ < size) {
680
0
      new(&(*this)[size_++]) T;
681
0
    }
682
167k
  }
Darts::Details::AutoPool<unsigned char>::resize(unsigned long)
Line
Count
Source
672
27.4M
  void resize(std::size_t size) {
673
98.0M
    while (size_ > size) {
674
70.6M
      (*this)[--size_].~T();
675
70.6M
    }
676
27.4M
    if (size > capacity_) {
677
0
      resize_buf(size);
678
0
    }
679
27.4M
    while (size_ < size) {
680
0
      new(&(*this)[size_++]) T;
681
0
    }
682
27.4M
  }
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize(unsigned long)
Line
Count
Source
672
327k
  void resize(std::size_t size) {
673
50.1M
    while (size_ > size) {
674
49.8M
      (*this)[--size_].~T();
675
49.8M
    }
676
327k
    if (size > capacity_) {
677
38.2k
      resize_buf(size);
678
38.2k
    }
679
50.1M
    while (size_ < size) {
680
49.8M
      new(&(*this)[size_++]) T;
681
49.8M
    }
682
327k
  }
683
70.8k
  void resize(std::size_t size, const T &value) {
684
70.8k
    while (size_ > size) {
685
0
      (*this)[--size_].~T();
686
0
    }
687
70.8k
    if (size > capacity_) {
688
70.8k
      resize_buf(size);
689
70.8k
    }
690
104M
    while (size_ < size) {
691
104M
      new(&(*this)[size_++]) T(value);
692
104M
    }
693
70.8k
  }
694
695
66.2k
  void reserve(std::size_t size) {
696
66.2k
    if (size > capacity_) {
697
66.2k
      resize_buf(size);
698
66.2k
    }
699
66.2k
  }
700
701
 private:
702
  AutoArray<char> buf_;
703
  std::size_t size_;
704
  std::size_t capacity_;
705
706
  // Disallows copy and assignment.
707
  AutoPool(const AutoPool &);
708
  AutoPool &operator=(const AutoPool &);
709
710
  void resize_buf(std::size_t size);
711
};
712
713
template <typename T>
714
2.95M
void AutoPool<T>::resize_buf(std::size_t size) {
715
2.95M
  std::size_t capacity;
716
2.95M
  if (size >= capacity_ * 2) {
717
965k
    capacity = size;
718
1.98M
  } else {
719
1.98M
    capacity = 1;
720
11.1M
    while (capacity < size) {
721
9.13M
      capacity <<= 1;
722
9.13M
    }
723
1.98M
  }
724
725
2.95M
  AutoArray<char> buf;
726
2.95M
  try {
727
2.95M
    buf.reset(new char[sizeof(T) * capacity]);
728
2.95M
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
2.95M
  if (size_ > 0) {
733
2.38M
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
2.38M
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
124M
    for (std::size_t i = 0; i < size_; ++i) {
736
122M
      new(&dest[i]) T(src[i]);
737
122M
      src[i].~T();
738
122M
    }
739
2.38M
  }
740
741
2.95M
  buf_.swap(&buf);
742
2.95M
  capacity_ = capacity;
743
2.95M
}
Darts::Details::AutoPool<unsigned int>::resize_buf(unsigned long)
Line
Count
Source
714
1.04M
void AutoPool<T>::resize_buf(std::size_t size) {
715
1.04M
  std::size_t capacity;
716
1.04M
  if (size >= capacity_ * 2) {
717
399k
    capacity = size;
718
649k
  } else {
719
649k
    capacity = 1;
720
3.13M
    while (capacity < size) {
721
2.48M
      capacity <<= 1;
722
2.48M
    }
723
649k
  }
724
725
1.04M
  AutoArray<char> buf;
726
1.04M
  try {
727
1.04M
    buf.reset(new char[sizeof(T) * capacity]);
728
1.04M
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
1.04M
  if (size_ > 0) {
733
811k
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
811k
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
9.44M
    for (std::size_t i = 0; i < size_; ++i) {
736
8.62M
      new(&dest[i]) T(src[i]);
737
8.62M
      src[i].~T();
738
8.62M
    }
739
811k
  }
740
741
1.04M
  buf_.swap(&buf);
742
1.04M
  capacity_ = capacity;
743
1.04M
}
Darts::Details::AutoPool<Darts::Details::DawgNode>::resize_buf(unsigned long)
Line
Count
Source
714
393k
void AutoPool<T>::resize_buf(std::size_t size) {
715
393k
  std::size_t capacity;
716
393k
  if (size >= capacity_ * 2) {
717
111k
    capacity = size;
718
282k
  } else {
719
282k
    capacity = 1;
720
1.46M
    while (capacity < size) {
721
1.18M
      capacity <<= 1;
722
1.18M
    }
723
282k
  }
724
725
393k
  AutoArray<char> buf;
726
393k
  try {
727
393k
    buf.reset(new char[sizeof(T) * capacity]);
728
393k
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
393k
  if (size_ > 0) {
733
337k
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
337k
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
5.36M
    for (std::size_t i = 0; i < size_; ++i) {
736
5.02M
      new(&dest[i]) T(src[i]);
737
5.02M
      src[i].~T();
738
5.02M
    }
739
337k
  }
740
741
393k
  buf_.swap(&buf);
742
393k
  capacity_ = capacity;
743
393k
}
Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize_buf(unsigned long)
Line
Count
Source
714
529k
void AutoPool<T>::resize_buf(std::size_t size) {
715
529k
  std::size_t capacity;
716
529k
  if (size >= capacity_ * 2) {
717
111k
    capacity = size;
718
417k
  } else {
719
417k
    capacity = 1;
720
2.73M
    while (capacity < size) {
721
2.32M
      capacity <<= 1;
722
2.32M
    }
723
417k
  }
724
725
529k
  AutoArray<char> buf;
726
529k
  try {
727
529k
    buf.reset(new char[sizeof(T) * capacity]);
728
529k
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
529k
  if (size_ > 0) {
733
473k
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
473k
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
50.9M
    for (std::size_t i = 0; i < size_; ++i) {
736
50.5M
      new(&dest[i]) T(src[i]);
737
50.5M
      src[i].~T();
738
50.5M
    }
739
473k
  }
740
741
529k
  buf_.swap(&buf);
742
529k
  capacity_ = capacity;
743
529k
}
Darts::Details::AutoPool<unsigned char>::resize_buf(unsigned long)
Line
Count
Source
714
878k
void AutoPool<T>::resize_buf(std::size_t size) {
715
878k
  std::size_t capacity;
716
878k
  if (size >= capacity_ * 2) {
717
242k
    capacity = size;
718
635k
  } else {
719
635k
    capacity = 1;
720
3.73M
    while (capacity < size) {
721
3.10M
      capacity <<= 1;
722
3.10M
    }
723
635k
  }
724
725
878k
  AutoArray<char> buf;
726
878k
  try {
727
878k
    buf.reset(new char[sizeof(T) * capacity]);
728
878k
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
878k
  if (size_ > 0) {
733
756k
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
756k
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
53.5M
    for (std::size_t i = 0; i < size_; ++i) {
736
52.7M
      new(&dest[i]) T(src[i]);
737
52.7M
      src[i].~T();
738
52.7M
    }
739
756k
  }
740
741
878k
  buf_.swap(&buf);
742
878k
  capacity_ = capacity;
743
878k
}
Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize_buf(unsigned long)
Line
Count
Source
714
104k
void AutoPool<T>::resize_buf(std::size_t size) {
715
104k
  std::size_t capacity;
716
104k
  if (size >= capacity_ * 2) {
717
99.9k
    capacity = size;
718
99.9k
  } else {
719
4.58k
    capacity = 1;
720
52.5k
    while (capacity < size) {
721
48.0k
      capacity <<= 1;
722
48.0k
    }
723
4.58k
  }
724
725
104k
  AutoArray<char> buf;
726
104k
  try {
727
104k
    buf.reset(new char[sizeof(T) * capacity]);
728
104k
  } catch (const std::bad_alloc &) {
729
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
730
0
  }
731
732
104k
  if (size_ > 0) {
733
10.4k
    T *src = reinterpret_cast<T *>(&buf_[0]);
734
10.4k
    T *dest = reinterpret_cast<T *>(&buf[0]);
735
5.40M
    for (std::size_t i = 0; i < size_; ++i) {
736
5.39M
      new(&dest[i]) T(src[i]);
737
5.39M
      src[i].~T();
738
5.39M
    }
739
10.4k
  }
740
741
104k
  buf_.swap(&buf);
742
104k
  capacity_ = capacity;
743
104k
}
744
745
//
746
// Memory management of stack.
747
//
748
749
template <typename T>
750
class AutoStack {
751
 public:
752
111k
  AutoStack() : pool_() {}
753
111k
  ~AutoStack() {
754
111k
    clear();
755
111k
  }
756
757
  const T &top() const {
758
    return pool_[size() - 1];
759
  }
760
120M
  T &top() {
761
120M
    return pool_[size() - 1];
762
120M
  }
763
764
35.2M
  bool empty() const {
765
35.2M
    return pool_.empty();
766
35.2M
  }
767
120M
  std::size_t size() const {
768
120M
    return pool_.size();
769
120M
  }
770
771
70.4M
  void push(const T &value) {
772
70.4M
    pool_.push_back(value);
773
70.4M
  }
774
66.9M
  void pop() {
775
66.9M
    pool_.pop_back();
776
66.9M
  }
777
778
445k
  void clear() {
779
445k
    pool_.clear();
780
445k
  }
781
782
 private:
783
  AutoPool<T> pool_;
784
785
  // Disallows copy and assignment.
786
  AutoStack(const AutoStack &);
787
  AutoStack &operator=(const AutoStack &);
788
};
789
790
//
791
// Succinct bit vector.
792
//
793
794
class BitVector {
795
 public:
796
55.6k
  BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {}
797
55.6k
  ~BitVector() {
798
55.6k
    clear();
799
55.6k
  }
800
801
53.9M
  bool operator[](std::size_t id) const {
802
53.9M
    return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1;
803
53.9M
  }
804
805
0
  id_type rank(std::size_t id) const {
806
0
    std::size_t unit_id = id / UNIT_SIZE;
807
0
    return ranks_[unit_id] + pop_count(units_[unit_id]
808
0
        & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1)));
809
0
  }
810
811
0
  void set(std::size_t id, bool bit) {
812
0
    if (bit) {
813
0
      units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE);
814
0
    } else {
815
0
      units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE));
816
0
    }
817
0
  }
818
819
0
  bool empty() const {
820
0
    return units_.empty();
821
0
  }
822
111k
  std::size_t num_ones() const {
823
111k
    return num_ones_;
824
111k
  }
825
35.2M
  std::size_t size() const {
826
35.2M
    return size_;
827
35.2M
  }
828
829
35.2M
  void append() {
830
35.2M
    if ((size_ % UNIT_SIZE) == 0) {
831
1.12M
      units_.append(0);
832
1.12M
    }
833
35.2M
    ++size_;
834
35.2M
  }
835
  void build();
836
837
167k
  void clear() {
838
167k
    units_.clear();
839
167k
    ranks_.clear();
840
167k
  }
841
842
 private:
843
  enum { UNIT_SIZE = sizeof(id_type) * 8 };
844
845
  AutoPool<id_type> units_;
846
  AutoArray<id_type> ranks_;
847
  std::size_t num_ones_;
848
  std::size_t size_;
849
850
  // Disallows copy and assignment.
851
  BitVector(const BitVector &);
852
  BitVector &operator=(const BitVector &);
853
854
1.12M
  static id_type pop_count(id_type unit) {
855
1.12M
    unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555);
856
1.12M
    unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333);
857
1.12M
    unit = ((unit >> 4) + unit) & 0x0F0F0F0F;
858
1.12M
    unit += unit >> 8;
859
1.12M
    unit += unit >> 16;
860
1.12M
    return unit & 0xFF;
861
1.12M
  }
862
};
863
864
55.6k
inline void BitVector::build() {
865
55.6k
  try {
866
55.6k
    ranks_.reset(new id_type[units_.size()]);
867
55.6k
  } catch (const std::bad_alloc &) {
868
0
    DARTS_THROW("failed to build rank index: std::bad_alloc");
869
0
  }
870
871
55.6k
  num_ones_ = 0;
872
1.18M
  for (std::size_t i = 0; i < units_.size(); ++i) {
873
1.12M
    ranks_[i] = num_ones_;
874
1.12M
    num_ones_ += pop_count(units_[i]);
875
1.12M
  }
876
55.6k
}
877
878
//
879
// Keyset.
880
//
881
882
template <typename T>
883
class Keyset {
884
 public:
885
  Keyset(std::size_t num_keys, const char_type * const *keys,
886
      const std::size_t *lengths, const T *values) :
887
66.2k
      num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {}
888
889
8.39M
  std::size_t num_keys() const {
890
8.39M
    return num_keys_;
891
8.39M
  }
892
8.28M
  const char_type *keys(std::size_t id) const {
893
8.28M
    return keys_[id];
894
8.28M
  }
895
699k
  uchar_type keys(std::size_t key_id, std::size_t char_id) const {
896
699k
    if (has_lengths() && char_id >= lengths_[key_id])
897
97.6k
      return '\0';
898
602k
    return keys_[key_id][char_id];
899
699k
  }
900
901
9.08M
  bool has_lengths() const {
902
9.08M
    return lengths_ != NULL;
903
9.08M
  }
904
8.33M
  std::size_t lengths(std::size_t id) const {
905
8.33M
    if (has_lengths()) {
906
8.33M
      return lengths_[id];
907
8.33M
    }
908
0
    std::size_t length = 0;
909
0
    while (keys_[id][length] != '\0') {
910
0
      ++length;
911
0
    }
912
0
    return length;
913
8.33M
  }
914
915
8.44M
  bool has_values() const {
916
8.44M
    return values_ != NULL;
917
8.44M
  }
918
8.38M
  const value_type values(std::size_t id) const {
919
8.38M
    if (has_values()) {
920
8.28M
      return static_cast<value_type>(values_[id]);
921
8.28M
    }
922
97.6k
    return static_cast<value_type>(id);
923
8.38M
  }
924
925
 private:
926
  std::size_t num_keys_;
927
  const char_type * const * keys_;
928
  const std::size_t *lengths_;
929
  const T *values_;
930
931
  // Disallows copy and assignment.
932
  Keyset(const Keyset &);
933
  Keyset &operator=(const Keyset &);
934
};
935
936
//
937
// Node of Directed Acyclic Word Graph (DAWG).
938
//
939
940
class DawgNode {
941
 public:
942
35.2M
  DawgNode() : child_(0), sibling_(0), label_('\0'),
943
35.2M
    is_state_(false), has_sibling_(false) {}
944
945
62.1M
  void set_child(id_type child) {
946
62.1M
    child_ = child;
947
62.1M
  }
948
35.1M
  void set_sibling(id_type sibling) {
949
35.1M
    sibling_ = sibling;
950
35.1M
  }
951
8.28M
  void set_value(value_type value) {
952
8.28M
    child_ = value;
953
8.28M
  }
954
35.2M
  void set_label(uchar_type label) {
955
35.2M
    label_ = label;
956
35.2M
  }
957
26.9M
  void set_is_state(bool is_state) {
958
26.9M
    is_state_ = is_state;
959
26.9M
  }
960
8.22M
  void set_has_sibling(bool has_sibling) {
961
8.22M
    has_sibling_ = has_sibling;
962
8.22M
  }
963
964
104M
  id_type child() const {
965
104M
    return child_;
966
104M
  }
967
177M
  id_type sibling() const {
968
177M
    return sibling_;
969
177M
  }
970
0
  value_type value() const {
971
0
    return static_cast<value_type>(child_);
972
0
  }
973
104M
  uchar_type label() const {
974
104M
    return label_;
975
104M
  }
976
0
  bool is_state() const {
977
0
    return is_state_;
978
0
  }
979
0
  bool has_sibling() const {
980
0
    return has_sibling_;
981
0
  }
982
983
98.8M
  id_type unit() const {
984
98.8M
    if (label_ == '\0') {
985
22.1M
      return (child_ << 1) | (has_sibling_ ? 1 : 0);
986
22.1M
    }
987
76.7M
    return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0);
988
98.8M
  }
989
990
 private:
991
  id_type child_;
992
  id_type sibling_;
993
  uchar_type label_;
994
  bool is_state_;
995
  bool has_sibling_;
996
997
  // Copyable.
998
};
999
1000
//
1001
// Fixed unit of Directed Acyclic Word Graph (DAWG).
1002
//
1003
1004
class DawgUnit {
1005
 public:
1006
35.2M
  explicit DawgUnit(id_type unit = 0) : unit_(unit) {}
1007
50.5M
  DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {}
1008
1009
35.2M
  DawgUnit &operator=(id_type unit) {
1010
35.2M
    unit_ = unit;
1011
35.2M
    return *this;
1012
35.2M
  }
1013
1014
50.6M
  id_type unit() const {
1015
50.6M
    return unit_;
1016
50.6M
  }
1017
1018
80.9M
  id_type child() const {
1019
80.9M
    return unit_ >> 2;
1020
80.9M
  }
1021
164M
  bool has_sibling() const {
1022
164M
    return (unit_ & 1) == 1;
1023
164M
  }
1024
8.28M
  value_type value() const {
1025
8.28M
    return static_cast<value_type>(unit_ >> 1);
1026
8.28M
  }
1027
17.0M
  bool is_state() const {
1028
17.0M
    return (unit_ & 2) == 2;
1029
17.0M
  }
1030
1031
 private:
1032
  id_type unit_;
1033
1034
  // Copyable.
1035
};
1036
1037
//
1038
// Directed Acyclic Word Graph (DAWG) builder.
1039
//
1040
1041
class DawgBuilder {
1042
 public:
1043
55.6k
  DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(),
1044
55.6k
    table_(), node_stack_(), recycle_bin_(), num_states_(0) {}
1045
55.6k
  ~DawgBuilder() {
1046
55.6k
    clear();
1047
55.6k
  }
1048
1049
111k
  id_type root() const {
1050
111k
    return 0;
1051
111k
  }
1052
1053
80.9M
  id_type child(id_type id) const {
1054
80.9M
    return units_[id].child();
1055
80.9M
  }
1056
105M
  id_type sibling(id_type id) const {
1057
105M
    return units_[id].has_sibling() ? (id + 1) : 0;
1058
105M
  }
1059
8.28M
  int value(id_type id) const {
1060
8.28M
    return units_[id].value();
1061
8.28M
  }
1062
1063
35.1M
  bool is_leaf(id_type id) const {
1064
35.1M
    return label(id) == '\0';
1065
35.1M
  }
1066
105M
  uchar_type label(id_type id) const {
1067
105M
    return labels_[id];
1068
105M
  }
1069
1070
53.9M
  bool is_intersection(id_type id) const {
1071
53.9M
    return is_intersections_[id];
1072
53.9M
  }
1073
0
  id_type intersection_id(id_type id) const {
1074
0
    return is_intersections_.rank(id) - 1;
1075
0
  }
1076
1077
111k
  std::size_t num_intersections() const {
1078
111k
    return is_intersections_.num_ones();
1079
111k
  }
1080
1081
529k
  std::size_t size() const {
1082
529k
    return units_.size();
1083
529k
  }
1084
1085
  void init();
1086
  void finish();
1087
1088
  void insert(const char *key, std::size_t length, value_type value);
1089
1090
  void clear();
1091
1092
 private:
1093
  enum { INITIAL_TABLE_SIZE = 1 << 10 };
1094
1095
  AutoPool<DawgNode> nodes_;
1096
  AutoPool<DawgUnit> units_;
1097
  AutoPool<uchar_type> labels_;
1098
  BitVector is_intersections_;
1099
  AutoPool<id_type> table_;
1100
  AutoStack<id_type> node_stack_;
1101
  AutoStack<id_type> recycle_bin_;
1102
  std::size_t num_states_;
1103
1104
  // Disallows copy and assignment.
1105
  DawgBuilder(const DawgBuilder &);
1106
  DawgBuilder &operator=(const DawgBuilder &);
1107
1108
  void flush(id_type id);
1109
1110
  void expand_table();
1111
1112
  id_type find_unit(id_type id, id_type *hash_id) const;
1113
  id_type find_node(id_type node_id, id_type *hash_id) const;
1114
1115
  bool are_equal(id_type node_id, id_type unit_id) const;
1116
1117
  id_type hash_unit(id_type id) const;
1118
  id_type hash_node(id_type id) const;
1119
1120
  id_type append_node();
1121
  id_type append_unit();
1122
1123
35.1M
  void free_node(id_type id) {
1124
35.1M
    recycle_bin_.push(id);
1125
35.1M
  }
1126
1127
57.3M
  static id_type hash(id_type key) {
1128
57.3M
    key = ~key + (key << 15);  // key = (key << 15) - key - 1;
1129
57.3M
    key = key ^ (key >> 12);
1130
57.3M
    key = key + (key << 2);
1131
57.3M
    key = key ^ (key >> 4);
1132
57.3M
    key = key * 2057;  // key = (key + (key << 3)) + (key << 11);
1133
57.3M
    key = key ^ (key >> 16);
1134
57.3M
    return key;
1135
57.3M
  }
1136
};
1137
1138
55.6k
inline void DawgBuilder::init() {
1139
55.6k
  table_.resize(INITIAL_TABLE_SIZE, 0);
1140
1141
55.6k
  append_node();
1142
55.6k
  append_unit();
1143
1144
55.6k
  num_states_ = 1;
1145
1146
55.6k
  nodes_[0].set_label(0xFF);
1147
55.6k
  node_stack_.push(0);
1148
55.6k
}
1149
1150
55.6k
inline void DawgBuilder::finish() {
1151
55.6k
  flush(0);
1152
1153
55.6k
  units_[0] = nodes_[0].unit();
1154
55.6k
  labels_[0] = nodes_[0].label();
1155
1156
55.6k
  nodes_.clear();
1157
55.6k
  table_.clear();
1158
55.6k
  node_stack_.clear();
1159
55.6k
  recycle_bin_.clear();
1160
1161
55.6k
  is_intersections_.build();
1162
55.6k
}
1163
1164
inline void DawgBuilder::insert(const char *key, std::size_t length,
1165
8.28M
    value_type value) {
1166
8.28M
  if (value < 0) {
1167
0
    DARTS_THROW("failed to insert key: negative value");
1168
8.28M
  } else if (length == 0) {
1169
0
    DARTS_THROW("failed to insert key: zero-length key");
1170
0
  }
1171
1172
8.28M
  id_type id = 0;
1173
8.28M
  std::size_t key_pos = 0;
1174
1175
34.1M
  for ( ; key_pos <= length; ++key_pos) {
1176
34.1M
    id_type child_id = nodes_[id].child();
1177
34.1M
    if (child_id == 0) {
1178
55.6k
      break;
1179
55.6k
    }
1180
1181
34.0M
    uchar_type key_label = static_cast<uchar_type>(key[key_pos]);
1182
34.0M
    if (key_pos < length && key_label == '\0') {
1183
0
      DARTS_THROW("failed to insert key: invalid null character");
1184
0
    }
1185
1186
34.0M
    uchar_type unit_label = nodes_[child_id].label();
1187
34.0M
    if (key_label < unit_label) {
1188
0
      DARTS_THROW("failed to insert key: wrong key order");
1189
34.0M
    } else if (key_label > unit_label) {
1190
8.22M
      nodes_[child_id].set_has_sibling(true);
1191
8.22M
      flush(child_id);
1192
8.22M
      break;
1193
8.22M
    }
1194
25.8M
    id = child_id;
1195
25.8M
  }
1196
1197
8.28M
  if (key_pos > length) {
1198
0
    return;
1199
0
  }
1200
1201
43.4M
  for ( ; key_pos <= length; ++key_pos) {
1202
35.1M
    uchar_type key_label = static_cast<uchar_type>(
1203
35.1M
        (key_pos < length) ? key[key_pos] : '\0');
1204
35.1M
    id_type child_id = append_node();
1205
1206
35.1M
    if (nodes_[id].child() == 0) {
1207
26.9M
      nodes_[child_id].set_is_state(true);
1208
26.9M
    }
1209
35.1M
    nodes_[child_id].set_sibling(nodes_[id].child());
1210
35.1M
    nodes_[child_id].set_label(key_label);
1211
35.1M
    nodes_[id].set_child(child_id);
1212
35.1M
    node_stack_.push(child_id);
1213
1214
35.1M
    id = child_id;
1215
35.1M
  }
1216
8.28M
  nodes_[id].set_value(value);
1217
8.28M
}
1218
1219
111k
inline void DawgBuilder::clear() {
1220
111k
  nodes_.clear();
1221
111k
  units_.clear();
1222
111k
  labels_.clear();
1223
111k
  is_intersections_.clear();
1224
111k
  table_.clear();
1225
111k
  node_stack_.clear();
1226
111k
  recycle_bin_.clear();
1227
111k
  num_states_ = 0;
1228
111k
}
1229
1230
8.28M
inline void DawgBuilder::flush(id_type id) {
1231
35.2M
  while (node_stack_.top() != id) {
1232
26.9M
    id_type node_id = node_stack_.top();
1233
26.9M
    node_stack_.pop();
1234
1235
26.9M
    if (num_states_ >= table_.size() - (table_.size() >> 2)) {
1236
15.1k
      expand_table();
1237
15.1k
    }
1238
1239
26.9M
    id_type num_siblings = 0;
1240
62.1M
    for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1241
35.1M
      ++num_siblings;
1242
35.1M
    }
1243
1244
26.9M
    id_type hash_id;
1245
26.9M
    id_type match_id = find_node(node_id, &hash_id);
1246
26.9M
    if (match_id != 0) {
1247
0
      is_intersections_.set(match_id, true);
1248
26.9M
    } else {
1249
26.9M
      id_type unit_id = 0;
1250
62.1M
      for (id_type i = 0; i < num_siblings; ++i) {
1251
35.1M
        unit_id = append_unit();
1252
35.1M
      }
1253
62.1M
      for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1254
35.1M
        units_[unit_id] = nodes_[i].unit();
1255
35.1M
        labels_[unit_id] = nodes_[i].label();
1256
35.1M
        --unit_id;
1257
35.1M
      }
1258
26.9M
      match_id = unit_id + 1;
1259
26.9M
      table_[hash_id] = match_id;
1260
26.9M
      ++num_states_;
1261
26.9M
    }
1262
1263
62.1M
    for (id_type i = node_id, next; i != 0; i = next) {
1264
35.1M
      next = nodes_[i].sibling();
1265
35.1M
      free_node(i);
1266
35.1M
    }
1267
1268
26.9M
    nodes_[node_stack_.top()].set_child(match_id);
1269
26.9M
  }
1270
8.28M
  node_stack_.pop();
1271
8.28M
}
1272
1273
15.1k
inline void DawgBuilder::expand_table() {
1274
15.1k
  std::size_t table_size = table_.size() << 1;
1275
15.1k
  table_.clear();
1276
15.1k
  table_.resize(table_size, 0);
1277
1278
22.2M
  for (std::size_t i = 1; i < units_.size(); ++i) {
1279
22.1M
    id_type id = static_cast<id_type>(i);
1280
22.1M
    if (labels_[id] == '\0' || units_[id].is_state()) {
1281
17.7M
      id_type hash_id;
1282
17.7M
      find_unit(id, &hash_id);
1283
17.7M
      table_[hash_id] = id;
1284
17.7M
    }
1285
22.1M
  }
1286
15.1k
}
1287
1288
17.7M
inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const {
1289
17.7M
  *hash_id = hash_unit(id) % table_.size();
1290
23.0M
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1291
23.0M
    id_type unit_id = table_[*hash_id];
1292
23.0M
    if (unit_id == 0) {
1293
17.7M
      break;
1294
17.7M
    }
1295
1296
    // There must not be the same unit.
1297
23.0M
  }
1298
17.7M
  return 0;
1299
17.7M
}
1300
1301
inline id_type DawgBuilder::find_node(id_type node_id,
1302
26.9M
    id_type *hash_id) const {
1303
26.9M
  *hash_id = hash_node(node_id) % table_.size();
1304
62.7M
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1305
62.7M
    id_type unit_id = table_[*hash_id];
1306
62.7M
    if (unit_id == 0) {
1307
26.9M
      break;
1308
26.9M
    }
1309
1310
35.7M
    if (are_equal(node_id, unit_id)) {
1311
0
      return unit_id;
1312
0
    }
1313
35.7M
  }
1314
26.9M
  return 0;
1315
26.9M
}
1316
1317
35.7M
inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const {
1318
36.6M
  for (id_type i = nodes_[node_id].sibling(); i != 0;
1319
35.7M
      i = nodes_[i].sibling()) {
1320
4.48M
    if (units_[unit_id].has_sibling() == false) {
1321
3.62M
      return false;
1322
3.62M
    }
1323
855k
    ++unit_id;
1324
855k
  }
1325
32.1M
  if (units_[unit_id].has_sibling() == true) {
1326
3.70M
    return false;
1327
3.70M
  }
1328
1329
28.4M
  for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) {
1330
28.4M
    if (nodes_[i].unit() != units_[unit_id].unit() ||
1331
28.4M
        nodes_[i].label() != labels_[unit_id]) {
1332
28.4M
      return false;
1333
28.4M
    }
1334
28.4M
  }
1335
0
  return true;
1336
28.4M
}
1337
1338
17.7M
inline id_type DawgBuilder::hash_unit(id_type id) const {
1339
17.7M
  id_type hash_value = 0;
1340
22.1M
  for ( ; id != 0; ++id) {
1341
22.1M
    id_type unit = units_[id].unit();
1342
22.1M
    uchar_type label = labels_[id];
1343
22.1M
    hash_value ^= hash((label << 24) ^ unit);
1344
1345
22.1M
    if (units_[id].has_sibling() == false) {
1346
17.7M
      break;
1347
17.7M
    }
1348
22.1M
  }
1349
17.7M
  return hash_value;
1350
17.7M
}
1351
1352
26.9M
inline id_type DawgBuilder::hash_node(id_type id) const {
1353
26.9M
  id_type hash_value = 0;
1354
62.1M
  for ( ; id != 0; id = nodes_[id].sibling()) {
1355
35.1M
    id_type unit = nodes_[id].unit();
1356
35.1M
    uchar_type label = nodes_[id].label();
1357
35.1M
    hash_value ^= hash((label << 24) ^ unit);
1358
35.1M
  }
1359
26.9M
  return hash_value;
1360
26.9M
}
1361
1362
35.2M
inline id_type DawgBuilder::append_unit() {
1363
35.2M
  is_intersections_.append();
1364
35.2M
  units_.append();
1365
35.2M
  labels_.append();
1366
1367
35.2M
  return static_cast<id_type>(is_intersections_.size() - 1);
1368
35.2M
}
1369
1370
35.2M
inline id_type DawgBuilder::append_node() {
1371
35.2M
  id_type id;
1372
35.2M
  if (recycle_bin_.empty()) {
1373
3.57M
    id = static_cast<id_type>(nodes_.size());
1374
3.57M
    nodes_.append();
1375
31.6M
  } else {
1376
31.6M
    id = recycle_bin_.top();
1377
31.6M
    nodes_[id] = DawgNode();
1378
31.6M
    recycle_bin_.pop();
1379
31.6M
  }
1380
35.2M
  return id;
1381
35.2M
}
1382
1383
//
1384
// Unit of double-array builder.
1385
//
1386
1387
class DoubleArrayBuilderUnit {
1388
 public:
1389
49.8M
  DoubleArrayBuilderUnit() : unit_(0) {}
1390
1391
8.33M
  void set_has_leaf(bool has_leaf) {
1392
8.33M
    if (has_leaf) {
1393
8.33M
      unit_ |= 1U << 8;
1394
8.33M
    } else {
1395
0
      unit_ &= ~(1U << 8);
1396
0
    }
1397
8.33M
  }
1398
8.33M
  void set_value(value_type value) {
1399
8.33M
    unit_ = value | (1U << 31);
1400
8.33M
  }
1401
41.5M
  void set_label(uchar_type label) {
1402
41.5M
    unit_ = (unit_ & ~0xFFU) | label;
1403
41.5M
  }
1404
27.1M
  void set_offset(id_type offset) {
1405
27.1M
    if (offset >= 1U << 29) {
1406
0
      DARTS_THROW("failed to modify unit: too large offset");
1407
0
    }
1408
27.1M
    unit_ &= (1U << 31) | (1U << 8) | 0xFF;
1409
27.1M
    if (offset < 1U << 21) {
1410
27.1M
      unit_ |= (offset << 10);
1411
27.1M
    } else {
1412
0
      unit_ |= (offset << 2) | (1U << 9);
1413
0
    }
1414
27.1M
  }
1415
1416
 private:
1417
  id_type unit_;
1418
1419
  // Copyable.
1420
};
1421
1422
//
1423
// Extra unit of double-array builder.
1424
//
1425
1426
class DoubleArrayBuilderExtraUnit {
1427
 public:
1428
271M
  DoubleArrayBuilderExtraUnit() : prev_(0), next_(0),
1429
271M
      is_fixed_(false), is_used_(false) {}
1430
1431
100M
  void set_prev(id_type prev) {
1432
100M
    prev_ = prev;
1433
100M
  }
1434
100M
  void set_next(id_type next) {
1435
100M
    next_ = next;
1436
100M
  }
1437
52.2M
  void set_is_fixed(bool is_fixed) {
1438
52.2M
    is_fixed_ = is_fixed;
1439
52.2M
  }
1440
29.5M
  void set_is_used(bool is_used) {
1441
29.5M
    is_used_ = is_used;
1442
29.5M
  }
1443
1444
100M
  id_type prev() const {
1445
100M
    return prev_;
1446
100M
  }
1447
324M
  id_type next() const {
1448
324M
    return next_;
1449
324M
  }
1450
103M
  bool is_fixed() const {
1451
103M
    return is_fixed_;
1452
103M
  }
1453
233M
  bool is_used() const {
1454
233M
    return is_used_;
1455
233M
  }
1456
1457
 private:
1458
  id_type prev_;
1459
  id_type next_;
1460
  bool is_fixed_;
1461
  bool is_used_;
1462
1463
  // Copyable.
1464
};
1465
1466
//
1467
// DAWG -> double-array converter.
1468
//
1469
1470
class DoubleArrayBuilder {
1471
 public:
1472
  explicit DoubleArrayBuilder(progress_func_type progress_func)
1473
66.2k
      : progress_func_(progress_func), units_(), extras_(), labels_(),
1474
66.2k
        table_(), extras_head_(0) {}
1475
66.2k
  ~DoubleArrayBuilder() {
1476
66.2k
    clear();
1477
66.2k
  }
1478
1479
  template <typename T>
1480
  void build(const Keyset<T> &keyset);
1481
  void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const;
1482
1483
  void clear();
1484
1485
 private:
1486
  static constexpr int BLOCK_SIZE = 256;
1487
  static constexpr int NUM_EXTRA_BLOCKS = 16;
1488
  static constexpr int NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS;
1489
1490
  static constexpr int  UPPER_MASK = 0xFF << 21;
1491
  static constexpr int  LOWER_MASK = 0xFF;
1492
1493
  typedef DoubleArrayBuilderUnit unit_type;
1494
  typedef DoubleArrayBuilderExtraUnit extra_type;
1495
1496
  progress_func_type progress_func_;
1497
  AutoPool<unit_type> units_;
1498
  AutoArray<extra_type> extras_;
1499
  AutoPool<uchar_type> labels_;
1500
  AutoArray<id_type> table_;
1501
  id_type extras_head_;
1502
1503
  // Disallows copy and assignment.
1504
  DoubleArrayBuilder(const DoubleArrayBuilder &);
1505
  DoubleArrayBuilder &operator=(const DoubleArrayBuilder &);
1506
1507
328k
  std::size_t num_blocks() const {
1508
328k
    return units_.size() / BLOCK_SIZE;
1509
328k
  }
1510
1511
489M
  const extra_type &extras(id_type id) const {
1512
489M
    return extras_[id % NUM_EXTRAS];
1513
489M
  }
1514
554M
  extra_type &extras(id_type id) {
1515
554M
    return extras_[id % NUM_EXTRAS];
1516
554M
  }
1517
1518
  template <typename T>
1519
  void build_dawg(const Keyset<T> &keyset, DawgBuilder *dawg_builder);
1520
  void build_from_dawg(const DawgBuilder &dawg);
1521
  void build_from_dawg(const DawgBuilder &dawg,
1522
      id_type dawg_id, id_type dic_id);
1523
  id_type arrange_from_dawg(const DawgBuilder &dawg,
1524
      id_type dawg_id, id_type dic_id);
1525
1526
  template <typename T>
1527
  void build_from_keyset(const Keyset<T> &keyset);
1528
  template <typename T>
1529
  void build_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1530
      std::size_t end, std::size_t depth, id_type dic_id);
1531
  template <typename T>
1532
  id_type arrange_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1533
      std::size_t end, std::size_t depth, id_type dic_id);
1534
1535
  id_type find_valid_offset(id_type id) const;
1536
  bool is_valid_offset(id_type id, id_type offset) const;
1537
1538
  void reserve_id(id_type id);
1539
  void expand_units();
1540
1541
  void fix_all_blocks();
1542
  void fix_block(id_type block_id);
1543
};
1544
1545
template <typename T>
1546
66.2k
void DoubleArrayBuilder::build(const Keyset<T> &keyset) {
1547
66.2k
  if (keyset.has_values()) {
1548
55.6k
    Details::DawgBuilder dawg_builder;
1549
55.6k
    build_dawg(keyset, &dawg_builder);
1550
55.6k
    build_from_dawg(dawg_builder);
1551
55.6k
    dawg_builder.clear();
1552
55.6k
  } else {
1553
10.5k
    build_from_keyset(keyset);
1554
10.5k
  }
1555
66.2k
}
1556
1557
inline void DoubleArrayBuilder::copy(std::size_t *size_ptr,
1558
66.2k
    DoubleArrayUnit **buf_ptr) const {
1559
66.2k
  if (size_ptr != NULL) {
1560
66.2k
    *size_ptr = units_.size();
1561
66.2k
  }
1562
66.2k
  if (buf_ptr != NULL) {
1563
66.2k
    *buf_ptr = new DoubleArrayUnit[units_.size()];
1564
66.2k
    unit_type *units = reinterpret_cast<unit_type *>(*buf_ptr);
1565
49.9M
    for (std::size_t i = 0; i < units_.size(); ++i) {
1566
49.8M
      units[i] = units_[i];
1567
49.8M
    }
1568
66.2k
  }
1569
66.2k
}
1570
1571
66.2k
inline void DoubleArrayBuilder::clear() {
1572
66.2k
  units_.clear();
1573
66.2k
  extras_.clear();
1574
66.2k
  labels_.clear();
1575
66.2k
  table_.clear();
1576
66.2k
  extras_head_ = 0;
1577
66.2k
}
1578
1579
template <typename T>
1580
void DoubleArrayBuilder::build_dawg(const Keyset<T> &keyset,
1581
55.6k
    DawgBuilder *dawg_builder) {
1582
55.6k
  dawg_builder->init();
1583
8.33M
  for (std::size_t i = 0; i < keyset.num_keys(); ++i) {
1584
8.28M
    dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i));
1585
8.28M
    if (progress_func_ != NULL) {
1586
0
      progress_func_(i + 1, keyset.num_keys() + 1);
1587
0
    }
1588
8.28M
  }
1589
55.6k
  dawg_builder->finish();
1590
55.6k
}
1591
1592
55.6k
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) {
1593
55.6k
  std::size_t num_units = 1;
1594
529k
  while (num_units < dawg.size()) {
1595
473k
    num_units <<= 1;
1596
473k
  }
1597
55.6k
  units_.reserve(num_units);
1598
1599
55.6k
  table_.reset(new id_type[dawg.num_intersections()]);
1600
55.6k
  for (std::size_t i = 0; i < dawg.num_intersections(); ++i) {
1601
0
    table_[i] = 0;
1602
0
  }
1603
1604
55.6k
  extras_.reset(new extra_type[NUM_EXTRAS]);
1605
1606
55.6k
  reserve_id(0);
1607
55.6k
  extras(0).set_is_used(true);
1608
55.6k
  units_[0].set_offset(1);
1609
55.6k
  units_[0].set_label('\0');
1610
1611
55.6k
  if (dawg.child(dawg.root()) != 0) {
1612
55.6k
    build_from_dawg(dawg, dawg.root(), 0);
1613
55.6k
  }
1614
1615
55.6k
  fix_all_blocks();
1616
1617
55.6k
  extras_.clear();
1618
55.6k
  labels_.clear();
1619
55.6k
  table_.clear();
1620
55.6k
}
1621
1622
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg,
1623
26.9M
    id_type dawg_id, id_type dic_id) {
1624
26.9M
  id_type dawg_child_id = dawg.child(dawg_id);
1625
26.9M
  if (dawg.is_intersection(dawg_child_id)) {
1626
0
    id_type intersection_id = dawg.intersection_id(dawg_child_id);
1627
0
    id_type offset = table_[intersection_id];
1628
0
    if (offset != 0) {
1629
0
      offset ^= dic_id;
1630
0
      if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) {
1631
0
        if (dawg.is_leaf(dawg_child_id)) {
1632
0
          units_[dic_id].set_has_leaf(true);
1633
0
        }
1634
0
        units_[dic_id].set_offset(offset);
1635
0
        return;
1636
0
      }
1637
0
    }
1638
0
  }
1639
1640
26.9M
  id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id);
1641
26.9M
  if (dawg.is_intersection(dawg_child_id)) {
1642
0
    table_[dawg.intersection_id(dawg_child_id)] = offset;
1643
0
  }
1644
1645
35.1M
  do {
1646
35.1M
    uchar_type child_label = dawg.label(dawg_child_id);
1647
35.1M
    id_type dic_child_id = offset ^ child_label;
1648
35.1M
    if (child_label != '\0') {
1649
26.9M
      build_from_dawg(dawg, dawg_child_id, dic_child_id);
1650
26.9M
    }
1651
35.1M
    dawg_child_id = dawg.sibling(dawg_child_id);
1652
35.1M
  } while (dawg_child_id != 0);
1653
26.9M
}
1654
1655
inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg,
1656
26.9M
    id_type dawg_id, id_type dic_id) {
1657
26.9M
  labels_.resize(0);
1658
1659
26.9M
  id_type dawg_child_id = dawg.child(dawg_id);
1660
62.1M
  while (dawg_child_id != 0) {
1661
35.1M
    labels_.append(dawg.label(dawg_child_id));
1662
35.1M
    dawg_child_id = dawg.sibling(dawg_child_id);
1663
35.1M
  }
1664
1665
26.9M
  id_type offset = find_valid_offset(dic_id);
1666
26.9M
  units_[dic_id].set_offset(dic_id ^ offset);
1667
1668
26.9M
  dawg_child_id = dawg.child(dawg_id);
1669
62.1M
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1670
35.1M
    id_type dic_child_id = offset ^ labels_[i];
1671
35.1M
    reserve_id(dic_child_id);
1672
1673
35.1M
    if (dawg.is_leaf(dawg_child_id)) {
1674
8.28M
      units_[dic_id].set_has_leaf(true);
1675
8.28M
      units_[dic_child_id].set_value(dawg.value(dawg_child_id));
1676
26.9M
    } else {
1677
26.9M
      units_[dic_child_id].set_label(labels_[i]);
1678
26.9M
    }
1679
1680
35.1M
    dawg_child_id = dawg.sibling(dawg_child_id);
1681
35.1M
  }
1682
26.9M
  extras(offset).set_is_used(true);
1683
1684
26.9M
  return offset;
1685
26.9M
}
1686
1687
template <typename T>
1688
10.5k
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset) {
1689
10.5k
  std::size_t num_units = 1;
1690
31.3k
  while (num_units < keyset.num_keys()) {
1691
20.8k
    num_units <<= 1;
1692
20.8k
  }
1693
10.5k
  units_.reserve(num_units);
1694
1695
10.5k
  extras_.reset(new extra_type[NUM_EXTRAS]);
1696
1697
10.5k
  reserve_id(0);
1698
10.5k
  extras(0).set_is_used(true);
1699
10.5k
  units_[0].set_offset(1);
1700
10.5k
  units_[0].set_label('\0');
1701
1702
10.5k
  if (keyset.num_keys() > 0) {
1703
10.5k
    build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0);
1704
10.5k
  }
1705
1706
10.5k
  fix_all_blocks();
1707
1708
10.5k
  extras_.clear();
1709
10.5k
  labels_.clear();
1710
10.5k
}
1711
1712
template <typename T>
1713
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset,
1714
150k
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1715
150k
  id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id);
1716
1717
199k
  while (begin < end) {
1718
150k
    if (keyset.keys(begin, depth) != '\0') {
1719
101k
      break;
1720
101k
    }
1721
48.8k
    ++begin;
1722
48.8k
  }
1723
150k
  if (begin == end) {
1724
48.8k
    return;
1725
48.8k
  }
1726
1727
101k
  std::size_t last_begin = begin;
1728
101k
  uchar_type last_label = keyset.keys(begin, depth);
1729
230k
  while (++begin < end) {
1730
128k
    uchar_type label = keyset.keys(begin, depth);
1731
128k
    if (label != last_label) {
1732
38.2k
      build_from_keyset(keyset, last_begin, begin,
1733
38.2k
          depth + 1, offset ^ last_label);
1734
38.2k
      last_begin = begin;
1735
38.2k
      last_label = keyset.keys(begin, depth);
1736
38.2k
    }
1737
128k
  }
1738
101k
  build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label);
1739
101k
}
1740
1741
template <typename T>
1742
id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset<T> &keyset,
1743
150k
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1744
150k
  labels_.resize(0);
1745
1746
150k
  value_type value = -1;
1747
430k
  for (std::size_t i = begin; i < end; ++i) {
1748
279k
    uchar_type label = keyset.keys(i, depth);
1749
279k
    if (label == '\0') {
1750
48.8k
      if (keyset.has_lengths() && depth < keyset.lengths(i)) {
1751
0
        DARTS_THROW("failed to build double-array: "
1752
0
            "invalid null character");
1753
48.8k
      } else if (keyset.values(i) < 0) {
1754
0
        DARTS_THROW("failed to build double-array: negative value");
1755
0
      }
1756
1757
48.8k
      if (value == -1) {
1758
48.8k
        value = keyset.values(i);
1759
48.8k
      }
1760
48.8k
      if (progress_func_ != NULL) {
1761
0
        progress_func_(i + 1, keyset.num_keys() + 1);
1762
0
      }
1763
48.8k
    }
1764
1765
279k
    if (labels_.empty()) {
1766
150k
      labels_.append(label);
1767
150k
    } else if (label != labels_[labels_.size() - 1]) {
1768
38.2k
      if (label < labels_[labels_.size() - 1]) {
1769
0
        DARTS_THROW("failed to build double-array: wrong key order");
1770
0
      }
1771
38.2k
      labels_.append(label);
1772
38.2k
    }
1773
279k
  }
1774
1775
150k
  id_type offset = find_valid_offset(dic_id);
1776
150k
  units_[dic_id].set_offset(dic_id ^ offset);
1777
1778
339k
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1779
189k
    id_type dic_child_id = offset ^ labels_[i];
1780
189k
    reserve_id(dic_child_id);
1781
189k
    if (labels_[i] == '\0') {
1782
48.8k
      units_[dic_id].set_has_leaf(true);
1783
48.8k
      units_[dic_child_id].set_value(value);
1784
140k
    } else {
1785
140k
      units_[dic_child_id].set_label(labels_[i]);
1786
140k
    }
1787
189k
  }
1788
150k
  extras(offset).set_is_used(true);
1789
1790
150k
  return offset;
1791
150k
}
1792
1793
27.1M
inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const {
1794
27.1M
  if (extras_head_ >= units_.size()) {
1795
86
    return units_.size() | (id & LOWER_MASK);
1796
86
  }
1797
1798
27.1M
  id_type unfixed_id = extras_head_;
1799
231M
  do {
1800
231M
    id_type offset = unfixed_id ^ labels_[0];
1801
231M
    if (is_valid_offset(id, offset)) {
1802
26.9M
      return offset;
1803
26.9M
    }
1804
204M
    unfixed_id = extras(unfixed_id).next();
1805
204M
  } while (unfixed_id != extras_head_);
1806
1807
128k
  return units_.size() | (id & LOWER_MASK);
1808
27.1M
}
1809
1810
inline bool DoubleArrayBuilder::is_valid_offset(id_type id,
1811
231M
    id_type offset) const {
1812
231M
  if (extras(offset).is_used()) {
1813
175M
    return false;
1814
175M
  }
1815
1816
55.4M
  id_type rel_offset = id ^ offset;
1817
55.4M
  if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) {
1818
0
    return false;
1819
0
  }
1820
1821
81.0M
  for (std::size_t i = 1; i < labels_.size(); ++i) {
1822
54.0M
    if (extras(offset ^ labels_[i]).is_fixed()) {
1823
28.5M
      return false;
1824
28.5M
    }
1825
54.0M
  }
1826
1827
26.9M
  return true;
1828
55.4M
}
1829
1830
49.8M
inline void DoubleArrayBuilder::reserve_id(id_type id) {
1831
49.8M
  if (id >= units_.size()) {
1832
194k
    expand_units();
1833
194k
  }
1834
1835
49.8M
  if (id == extras_head_) {
1836
20.3M
    extras_head_ = extras(id).next();
1837
20.3M
    if (extras_head_ == id) {
1838
66.3k
      extras_head_ = units_.size();
1839
66.3k
    }
1840
20.3M
  }
1841
49.8M
  extras(extras(id).prev()).set_next(extras(id).next());
1842
49.8M
  extras(extras(id).next()).set_prev(extras(id).prev());
1843
49.8M
  extras(id).set_is_fixed(true);
1844
49.8M
}
1845
1846
194k
inline void DoubleArrayBuilder::expand_units() {
1847
194k
  id_type src_num_units = units_.size();
1848
194k
  id_type src_num_blocks = num_blocks();
1849
1850
194k
  id_type dest_num_units = src_num_units + BLOCK_SIZE;
1851
194k
  id_type dest_num_blocks = src_num_blocks + 1;
1852
1853
194k
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1854
9.29k
    fix_block(src_num_blocks - NUM_EXTRA_BLOCKS);
1855
9.29k
  }
1856
1857
194k
  units_.resize(dest_num_units);
1858
1859
194k
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1860
2.38M
    for (std::size_t id = src_num_units; id < dest_num_units; ++id) {
1861
2.37M
      extras(id).set_is_used(false);
1862
2.37M
      extras(id).set_is_fixed(false);
1863
2.37M
    }
1864
9.29k
  }
1865
1866
49.8M
  for (id_type i = src_num_units + 1; i < dest_num_units; ++i) {
1867
49.6M
    extras(i - 1).set_next(i);
1868
49.6M
    extras(i).set_prev(i - 1);
1869
49.6M
  }
1870
1871
194k
  extras(src_num_units).set_prev(dest_num_units - 1);
1872
194k
  extras(dest_num_units - 1).set_next(src_num_units);
1873
1874
194k
  extras(src_num_units).set_prev(extras(extras_head_).prev());
1875
194k
  extras(dest_num_units - 1).set_next(extras_head_);
1876
1877
194k
  extras(extras(extras_head_).prev()).set_next(src_num_units);
1878
194k
  extras(extras_head_).set_prev(dest_num_units - 1);
1879
194k
}
1880
1881
66.2k
inline void DoubleArrayBuilder::fix_all_blocks() {
1882
66.2k
  id_type begin = 0;
1883
66.2k
  if (num_blocks() > NUM_EXTRA_BLOCKS) {
1884
735
    begin = num_blocks() - NUM_EXTRA_BLOCKS;
1885
735
  }
1886
66.2k
  id_type end = num_blocks();
1887
1888
251k
  for (id_type block_id = begin; block_id != end; ++block_id) {
1889
185k
    fix_block(block_id);
1890
185k
  }
1891
66.2k
}
1892
1893
194k
inline void DoubleArrayBuilder::fix_block(id_type block_id) {
1894
194k
  id_type begin = block_id * BLOCK_SIZE;
1895
194k
  id_type end = begin + BLOCK_SIZE;
1896
1897
194k
  id_type unused_offset = 0;
1898
2.43M
  for (id_type offset = begin; offset != end; ++offset) {
1899
2.43M
    if (!extras(offset).is_used()) {
1900
194k
      unused_offset = offset;
1901
194k
      break;
1902
194k
    }
1903
2.43M
  }
1904
1905
50.0M
  for (id_type id = begin; id != end; ++id) {
1906
49.8M
    if (!extras(id).is_fixed()) {
1907
14.4M
      reserve_id(id);
1908
14.4M
      units_[id].set_label(static_cast<uchar_type>(id ^ unused_offset));
1909
14.4M
    }
1910
49.8M
  }
1911
194k
}
1912
1913
}  // namespace Details
1914
1915
//
1916
// Member function build() of DoubleArrayImpl.
1917
//
1918
1919
template <typename A, typename B, typename T, typename C>
1920
int DoubleArrayImpl<A, B, T, C>::build(std::size_t num_keys,
1921
    const key_type * const *keys, const std::size_t *lengths,
1922
66.2k
    const value_type *values, Details::progress_func_type progress_func) {
1923
66.2k
  Details::Keyset<value_type> keyset(num_keys, keys, lengths, values);
1924
1925
66.2k
  Details::DoubleArrayBuilder builder(progress_func);
1926
66.2k
  builder.build(keyset);
1927
1928
66.2k
  std::size_t size = 0;
1929
66.2k
  unit_type *buf = NULL;
1930
66.2k
  builder.copy(&size, &buf);
1931
1932
66.2k
  clear();
1933
1934
66.2k
  size_ = size;
1935
66.2k
  array_ = buf;
1936
66.2k
  buf_ = buf;
1937
1938
66.2k
  if (progress_func != NULL) {
1939
0
    progress_func(num_keys + 1, num_keys + 1);
1940
0
  }
1941
1942
66.2k
  return 0;
1943
66.2k
}
1944
1945
}  // namespace Darts
1946
1947
#undef DARTS_INT_TO_STR
1948
#undef DARTS_LINE_TO_STR
1949
#undef DARTS_LINE_STR
1950
#undef DARTS_THROW
1951
1952
#endif  // DARTS_H_