Coverage Report

Created: 2025-08-08 06:25

/src/sentencepiece/third_party/darts_clone/darts.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef DARTS_H_
2
#define DARTS_H_
3
4
#include <cstdio>
5
#include <exception>
6
#include <new>
7
8
#define DARTS_VERSION "0.32"
9
10
// DARTS_THROW() throws a <Darts::Exception> whose message starts with the
11
// file name and the line number. For example, DARTS_THROW("error message") at
12
// line 123 of "darts.h" throws a <Darts::Exception> which has a pointer to
13
// "darts.h:123: exception: error message". The message is available by using
14
// what() as well as that of <std::exception>.
15
#define DARTS_INT_TO_STR(value) #value
16
#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line)
17
#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__)
18
0
#define DARTS_THROW(msg) throw Darts::Details::Exception( \
19
0
  __FILE__ ":" DARTS_LINE_STR ": exception: " msg)
20
21
namespace Darts {
22
23
// The following namespace hides the internal types and classes.
24
namespace Details {
25
26
// This header assumes that <int> and <unsigned int> are 32-bit integer types.
27
//
28
// Darts-clone keeps values associated with keys. The type of the values is
29
// <value_type>. Note that the values must be positive integers because the
30
// most significant bit (MSB) of each value is used to represent whether the
31
// corresponding unit is a leaf or not. Also, the keys are represented by
32
// sequences of <char_type>s. <uchar_type> is the unsigned type of <char_type>.
33
typedef char char_type;
34
typedef unsigned char uchar_type;
35
typedef int value_type;
36
37
// The main structure of Darts-clone is an array of <DoubleArrayUnit>s, and the
38
// unit type is actually a wrapper of <id_type>.
39
typedef unsigned int id_type;
40
41
// <progress_func_type> is the type of callback functions for reporting the
42
// progress of building a dictionary. See also build() of <DoubleArray>.
43
// The 1st argument receives the progress value and the 2nd argument receives
44
// the maximum progress value. A usage example is to show the progress
45
// percentage, 100.0 * (the 1st argument) / (the 2nd argument).
46
typedef int (*progress_func_type)(std::size_t, std::size_t);
47
48
// <DoubleArrayUnit> is the type of double-array units and it is a wrapper of
49
// <id_type> in practice.
50
class DoubleArrayUnit {
51
 public:
52
0
  DoubleArrayUnit() : unit_() {}
53
54
  // has_leaf() returns whether a leaf unit is immediately derived from the
55
  // unit (true) or not (false).
56
0
  bool has_leaf() const {
57
0
    return ((unit_ >> 8) & 1) == 1;
58
0
  }
59
  // value() returns the value stored in the unit, and thus value() is
60
  // available when and only when the unit is a leaf unit.
61
0
  value_type value() const {
62
0
    return static_cast<value_type>(unit_ & ((1U << 31) - 1));
63
0
  }
64
65
  // label() returns the label associted with the unit. Note that a leaf unit
66
  // always returns an invalid label. For this feature, leaf unit's label()
67
  // returns an <id_type> that has the MSB of 1.
68
0
  id_type label() const {
69
0
    return unit_ & ((1U << 31) | 0xFF);
70
0
  }
71
  // offset() returns the offset from the unit to its derived units.
72
0
  id_type offset() const {
73
0
    return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6);
74
0
  }
75
76
 private:
77
  id_type unit_;
78
79
  // Copyable.
80
};
81
82
// Darts-clone throws an <Exception> for memory allocation failure, invalid
83
// arguments or a too large offset. The last case means that there are too many
84
// keys in the given set of keys. Note that the `msg' of <Exception> must be a
85
// constant or static string because an <Exception> keeps only a pointer to
86
// that string.
87
class Exception : public std::exception {
88
 public:
89
0
  explicit Exception(const char *msg = NULL) throw() : msg_(msg) {}
90
0
  Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {}
91
0
  virtual ~Exception() throw() {}
92
93
  // <Exception> overrides what() of <std::exception>.
94
0
  virtual const char *what() const throw() {
95
0
    return (msg_ != NULL) ? msg_ : "";
96
0
  }
97
98
 private:
99
  const char *msg_;
100
101
  // Disallows operator=.
102
  Exception &operator=(const Exception &);
103
};
104
105
}  // namespace Details
106
107
// <DoubleArrayImpl> is the interface of Darts-clone. Note that other
108
// classes should not be accessed from outside.
109
//
110
// <DoubleArrayImpl> has 4 template arguments but only the 3rd one is used as
111
// the type of values. Note that the given <T> is used only from outside, and
112
// the internal value type is not changed from <Darts::Details::value_type>.
113
// In build(), given values are casted from <T> to <Darts::Details::value_type>
114
// by using static_cast. On the other hand, values are casted from
115
// <Darts::Details::value_type> to <T> in searching dictionaries.
116
template <typename, typename, typename T, typename>
117
class DoubleArrayImpl {
118
 public:
119
  // Even if this <value_type> is changed, the internal value type is still
120
  // <Darts::Details::value_type>. Other types, such as 64-bit integer types
121
  // and floating-point number types, should not be used.
122
  typedef T value_type;
123
  // A key is reprenseted by a sequence of <key_type>s. For example,
124
  // exactMatchSearch() takes a <const key_type *>.
125
  typedef Details::char_type key_type;
126
  // In searching dictionaries, the values associated with the matched keys are
127
  // stored into or returned as <result_type>s.
128
  typedef value_type result_type;
129
130
  // <result_pair_type> enables applications to get the lengths of the matched
131
  // keys in addition to the values.
132
  struct result_pair_type {
133
    value_type value;
134
    std::size_t length;
135
  };
136
137
  // The constructor initializes member variables with 0 and NULLs.
138
0
  DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {}
139
  // The destructor frees memory allocated for units and then initializes
140
  // member variables with 0 and NULLs.
141
0
  virtual ~DoubleArrayImpl() {
142
0
    clear();
143
0
  }
144
145
  // <DoubleArrayImpl> has 2 kinds of set_result()s. The 1st set_result() is to
146
  // set a value to a <value_type>. The 2nd set_result() is to set a value and
147
  // a length to a <result_pair_type>. By using set_result()s, search methods
148
  // can return the 2 kinds of results in the same way.
149
  // Why the set_result()s are non-static? It is for compatibility.
150
  //
151
  // The 1st set_result() takes a length as the 3rd argument but it is not
152
  // used. If a compiler does a good job, codes for getting the length may be
153
  // removed.
154
0
  void set_result(value_type *result, value_type value, std::size_t) const {
155
0
    *result = value;
156
0
  }
157
  // The 2nd set_result() uses both `value' and `length'.
158
  void set_result(result_pair_type *result,
159
0
      value_type value, std::size_t length) const {
160
0
    result->value = value;
161
0
    result->length = length;
162
0
  }
163
164
  // set_array() calls clear() in order to free memory allocated to the old
165
  // array and then sets a new array. This function is useful to set a memory-
166
  // mapped array. Note that the array set by set_array() is not freed in
167
  // clear() and the destructor of <DoubleArrayImpl>.
168
  // set_array() can also set the size of the new array but the size is not
169
  // used in search methods. So it works well even if the 2nd argument is 0 or
170
  // omitted. Remember that size() and total_size() returns 0 in such a case.
171
0
  void set_array(const void *ptr, std::size_t size = 0) {
172
0
    clear();
173
0
    array_ = static_cast<const unit_type *>(ptr);
174
0
    size_ = size;
175
0
  }
176
  // array() returns a pointer to the array of units.
177
  const void *array() const {
178
    return array_;
179
  }
180
181
  // clear() frees memory allocated to units and then initializes member
182
  // variables with 0 and NULLs. Note that clear() does not free memory if the
183
  // array of units was set by set_array(). In such a case, `array_' is not
184
  // NULL and `buf_' is NULL.
185
0
  void clear() {
186
0
    size_ = 0;
187
0
    array_ = NULL;
188
0
    if (buf_ != NULL) {
189
0
      delete[] buf_;
190
0
      buf_ = NULL;
191
0
    }
192
0
  }
193
194
  // unit_size() returns the size of each unit. The size must be 4 bytes.
195
0
  std::size_t unit_size() const {
196
0
    return sizeof(unit_type);
197
0
  }
198
  // size() returns the number of units. It can be 0 if set_array() is used.
199
  std::size_t size() const {
200
    return size_;
201
  }
202
  // total_size() returns the number of bytes allocated to the array of units.
203
  // It can be 0 if set_array() is used.
204
  std::size_t total_size() const {
205
    return unit_size() * size();
206
  }
207
  // nonzero_size() exists for compatibility. It always returns the number of
208
  // units because it takes long time to count the number of non-zero units.
209
  std::size_t nonzero_size() const {
210
    return size();
211
  }
212
213
  // build() constructs a dictionary from given key-value pairs. If `lengths'
214
  // is NULL, `keys' is handled as an array of zero-terminated strings. If
215
  // `values' is NULL, the index in `keys' is associated with each key, i.e.
216
  // the ith key has (i - 1) as its value.
217
  // Note that the key-value pairs must be arranged in key order and the values
218
  // must not be negative. Also, if there are duplicate keys, only the first
219
  // pair will be stored in the resultant dictionary.
220
  // `progress_func' is a pointer to a callback function. If it is not NULL,
221
  // it will be called in build() so that the caller can check the progress of
222
  // dictionary construction. For details, please see the definition of
223
  // <Darts::Details::progress_func_type>.
224
  // The return value of build() is 0, and it indicates the success of the
225
  // operation. Otherwise, build() throws a <Darts::Exception>, which is a
226
  // derived class of <std::exception>.
227
  // build() uses another construction algorithm if `values' is not NULL. In
228
  // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead
229
  // of a trie because a DAWG is likely to be more compact than a trie.
230
  int build(std::size_t num_keys, const key_type * const *keys,
231
      const std::size_t *lengths = NULL, const value_type *values = NULL,
232
      Details::progress_func_type progress_func = NULL);
233
234
  // open() reads an array of units from the specified file. And if it goes
235
  // well, the old array will be freed and replaced with the new array read
236
  // from the file. `offset' specifies the number of bytes to be skipped before
237
  // reading an array. `size' specifies the number of bytes to be read from the
238
  // file. If the `size' is 0, the whole file will be read.
239
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
240
  // non-zero value or throws a <Darts::Exception>. The exception is thrown
241
  // when and only when a memory allocation fails.
242
  int open(const char *file_name, const char *mode = "rb",
243
      std::size_t offset = 0, std::size_t size = 0);
244
  // save() writes the array of units into the specified file. `offset'
245
  // specifies the number of bytes to be skipped before writing the array.
246
  // open() returns 0 iff the operation succeeds. Otherwise, it returns a
247
  // non-zero value.
248
  int save(const char *file_name, const char *mode = "wb",
249
      std::size_t offset = 0) const;
250
251
  // The 1st exactMatchSearch() tests whether the given key exists or not, and
252
  // if it exists, its value and length are set to `result'. Otherwise, the
253
  // value and the length of `result' are set to -1 and 0 respectively.
254
  // Note that if `length' is 0, `key' is handled as a zero-terminated string.
255
  // `node_pos' specifies the start position of matching. This argument enables
256
  // the combination of exactMatchSearch() and traverse(). For example, if you
257
  // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get
258
  // the node position corresponding to "xyz" and then you can use
259
  // exactMatchSearch() to test "A", "BC", and "DE" from that position.
260
  // Note that the length of `result' indicates the length from the `node_pos'.
261
  // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }.
262
  template <class U>
263
  void exactMatchSearch(const key_type *key, U &result,
264
0
      std::size_t length = 0, std::size_t node_pos = 0) const {
265
0
    result = exactMatchSearch<U>(key, length, node_pos);
266
0
  }
267
  // The 2nd exactMatchSearch() returns a result instead of updating the 2nd
268
  // argument. So, the following exactMatchSearch() has only 3 arguments.
269
  template <class U>
270
  inline U exactMatchSearch(const key_type *key, std::size_t length = 0,
271
      std::size_t node_pos = 0) const;
272
273
  // commonPrefixSearch() searches for keys which match a prefix of the given
274
  // string. If `length' is 0, `key' is handled as a zero-terminated string.
275
  // The values and the lengths of at most `max_num_results' matched keys are
276
  // stored in `results'. commonPrefixSearch() returns the number of matched
277
  // keys. Note that the return value can be larger than `max_num_results' if
278
  // there are more than `max_num_results' matches. If you want to get all the
279
  // results, allocate more spaces and call commonPrefixSearch() again.
280
  // `node_pos' works as well as in exactMatchSearch().
281
  template <class U>
282
  inline std::size_t commonPrefixSearch(const key_type *key, U *results,
283
      std::size_t max_num_results, std::size_t length = 0,
284
      std::size_t node_pos = 0) const;
285
286
  // In Darts-clone, a dictionary is a deterministic finite-state automaton
287
  // (DFA) and traverse() tests transitions on the DFA. The initial state is
288
  // `node_pos' and traverse() chooses transitions labeled key[key_pos],
289
  // key[key_pos + 1], ... in order. If there is not a transition labeled
290
  // key[key_pos + i], traverse() terminates the transitions at that state and
291
  // returns -2. Otherwise, traverse() ends without a termination and returns
292
  // -1 or a nonnegative value, -1 indicates that the final state was not an
293
  // accept state. When a nonnegative value is returned, it is the value
294
  // associated with the final accept state. That is, traverse() returns the
295
  // value associated with the given key if it exists. Note that traverse()
296
  // updates `node_pos' and `key_pos' after each transition.
297
  inline value_type traverse(const key_type *key, std::size_t &node_pos,
298
      std::size_t &key_pos, std::size_t length = 0) const;
299
300
 private:
301
  typedef Details::uchar_type uchar_type;
302
  typedef Details::id_type id_type;
303
  typedef Details::DoubleArrayUnit unit_type;
304
305
  std::size_t size_;
306
  const unit_type *array_;
307
  unit_type *buf_;
308
309
  // Disallows copy and assignment.
310
  DoubleArrayImpl(const DoubleArrayImpl &);
311
  DoubleArrayImpl &operator=(const DoubleArrayImpl &);
312
};
313
314
// <DoubleArray> is the typical instance of <DoubleArrayImpl>. It uses <int>
315
// as the type of values and it is suitable for most cases.
316
typedef DoubleArrayImpl<void, void, int, void> DoubleArray;
317
318
// The interface section ends here. For using Darts-clone, there is no need
319
// to read the remaining section, which gives the implementation of
320
// Darts-clone.
321
322
//
323
// Member functions of DoubleArrayImpl (except build()).
324
//
325
326
template <typename A, typename B, typename T, typename C>
327
int DoubleArrayImpl<A, B, T, C>::open(const char *file_name,
328
    const char *mode, std::size_t offset, std::size_t size) {
329
#ifdef _MSC_VER
330
  std::FILE *file;
331
  if (::fopen_s(&file, file_name, mode) != 0) {
332
    return -1;
333
  }
334
#else
335
  std::FILE *file = std::fopen(file_name, mode);
336
  if (file == NULL) {
337
    return -1;
338
  }
339
#endif
340
341
  if (size == 0) {
342
    if (std::fseek(file, 0, SEEK_END) != 0) {
343
      std::fclose(file);
344
      return -1;
345
    }
346
    size = std::ftell(file) - offset;
347
  }
348
349
  size /= unit_size();
350
  if (size < 256 || (size & 0xFF) != 0) {
351
    std::fclose(file);
352
    return -1;
353
  }
354
355
  if (std::fseek(file, offset, SEEK_SET) != 0) {
356
    std::fclose(file);
357
    return -1;
358
  }
359
360
  unit_type units[256];
361
  if (std::fread(units, unit_size(), 256, file) != 256) {
362
    std::fclose(file);
363
    return -1;
364
  }
365
366
  if (units[0].label() != '\0' || units[0].has_leaf() ||
367
      units[0].offset() == 0 || units[0].offset() >= 512) {
368
    std::fclose(file);
369
    return -1;
370
  }
371
  for (id_type i = 1; i < 256; ++i) {
372
    if (units[i].label() <= 0xFF && units[i].offset() >= size) {
373
      std::fclose(file);
374
      return -1;
375
    }
376
  }
377
378
  unit_type *buf;
379
  try {
380
    buf = new unit_type[size];
381
    for (id_type i = 0; i < 256; ++i) {
382
      buf[i] = units[i];
383
    }
384
  } catch (const std::bad_alloc &) {
385
    std::fclose(file);
386
    DARTS_THROW("failed to open double-array: std::bad_alloc");
387
  }
388
389
  if (size > 256) {
390
    if (std::fread(buf + 256, unit_size(), size - 256, file) != size - 256) {
391
      std::fclose(file);
392
      delete[] buf;
393
      return -1;
394
    }
395
  }
396
  std::fclose(file);
397
398
  clear();
399
400
  size_ = size;
401
  array_ = buf;
402
  buf_ = buf;
403
  return 0;
404
}
405
406
template <typename A, typename B, typename T, typename C>
407
int DoubleArrayImpl<A, B, T, C>::save(const char *file_name,
408
    const char *mode, std::size_t) const {
409
  if (size() == 0) {
410
    return -1;
411
  }
412
413
#ifdef _MSC_VER
414
  std::FILE *file;
415
  if (::fopen_s(&file, file_name, mode) != 0) {
416
    return -1;
417
  }
418
#else
419
  std::FILE *file = std::fopen(file_name, mode);
420
  if (file == NULL) {
421
    return -1;
422
  }
423
#endif
424
425
  if (std::fwrite(array_, unit_size(), size(), file) != size()) {
426
    std::fclose(file);
427
    return -1;
428
  }
429
  std::fclose(file);
430
  return 0;
431
}
432
433
template <typename A, typename B, typename T, typename C>
434
template <typename U>
435
inline U DoubleArrayImpl<A, B, T, C>::exactMatchSearch(const key_type *key,
436
0
    std::size_t length, std::size_t node_pos) const {
437
0
  U result;
438
0
  set_result(&result, static_cast<value_type>(-1), 0);
439
440
0
  unit_type unit = array_[node_pos];
441
0
  if (length != 0) {
442
0
    for (std::size_t i = 0; i < length; ++i) {
443
0
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[i]);
444
0
      unit = array_[node_pos];
445
0
      if (unit.label() != static_cast<uchar_type>(key[i])) {
446
0
        return result;
447
0
      }
448
0
    }
449
0
  } else {
450
0
    for ( ; key[length] != '\0'; ++length) {
451
0
      node_pos ^= unit.offset() ^ static_cast<uchar_type>(key[length]);
452
0
      unit = array_[node_pos];
453
0
      if (unit.label() != static_cast<uchar_type>(key[length])) {
454
0
        return result;
455
0
      }
456
0
    }
457
0
  }
458
459
0
  if (!unit.has_leaf()) {
460
0
    return result;
461
0
  }
462
0
  unit = array_[node_pos ^ unit.offset()];
463
0
  set_result(&result, static_cast<value_type>(unit.value()), length);
464
0
  return result;
465
0
}
466
467
template <typename A, typename B, typename T, typename C>
468
template <typename U>
469
inline std::size_t DoubleArrayImpl<A, B, T, C>::commonPrefixSearch(
470
    const key_type *key, U *results, std::size_t max_num_results,
471
0
    std::size_t length, std::size_t node_pos) const {
472
0
  std::size_t num_results = 0;
473
474
0
  unit_type unit = array_[node_pos];
475
0
  node_pos ^= unit.offset();
476
0
  if (length != 0) {
477
0
    for (std::size_t i = 0; i < length; ++i) {
478
0
      node_pos ^= static_cast<uchar_type>(key[i]);
479
0
      unit = array_[node_pos];
480
0
      if (unit.label() != static_cast<uchar_type>(key[i])) {
481
0
        return num_results;
482
0
      }
483
484
0
      node_pos ^= unit.offset();
485
0
      if (unit.has_leaf()) {
486
0
        if (num_results < max_num_results) {
487
0
          set_result(&results[num_results], static_cast<value_type>(
488
0
              array_[node_pos].value()), i + 1);
489
0
        }
490
0
        ++num_results;
491
0
      }
492
0
    }
493
0
  } else {
494
0
    for ( ; key[length] != '\0'; ++length) {
495
0
      node_pos ^= static_cast<uchar_type>(key[length]);
496
0
      unit = array_[node_pos];
497
0
      if (unit.label() != static_cast<uchar_type>(key[length])) {
498
0
        return num_results;
499
0
      }
500
501
0
      node_pos ^= unit.offset();
502
0
      if (unit.has_leaf()) {
503
0
        if (num_results < max_num_results) {
504
0
          set_result(&results[num_results], static_cast<value_type>(
505
0
              array_[node_pos].value()), length + 1);
506
0
        }
507
0
        ++num_results;
508
0
      }
509
0
    }
510
0
  }
511
512
0
  return num_results;
513
0
}
514
515
template <typename A, typename B, typename T, typename C>
516
inline typename DoubleArrayImpl<A, B, T, C>::value_type
517
DoubleArrayImpl<A, B, T, C>::traverse(const key_type *key,
518
0
    std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const {
519
0
  id_type id = static_cast<id_type>(node_pos);
520
0
  unit_type unit = array_[id];
521
522
0
  if (length != 0) {
523
0
    for ( ; key_pos < length; ++key_pos) {
524
0
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
525
0
      unit = array_[id];
526
0
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
527
0
        return static_cast<value_type>(-2);
528
0
      }
529
0
      node_pos = id;
530
0
    }
531
0
  } else {
532
0
    for ( ; key[key_pos] != '\0'; ++key_pos) {
533
0
      id ^= unit.offset() ^ static_cast<uchar_type>(key[key_pos]);
534
0
      unit = array_[id];
535
0
      if (unit.label() != static_cast<uchar_type>(key[key_pos])) {
536
0
        return static_cast<value_type>(-2);
537
0
      }
538
0
      node_pos = id;
539
0
    }
540
0
  }
541
542
0
  if (!unit.has_leaf()) {
543
0
    return static_cast<value_type>(-1);
544
0
  }
545
0
  unit = array_[id ^ unit.offset()];
546
0
  return static_cast<value_type>(unit.value());
547
0
}
548
549
namespace Details {
550
551
//
552
// Memory management of array.
553
//
554
555
template <typename T>
556
class AutoArray {
557
 public:
558
0
  explicit AutoArray(T *array = NULL) : array_(array) {}
Unexecuted instantiation: Darts::Details::AutoArray<char>::AutoArray(char*)
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::AutoArray(Darts::Details::DoubleArrayBuilderExtraUnit*)
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::AutoArray(unsigned int*)
559
0
  ~AutoArray() {
560
0
    clear();
561
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::~AutoArray()
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::~AutoArray()
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::~AutoArray()
562
563
0
  const T &operator[](std::size_t id) const {
564
0
    return array_[id];
565
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long) const
566
0
  T &operator[](std::size_t id) {
567
0
    return array_[id];
568
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::operator[](unsigned long)
569
570
  bool empty() const {
571
    return array_ == NULL;
572
  }
573
574
0
  void clear() {
575
0
    if (array_ != NULL) {
576
0
      delete[] array_;
577
0
      array_ = NULL;
578
0
    }
579
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::clear()
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::clear()
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::clear()
580
0
  void swap(AutoArray *array) {
581
0
    T *temp = array_;
582
0
    array_ = array->array_;
583
0
    array->array_ = temp;
584
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::swap(Darts::Details::AutoArray<char>*)
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::swap(Darts::Details::AutoArray<unsigned int>*)
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::swap(Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>*)
585
0
  void reset(T *array = NULL) {
586
0
    AutoArray(array).swap(this);
587
0
  }
Unexecuted instantiation: Darts::Details::AutoArray<char>::reset(char*)
Unexecuted instantiation: Darts::Details::AutoArray<unsigned int>::reset(unsigned int*)
Unexecuted instantiation: Darts::Details::AutoArray<Darts::Details::DoubleArrayBuilderExtraUnit>::reset(Darts::Details::DoubleArrayBuilderExtraUnit*)
588
589
 private:
590
  T *array_;
591
592
  // Disallows copy and assignment.
593
  AutoArray(const AutoArray &);
594
  AutoArray &operator=(const AutoArray &);
595
};
596
597
//
598
// Memory management of resizable array.
599
//
600
601
template <typename T>
602
class AutoPool {
603
 public:
604
0
  AutoPool() : buf_(), size_(0), capacity_(0) {}
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::AutoPool()
605
0
  ~AutoPool() { clear(); }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::~AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::~AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::~AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::~AutoPool()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::~AutoPool()
606
607
0
  const T &operator[](std::size_t id) const {
608
0
    return *(reinterpret_cast<const T *>(&buf_[0]) + id);
609
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long) const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long) const
610
0
  T &operator[](std::size_t id) {
611
0
    return *(reinterpret_cast<T *>(&buf_[0]) + id);
612
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::operator[](unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::operator[](unsigned long)
613
614
0
  bool empty() const {
615
0
    return size_ == 0;
616
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::empty() const
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::empty() const
617
0
  std::size_t size() const {
618
0
    return size_;
619
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::size() const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::size() const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::size() const
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::size() const
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::size() const
620
621
0
  void clear() {
622
0
    resize(0);
623
0
    buf_.clear();
624
0
    size_ = 0;
625
0
    capacity_ = 0;
626
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::clear()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::clear()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::clear()
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::clear()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::clear()
627
628
0
  void push_back(const T &value) {
629
0
    append(value);
630
0
  }
631
0
  void pop_back() {
632
0
    (*this)[--size_].~T();
633
0
  }
634
635
0
  void append() {
636
0
    if (size_ == capacity_)
637
0
      resize_buf(size_ + 1);
638
0
    new(&(*this)[size_++]) T;
639
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::append()
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::append()
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::append()
640
0
  void append(const T &value) {
641
0
    if (size_ == capacity_)
642
0
      resize_buf(size_ + 1);
643
0
    new(&(*this)[size_++]) T(value);
644
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::append(unsigned int const&)
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::append(unsigned char const&)
645
646
0
  void resize(std::size_t size) {
647
0
    while (size_ > size) {
648
0
      (*this)[--size_].~T();
649
0
    }
650
0
    if (size > capacity_) {
651
0
      resize_buf(size);
652
0
    }
653
0
    while (size_ < size) {
654
0
      new(&(*this)[size_++]) T;
655
0
    }
656
0
  }
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::resize(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::resize(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::resize(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize(unsigned long)
657
0
  void resize(std::size_t size, const T &value) {
658
0
    while (size_ > size) {
659
0
      (*this)[--size_].~T();
660
0
    }
661
0
    if (size > capacity_) {
662
0
      resize_buf(size);
663
0
    }
664
0
    while (size_ < size) {
665
0
      new(&(*this)[size_++]) T(value);
666
0
    }
667
0
  }
668
669
0
  void reserve(std::size_t size) {
670
0
    if (size > capacity_) {
671
0
      resize_buf(size);
672
0
    }
673
0
  }
674
675
 private:
676
  AutoArray<char> buf_;
677
  std::size_t size_;
678
  std::size_t capacity_;
679
680
  // Disallows copy and assignment.
681
  AutoPool(const AutoPool &);
682
  AutoPool &operator=(const AutoPool &);
683
684
  void resize_buf(std::size_t size);
685
};
686
687
template <typename T>
688
0
void AutoPool<T>::resize_buf(std::size_t size) {
689
0
  std::size_t capacity;
690
0
  if (size >= capacity_ * 2) {
691
0
    capacity = size;
692
0
  } else {
693
0
    capacity = 1;
694
0
    while (capacity < size) {
695
0
      capacity <<= 1;
696
0
    }
697
0
  }
698
699
0
  AutoArray<char> buf;
700
0
  try {
701
0
    buf.reset(new char[sizeof(T) * capacity]);
702
0
  } catch (const std::bad_alloc &) {
703
0
    DARTS_THROW("failed to resize pool: std::bad_alloc");
704
0
  }
705
706
0
  if (size_ > 0) {
707
0
    T *src = reinterpret_cast<T *>(&buf_[0]);
708
0
    T *dest = reinterpret_cast<T *>(&buf[0]);
709
0
    for (std::size_t i = 0; i < size_; ++i) {
710
0
      new(&dest[i]) T(src[i]);
711
0
      src[i].~T();
712
0
    }
713
0
  }
714
715
0
  buf_.swap(&buf);
716
0
  capacity_ = capacity;
717
0
}
Unexecuted instantiation: Darts::Details::AutoPool<unsigned int>::resize_buf(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgNode>::resize_buf(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DawgUnit>::resize_buf(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<unsigned char>::resize_buf(unsigned long)
Unexecuted instantiation: Darts::Details::AutoPool<Darts::Details::DoubleArrayBuilderUnit>::resize_buf(unsigned long)
718
719
//
720
// Memory management of stack.
721
//
722
723
template <typename T>
724
class AutoStack {
725
 public:
726
0
  AutoStack() : pool_() {}
727
0
  ~AutoStack() {
728
0
    clear();
729
0
  }
730
731
  const T &top() const {
732
    return pool_[size() - 1];
733
  }
734
0
  T &top() {
735
0
    return pool_[size() - 1];
736
0
  }
737
738
0
  bool empty() const {
739
0
    return pool_.empty();
740
0
  }
741
0
  std::size_t size() const {
742
0
    return pool_.size();
743
0
  }
744
745
0
  void push(const T &value) {
746
0
    pool_.push_back(value);
747
0
  }
748
0
  void pop() {
749
0
    pool_.pop_back();
750
0
  }
751
752
0
  void clear() {
753
0
    pool_.clear();
754
0
  }
755
756
 private:
757
  AutoPool<T> pool_;
758
759
  // Disallows copy and assignment.
760
  AutoStack(const AutoStack &);
761
  AutoStack &operator=(const AutoStack &);
762
};
763
764
//
765
// Succinct bit vector.
766
//
767
768
class BitVector {
769
 public:
770
0
  BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {}
771
0
  ~BitVector() {
772
0
    clear();
773
0
  }
774
775
0
  bool operator[](std::size_t id) const {
776
0
    return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1;
777
0
  }
778
779
0
  id_type rank(std::size_t id) const {
780
0
    std::size_t unit_id = id / UNIT_SIZE;
781
0
    return ranks_[unit_id] + pop_count(units_[unit_id]
782
0
        & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1)));
783
0
  }
784
785
0
  void set(std::size_t id, bool bit) {
786
0
    if (bit) {
787
0
      units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE);
788
0
    } else {
789
0
      units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE));
790
0
    }
791
0
  }
792
793
0
  bool empty() const {
794
0
    return units_.empty();
795
0
  }
796
0
  std::size_t num_ones() const {
797
0
    return num_ones_;
798
0
  }
799
0
  std::size_t size() const {
800
0
    return size_;
801
0
  }
802
803
0
  void append() {
804
0
    if ((size_ % UNIT_SIZE) == 0) {
805
0
      units_.append(0);
806
0
    }
807
0
    ++size_;
808
0
  }
809
  void build();
810
811
0
  void clear() {
812
0
    units_.clear();
813
0
    ranks_.clear();
814
0
  }
815
816
 private:
817
  enum { UNIT_SIZE = sizeof(id_type) * 8 };
818
819
  AutoPool<id_type> units_;
820
  AutoArray<id_type> ranks_;
821
  std::size_t num_ones_;
822
  std::size_t size_;
823
824
  // Disallows copy and assignment.
825
  BitVector(const BitVector &);
826
  BitVector &operator=(const BitVector &);
827
828
0
  static id_type pop_count(id_type unit) {
829
0
    unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555);
830
0
    unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333);
831
0
    unit = ((unit >> 4) + unit) & 0x0F0F0F0F;
832
0
    unit += unit >> 8;
833
0
    unit += unit >> 16;
834
0
    return unit & 0xFF;
835
0
  }
836
};
837
838
0
inline void BitVector::build() {
839
0
  try {
840
0
    ranks_.reset(new id_type[units_.size()]);
841
0
  } catch (const std::bad_alloc &) {
842
0
    DARTS_THROW("failed to build rank index: std::bad_alloc");
843
0
  }
844
845
0
  num_ones_ = 0;
846
0
  for (std::size_t i = 0; i < units_.size(); ++i) {
847
0
    ranks_[i] = num_ones_;
848
0
    num_ones_ += pop_count(units_[i]);
849
0
  }
850
0
}
851
852
//
853
// Keyset.
854
//
855
856
template <typename T>
857
class Keyset {
858
 public:
859
  Keyset(std::size_t num_keys, const char_type * const *keys,
860
      const std::size_t *lengths, const T *values) :
861
0
      num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {}
862
863
0
  std::size_t num_keys() const {
864
0
    return num_keys_;
865
0
  }
866
0
  const char_type *keys(std::size_t id) const {
867
0
    return keys_[id];
868
0
  }
869
0
  uchar_type keys(std::size_t key_id, std::size_t char_id) const {
870
0
    if (has_lengths() && char_id >= lengths_[key_id])
871
0
      return '\0';
872
0
    return keys_[key_id][char_id];
873
0
  }
874
875
0
  bool has_lengths() const {
876
0
    return lengths_ != NULL;
877
0
  }
878
0
  std::size_t lengths(std::size_t id) const {
879
0
    if (has_lengths()) {
880
0
      return lengths_[id];
881
0
    }
882
0
    std::size_t length = 0;
883
0
    while (keys_[id][length] != '\0') {
884
0
      ++length;
885
0
    }
886
0
    return length;
887
0
  }
888
889
0
  bool has_values() const {
890
0
    return values_ != NULL;
891
0
  }
892
0
  const value_type values(std::size_t id) const {
893
0
    if (has_values()) {
894
0
      return static_cast<value_type>(values_[id]);
895
0
    }
896
0
    return static_cast<value_type>(id);
897
0
  }
898
899
 private:
900
  std::size_t num_keys_;
901
  const char_type * const * keys_;
902
  const std::size_t *lengths_;
903
  const T *values_;
904
905
  // Disallows copy and assignment.
906
  Keyset(const Keyset &);
907
  Keyset &operator=(const Keyset &);
908
};
909
910
//
911
// Node of Directed Acyclic Word Graph (DAWG).
912
//
913
914
class DawgNode {
915
 public:
916
0
  DawgNode() : child_(0), sibling_(0), label_('\0'),
917
0
    is_state_(false), has_sibling_(false) {}
918
919
0
  void set_child(id_type child) {
920
0
    child_ = child;
921
0
  }
922
0
  void set_sibling(id_type sibling) {
923
0
    sibling_ = sibling;
924
0
  }
925
0
  void set_value(value_type value) {
926
0
    child_ = value;
927
0
  }
928
0
  void set_label(uchar_type label) {
929
0
    label_ = label;
930
0
  }
931
0
  void set_is_state(bool is_state) {
932
0
    is_state_ = is_state;
933
0
  }
934
0
  void set_has_sibling(bool has_sibling) {
935
0
    has_sibling_ = has_sibling;
936
0
  }
937
938
0
  id_type child() const {
939
0
    return child_;
940
0
  }
941
0
  id_type sibling() const {
942
0
    return sibling_;
943
0
  }
944
0
  value_type value() const {
945
0
    return static_cast<value_type>(child_);
946
0
  }
947
0
  uchar_type label() const {
948
0
    return label_;
949
0
  }
950
0
  bool is_state() const {
951
0
    return is_state_;
952
0
  }
953
0
  bool has_sibling() const {
954
0
    return has_sibling_;
955
0
  }
956
957
0
  id_type unit() const {
958
0
    if (label_ == '\0') {
959
0
      return (child_ << 1) | (has_sibling_ ? 1 : 0);
960
0
    }
961
0
    return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0);
962
0
  }
963
964
 private:
965
  id_type child_;
966
  id_type sibling_;
967
  uchar_type label_;
968
  bool is_state_;
969
  bool has_sibling_;
970
971
  // Copyable.
972
};
973
974
//
975
// Fixed unit of Directed Acyclic Word Graph (DAWG).
976
//
977
978
class DawgUnit {
979
 public:
980
0
  explicit DawgUnit(id_type unit = 0) : unit_(unit) {}
981
0
  DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {}
982
983
0
  DawgUnit &operator=(id_type unit) {
984
0
    unit_ = unit;
985
0
    return *this;
986
0
  }
987
988
0
  id_type unit() const {
989
0
    return unit_;
990
0
  }
991
992
0
  id_type child() const {
993
0
    return unit_ >> 2;
994
0
  }
995
0
  bool has_sibling() const {
996
0
    return (unit_ & 1) == 1;
997
0
  }
998
0
  value_type value() const {
999
0
    return static_cast<value_type>(unit_ >> 1);
1000
0
  }
1001
0
  bool is_state() const {
1002
0
    return (unit_ & 2) == 2;
1003
0
  }
1004
1005
 private:
1006
  id_type unit_;
1007
1008
  // Copyable.
1009
};
1010
1011
//
1012
// Directed Acyclic Word Graph (DAWG) builder.
1013
//
1014
1015
class DawgBuilder {
1016
 public:
1017
0
  DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(),
1018
0
    table_(), node_stack_(), recycle_bin_(), num_states_(0) {}
1019
0
  ~DawgBuilder() {
1020
0
    clear();
1021
0
  }
1022
1023
0
  id_type root() const {
1024
0
    return 0;
1025
0
  }
1026
1027
0
  id_type child(id_type id) const {
1028
0
    return units_[id].child();
1029
0
  }
1030
0
  id_type sibling(id_type id) const {
1031
0
    return units_[id].has_sibling() ? (id + 1) : 0;
1032
0
  }
1033
0
  int value(id_type id) const {
1034
0
    return units_[id].value();
1035
0
  }
1036
1037
0
  bool is_leaf(id_type id) const {
1038
0
    return label(id) == '\0';
1039
0
  }
1040
0
  uchar_type label(id_type id) const {
1041
0
    return labels_[id];
1042
0
  }
1043
1044
0
  bool is_intersection(id_type id) const {
1045
0
    return is_intersections_[id];
1046
0
  }
1047
0
  id_type intersection_id(id_type id) const {
1048
0
    return is_intersections_.rank(id) - 1;
1049
0
  }
1050
1051
0
  std::size_t num_intersections() const {
1052
0
    return is_intersections_.num_ones();
1053
0
  }
1054
1055
0
  std::size_t size() const {
1056
0
    return units_.size();
1057
0
  }
1058
1059
  void init();
1060
  void finish();
1061
1062
  void insert(const char *key, std::size_t length, value_type value);
1063
1064
  void clear();
1065
1066
 private:
1067
  enum { INITIAL_TABLE_SIZE = 1 << 10 };
1068
1069
  AutoPool<DawgNode> nodes_;
1070
  AutoPool<DawgUnit> units_;
1071
  AutoPool<uchar_type> labels_;
1072
  BitVector is_intersections_;
1073
  AutoPool<id_type> table_;
1074
  AutoStack<id_type> node_stack_;
1075
  AutoStack<id_type> recycle_bin_;
1076
  std::size_t num_states_;
1077
1078
  // Disallows copy and assignment.
1079
  DawgBuilder(const DawgBuilder &);
1080
  DawgBuilder &operator=(const DawgBuilder &);
1081
1082
  void flush(id_type id);
1083
1084
  void expand_table();
1085
1086
  id_type find_unit(id_type id, id_type *hash_id) const;
1087
  id_type find_node(id_type node_id, id_type *hash_id) const;
1088
1089
  bool are_equal(id_type node_id, id_type unit_id) const;
1090
1091
  id_type hash_unit(id_type id) const;
1092
  id_type hash_node(id_type id) const;
1093
1094
  id_type append_node();
1095
  id_type append_unit();
1096
1097
0
  void free_node(id_type id) {
1098
0
    recycle_bin_.push(id);
1099
0
  }
1100
1101
0
  static id_type hash(id_type key) {
1102
0
    key = ~key + (key << 15);  // key = (key << 15) - key - 1;
1103
0
    key = key ^ (key >> 12);
1104
0
    key = key + (key << 2);
1105
0
    key = key ^ (key >> 4);
1106
0
    key = key * 2057;  // key = (key + (key << 3)) + (key << 11);
1107
0
    key = key ^ (key >> 16);
1108
0
    return key;
1109
0
  }
1110
};
1111
1112
0
inline void DawgBuilder::init() {
1113
0
  table_.resize(INITIAL_TABLE_SIZE, 0);
1114
1115
0
  append_node();
1116
0
  append_unit();
1117
1118
0
  num_states_ = 1;
1119
1120
0
  nodes_[0].set_label(0xFF);
1121
0
  node_stack_.push(0);
1122
0
}
1123
1124
0
inline void DawgBuilder::finish() {
1125
0
  flush(0);
1126
1127
0
  units_[0] = nodes_[0].unit();
1128
0
  labels_[0] = nodes_[0].label();
1129
1130
0
  nodes_.clear();
1131
0
  table_.clear();
1132
0
  node_stack_.clear();
1133
0
  recycle_bin_.clear();
1134
1135
0
  is_intersections_.build();
1136
0
}
1137
1138
inline void DawgBuilder::insert(const char *key, std::size_t length,
1139
0
    value_type value) {
1140
0
  if (value < 0) {
1141
0
    DARTS_THROW("failed to insert key: negative value");
1142
0
  } else if (length == 0) {
1143
0
    DARTS_THROW("failed to insert key: zero-length key");
1144
0
  }
1145
1146
0
  id_type id = 0;
1147
0
  std::size_t key_pos = 0;
1148
1149
0
  for ( ; key_pos <= length; ++key_pos) {
1150
0
    id_type child_id = nodes_[id].child();
1151
0
    if (child_id == 0) {
1152
0
      break;
1153
0
    }
1154
1155
0
    uchar_type key_label = static_cast<uchar_type>(key[key_pos]);
1156
0
    if (key_pos < length && key_label == '\0') {
1157
0
      DARTS_THROW("failed to insert key: invalid null character");
1158
0
    }
1159
1160
0
    uchar_type unit_label = nodes_[child_id].label();
1161
0
    if (key_label < unit_label) {
1162
0
      DARTS_THROW("failed to insert key: wrong key order");
1163
0
    } else if (key_label > unit_label) {
1164
0
      nodes_[child_id].set_has_sibling(true);
1165
0
      flush(child_id);
1166
0
      break;
1167
0
    }
1168
0
    id = child_id;
1169
0
  }
1170
1171
0
  if (key_pos > length) {
1172
0
    return;
1173
0
  }
1174
1175
0
  for ( ; key_pos <= length; ++key_pos) {
1176
0
    uchar_type key_label = static_cast<uchar_type>(
1177
0
        (key_pos < length) ? key[key_pos] : '\0');
1178
0
    id_type child_id = append_node();
1179
1180
0
    if (nodes_[id].child() == 0) {
1181
0
      nodes_[child_id].set_is_state(true);
1182
0
    }
1183
0
    nodes_[child_id].set_sibling(nodes_[id].child());
1184
0
    nodes_[child_id].set_label(key_label);
1185
0
    nodes_[id].set_child(child_id);
1186
0
    node_stack_.push(child_id);
1187
1188
0
    id = child_id;
1189
0
  }
1190
0
  nodes_[id].set_value(value);
1191
0
}
1192
1193
0
inline void DawgBuilder::clear() {
1194
0
  nodes_.clear();
1195
0
  units_.clear();
1196
0
  labels_.clear();
1197
0
  is_intersections_.clear();
1198
0
  table_.clear();
1199
0
  node_stack_.clear();
1200
0
  recycle_bin_.clear();
1201
0
  num_states_ = 0;
1202
0
}
1203
1204
0
inline void DawgBuilder::flush(id_type id) {
1205
0
  while (node_stack_.top() != id) {
1206
0
    id_type node_id = node_stack_.top();
1207
0
    node_stack_.pop();
1208
1209
0
    if (num_states_ >= table_.size() - (table_.size() >> 2)) {
1210
0
      expand_table();
1211
0
    }
1212
1213
0
    id_type num_siblings = 0;
1214
0
    for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1215
0
      ++num_siblings;
1216
0
    }
1217
1218
0
    id_type hash_id;
1219
0
    id_type match_id = find_node(node_id, &hash_id);
1220
0
    if (match_id != 0) {
1221
0
      is_intersections_.set(match_id, true);
1222
0
    } else {
1223
0
      id_type unit_id = 0;
1224
0
      for (id_type i = 0; i < num_siblings; ++i) {
1225
0
        unit_id = append_unit();
1226
0
      }
1227
0
      for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) {
1228
0
        units_[unit_id] = nodes_[i].unit();
1229
0
        labels_[unit_id] = nodes_[i].label();
1230
0
        --unit_id;
1231
0
      }
1232
0
      match_id = unit_id + 1;
1233
0
      table_[hash_id] = match_id;
1234
0
      ++num_states_;
1235
0
    }
1236
1237
0
    for (id_type i = node_id, next; i != 0; i = next) {
1238
0
      next = nodes_[i].sibling();
1239
0
      free_node(i);
1240
0
    }
1241
1242
0
    nodes_[node_stack_.top()].set_child(match_id);
1243
0
  }
1244
0
  node_stack_.pop();
1245
0
}
1246
1247
0
inline void DawgBuilder::expand_table() {
1248
0
  std::size_t table_size = table_.size() << 1;
1249
0
  table_.clear();
1250
0
  table_.resize(table_size, 0);
1251
1252
0
  for (std::size_t i = 1; i < units_.size(); ++i) {
1253
0
    id_type id = static_cast<id_type>(i);
1254
0
    if (labels_[id] == '\0' || units_[id].is_state()) {
1255
0
      id_type hash_id;
1256
0
      find_unit(id, &hash_id);
1257
0
      table_[hash_id] = id;
1258
0
    }
1259
0
  }
1260
0
}
1261
1262
0
inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const {
1263
0
  *hash_id = hash_unit(id) % table_.size();
1264
0
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1265
0
    id_type unit_id = table_[*hash_id];
1266
0
    if (unit_id == 0) {
1267
0
      break;
1268
0
    }
1269
1270
    // There must not be the same unit.
1271
0
  }
1272
0
  return 0;
1273
0
}
1274
1275
inline id_type DawgBuilder::find_node(id_type node_id,
1276
0
    id_type *hash_id) const {
1277
0
  *hash_id = hash_node(node_id) % table_.size();
1278
0
  for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) {
1279
0
    id_type unit_id = table_[*hash_id];
1280
0
    if (unit_id == 0) {
1281
0
      break;
1282
0
    }
1283
1284
0
    if (are_equal(node_id, unit_id)) {
1285
0
      return unit_id;
1286
0
    }
1287
0
  }
1288
0
  return 0;
1289
0
}
1290
1291
0
inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const {
1292
0
  for (id_type i = nodes_[node_id].sibling(); i != 0;
1293
0
      i = nodes_[i].sibling()) {
1294
0
    if (units_[unit_id].has_sibling() == false) {
1295
0
      return false;
1296
0
    }
1297
0
    ++unit_id;
1298
0
  }
1299
0
  if (units_[unit_id].has_sibling() == true) {
1300
0
    return false;
1301
0
  }
1302
1303
0
  for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) {
1304
0
    if (nodes_[i].unit() != units_[unit_id].unit() ||
1305
0
        nodes_[i].label() != labels_[unit_id]) {
1306
0
      return false;
1307
0
    }
1308
0
  }
1309
0
  return true;
1310
0
}
1311
1312
0
inline id_type DawgBuilder::hash_unit(id_type id) const {
1313
0
  id_type hash_value = 0;
1314
0
  for ( ; id != 0; ++id) {
1315
0
    id_type unit = units_[id].unit();
1316
0
    uchar_type label = labels_[id];
1317
0
    hash_value ^= hash((label << 24) ^ unit);
1318
1319
0
    if (units_[id].has_sibling() == false) {
1320
0
      break;
1321
0
    }
1322
0
  }
1323
0
  return hash_value;
1324
0
}
1325
1326
0
inline id_type DawgBuilder::hash_node(id_type id) const {
1327
0
  id_type hash_value = 0;
1328
0
  for ( ; id != 0; id = nodes_[id].sibling()) {
1329
0
    id_type unit = nodes_[id].unit();
1330
0
    uchar_type label = nodes_[id].label();
1331
0
    hash_value ^= hash((label << 24) ^ unit);
1332
0
  }
1333
0
  return hash_value;
1334
0
}
1335
1336
0
inline id_type DawgBuilder::append_unit() {
1337
0
  is_intersections_.append();
1338
0
  units_.append();
1339
0
  labels_.append();
1340
1341
0
  return static_cast<id_type>(is_intersections_.size() - 1);
1342
0
}
1343
1344
0
inline id_type DawgBuilder::append_node() {
1345
0
  id_type id;
1346
0
  if (recycle_bin_.empty()) {
1347
0
    id = static_cast<id_type>(nodes_.size());
1348
0
    nodes_.append();
1349
0
  } else {
1350
0
    id = recycle_bin_.top();
1351
0
    nodes_[id] = DawgNode();
1352
0
    recycle_bin_.pop();
1353
0
  }
1354
0
  return id;
1355
0
}
1356
1357
//
1358
// Unit of double-array builder.
1359
//
1360
1361
class DoubleArrayBuilderUnit {
1362
 public:
1363
0
  DoubleArrayBuilderUnit() : unit_(0) {}
1364
1365
0
  void set_has_leaf(bool has_leaf) {
1366
0
    if (has_leaf) {
1367
0
      unit_ |= 1U << 8;
1368
0
    } else {
1369
0
      unit_ &= ~(1U << 8);
1370
0
    }
1371
0
  }
1372
0
  void set_value(value_type value) {
1373
0
    unit_ = value | (1U << 31);
1374
0
  }
1375
0
  void set_label(uchar_type label) {
1376
0
    unit_ = (unit_ & ~0xFFU) | label;
1377
0
  }
1378
0
  void set_offset(id_type offset) {
1379
0
    if (offset >= 1U << 29) {
1380
0
      DARTS_THROW("failed to modify unit: too large offset");
1381
0
    }
1382
0
    unit_ &= (1U << 31) | (1U << 8) | 0xFF;
1383
0
    if (offset < 1U << 21) {
1384
0
      unit_ |= (offset << 10);
1385
0
    } else {
1386
0
      unit_ |= (offset << 2) | (1U << 9);
1387
0
    }
1388
0
  }
1389
1390
 private:
1391
  id_type unit_;
1392
1393
  // Copyable.
1394
};
1395
1396
//
1397
// Extra unit of double-array builder.
1398
//
1399
1400
class DoubleArrayBuilderExtraUnit {
1401
 public:
1402
0
  DoubleArrayBuilderExtraUnit() : prev_(0), next_(0),
1403
0
      is_fixed_(false), is_used_(false) {}
1404
1405
0
  void set_prev(id_type prev) {
1406
0
    prev_ = prev;
1407
0
  }
1408
0
  void set_next(id_type next) {
1409
0
    next_ = next;
1410
0
  }
1411
0
  void set_is_fixed(bool is_fixed) {
1412
0
    is_fixed_ = is_fixed;
1413
0
  }
1414
0
  void set_is_used(bool is_used) {
1415
0
    is_used_ = is_used;
1416
0
  }
1417
1418
0
  id_type prev() const {
1419
0
    return prev_;
1420
0
  }
1421
0
  id_type next() const {
1422
0
    return next_;
1423
0
  }
1424
0
  bool is_fixed() const {
1425
0
    return is_fixed_;
1426
0
  }
1427
0
  bool is_used() const {
1428
0
    return is_used_;
1429
0
  }
1430
1431
 private:
1432
  id_type prev_;
1433
  id_type next_;
1434
  bool is_fixed_;
1435
  bool is_used_;
1436
1437
  // Copyable.
1438
};
1439
1440
//
1441
// DAWG -> double-array converter.
1442
//
1443
1444
class DoubleArrayBuilder {
1445
 public:
1446
  explicit DoubleArrayBuilder(progress_func_type progress_func)
1447
0
      : progress_func_(progress_func), units_(), extras_(), labels_(),
1448
0
        table_(), extras_head_(0) {}
1449
0
  ~DoubleArrayBuilder() {
1450
0
    clear();
1451
0
  }
1452
1453
  template <typename T>
1454
  void build(const Keyset<T> &keyset);
1455
  void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const;
1456
1457
  void clear();
1458
1459
 private:
1460
  enum { BLOCK_SIZE = 256 };
1461
  enum { NUM_EXTRA_BLOCKS = 16 };
1462
  enum { NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS };
1463
1464
  enum { UPPER_MASK = 0xFF << 21 };
1465
  enum { LOWER_MASK = 0xFF };
1466
1467
  typedef DoubleArrayBuilderUnit unit_type;
1468
  typedef DoubleArrayBuilderExtraUnit extra_type;
1469
1470
  progress_func_type progress_func_;
1471
  AutoPool<unit_type> units_;
1472
  AutoArray<extra_type> extras_;
1473
  AutoPool<uchar_type> labels_;
1474
  AutoArray<id_type> table_;
1475
  id_type extras_head_;
1476
1477
  // Disallows copy and assignment.
1478
  DoubleArrayBuilder(const DoubleArrayBuilder &);
1479
  DoubleArrayBuilder &operator=(const DoubleArrayBuilder &);
1480
1481
0
  std::size_t num_blocks() const {
1482
0
    return units_.size() / BLOCK_SIZE;
1483
0
  }
1484
1485
0
  const extra_type &extras(id_type id) const {
1486
0
    return extras_[id % NUM_EXTRAS];
1487
0
  }
1488
0
  extra_type &extras(id_type id) {
1489
0
    return extras_[id % NUM_EXTRAS];
1490
0
  }
1491
1492
  template <typename T>
1493
  void build_dawg(const Keyset<T> &keyset, DawgBuilder *dawg_builder);
1494
  void build_from_dawg(const DawgBuilder &dawg);
1495
  void build_from_dawg(const DawgBuilder &dawg,
1496
      id_type dawg_id, id_type dic_id);
1497
  id_type arrange_from_dawg(const DawgBuilder &dawg,
1498
      id_type dawg_id, id_type dic_id);
1499
1500
  template <typename T>
1501
  void build_from_keyset(const Keyset<T> &keyset);
1502
  template <typename T>
1503
  void build_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1504
      std::size_t end, std::size_t depth, id_type dic_id);
1505
  template <typename T>
1506
  id_type arrange_from_keyset(const Keyset<T> &keyset, std::size_t begin,
1507
      std::size_t end, std::size_t depth, id_type dic_id);
1508
1509
  id_type find_valid_offset(id_type id) const;
1510
  bool is_valid_offset(id_type id, id_type offset) const;
1511
1512
  void reserve_id(id_type id);
1513
  void expand_units();
1514
1515
  void fix_all_blocks();
1516
  void fix_block(id_type block_id);
1517
};
1518
1519
template <typename T>
1520
0
void DoubleArrayBuilder::build(const Keyset<T> &keyset) {
1521
0
  if (keyset.has_values()) {
1522
0
    Details::DawgBuilder dawg_builder;
1523
0
    build_dawg(keyset, &dawg_builder);
1524
0
    build_from_dawg(dawg_builder);
1525
0
    dawg_builder.clear();
1526
0
  } else {
1527
0
    build_from_keyset(keyset);
1528
0
  }
1529
0
}
1530
1531
inline void DoubleArrayBuilder::copy(std::size_t *size_ptr,
1532
0
    DoubleArrayUnit **buf_ptr) const {
1533
0
  if (size_ptr != NULL) {
1534
0
    *size_ptr = units_.size();
1535
0
  }
1536
0
  if (buf_ptr != NULL) {
1537
0
    *buf_ptr = new DoubleArrayUnit[units_.size()];
1538
0
    unit_type *units = reinterpret_cast<unit_type *>(*buf_ptr);
1539
0
    for (std::size_t i = 0; i < units_.size(); ++i) {
1540
0
      units[i] = units_[i];
1541
0
    }
1542
0
  }
1543
0
}
1544
1545
0
inline void DoubleArrayBuilder::clear() {
1546
0
  units_.clear();
1547
0
  extras_.clear();
1548
0
  labels_.clear();
1549
0
  table_.clear();
1550
0
  extras_head_ = 0;
1551
0
}
1552
1553
template <typename T>
1554
void DoubleArrayBuilder::build_dawg(const Keyset<T> &keyset,
1555
0
    DawgBuilder *dawg_builder) {
1556
0
  dawg_builder->init();
1557
0
  for (std::size_t i = 0; i < keyset.num_keys(); ++i) {
1558
0
    dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i));
1559
0
    if (progress_func_ != NULL) {
1560
0
      progress_func_(i + 1, keyset.num_keys() + 1);
1561
0
    }
1562
0
  }
1563
0
  dawg_builder->finish();
1564
0
}
1565
1566
0
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) {
1567
0
  std::size_t num_units = 1;
1568
0
  while (num_units < dawg.size()) {
1569
0
    num_units <<= 1;
1570
0
  }
1571
0
  units_.reserve(num_units);
1572
1573
0
  table_.reset(new id_type[dawg.num_intersections()]);
1574
0
  for (std::size_t i = 0; i < dawg.num_intersections(); ++i) {
1575
0
    table_[i] = 0;
1576
0
  }
1577
1578
0
  extras_.reset(new extra_type[NUM_EXTRAS]);
1579
1580
0
  reserve_id(0);
1581
0
  extras(0).set_is_used(true);
1582
0
  units_[0].set_offset(1);
1583
0
  units_[0].set_label('\0');
1584
1585
0
  if (dawg.child(dawg.root()) != 0) {
1586
0
    build_from_dawg(dawg, dawg.root(), 0);
1587
0
  }
1588
1589
0
  fix_all_blocks();
1590
1591
0
  extras_.clear();
1592
0
  labels_.clear();
1593
0
  table_.clear();
1594
0
}
1595
1596
inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg,
1597
0
    id_type dawg_id, id_type dic_id) {
1598
0
  id_type dawg_child_id = dawg.child(dawg_id);
1599
0
  if (dawg.is_intersection(dawg_child_id)) {
1600
0
    id_type intersection_id = dawg.intersection_id(dawg_child_id);
1601
0
    id_type offset = table_[intersection_id];
1602
0
    if (offset != 0) {
1603
0
      offset ^= dic_id;
1604
0
      if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) {
1605
0
        if (dawg.is_leaf(dawg_child_id)) {
1606
0
          units_[dic_id].set_has_leaf(true);
1607
0
        }
1608
0
        units_[dic_id].set_offset(offset);
1609
0
        return;
1610
0
      }
1611
0
    }
1612
0
  }
1613
1614
0
  id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id);
1615
0
  if (dawg.is_intersection(dawg_child_id)) {
1616
0
    table_[dawg.intersection_id(dawg_child_id)] = offset;
1617
0
  }
1618
1619
0
  do {
1620
0
    uchar_type child_label = dawg.label(dawg_child_id);
1621
0
    id_type dic_child_id = offset ^ child_label;
1622
0
    if (child_label != '\0') {
1623
0
      build_from_dawg(dawg, dawg_child_id, dic_child_id);
1624
0
    }
1625
0
    dawg_child_id = dawg.sibling(dawg_child_id);
1626
0
  } while (dawg_child_id != 0);
1627
0
}
1628
1629
inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg,
1630
0
    id_type dawg_id, id_type dic_id) {
1631
0
  labels_.resize(0);
1632
1633
0
  id_type dawg_child_id = dawg.child(dawg_id);
1634
0
  while (dawg_child_id != 0) {
1635
0
    labels_.append(dawg.label(dawg_child_id));
1636
0
    dawg_child_id = dawg.sibling(dawg_child_id);
1637
0
  }
1638
1639
0
  id_type offset = find_valid_offset(dic_id);
1640
0
  units_[dic_id].set_offset(dic_id ^ offset);
1641
1642
0
  dawg_child_id = dawg.child(dawg_id);
1643
0
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1644
0
    id_type dic_child_id = offset ^ labels_[i];
1645
0
    reserve_id(dic_child_id);
1646
1647
0
    if (dawg.is_leaf(dawg_child_id)) {
1648
0
      units_[dic_id].set_has_leaf(true);
1649
0
      units_[dic_child_id].set_value(dawg.value(dawg_child_id));
1650
0
    } else {
1651
0
      units_[dic_child_id].set_label(labels_[i]);
1652
0
    }
1653
1654
0
    dawg_child_id = dawg.sibling(dawg_child_id);
1655
0
  }
1656
0
  extras(offset).set_is_used(true);
1657
1658
0
  return offset;
1659
0
}
1660
1661
template <typename T>
1662
0
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset) {
1663
0
  std::size_t num_units = 1;
1664
0
  while (num_units < keyset.num_keys()) {
1665
0
    num_units <<= 1;
1666
0
  }
1667
0
  units_.reserve(num_units);
1668
1669
0
  extras_.reset(new extra_type[NUM_EXTRAS]);
1670
1671
0
  reserve_id(0);
1672
0
  extras(0).set_is_used(true);
1673
0
  units_[0].set_offset(1);
1674
0
  units_[0].set_label('\0');
1675
1676
0
  if (keyset.num_keys() > 0) {
1677
0
    build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0);
1678
0
  }
1679
1680
0
  fix_all_blocks();
1681
1682
0
  extras_.clear();
1683
0
  labels_.clear();
1684
0
}
1685
1686
template <typename T>
1687
void DoubleArrayBuilder::build_from_keyset(const Keyset<T> &keyset,
1688
0
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1689
0
  id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id);
1690
1691
0
  while (begin < end) {
1692
0
    if (keyset.keys(begin, depth) != '\0') {
1693
0
      break;
1694
0
    }
1695
0
    ++begin;
1696
0
  }
1697
0
  if (begin == end) {
1698
0
    return;
1699
0
  }
1700
1701
0
  std::size_t last_begin = begin;
1702
0
  uchar_type last_label = keyset.keys(begin, depth);
1703
0
  while (++begin < end) {
1704
0
    uchar_type label = keyset.keys(begin, depth);
1705
0
    if (label != last_label) {
1706
0
      build_from_keyset(keyset, last_begin, begin,
1707
0
          depth + 1, offset ^ last_label);
1708
0
      last_begin = begin;
1709
0
      last_label = keyset.keys(begin, depth);
1710
0
    }
1711
0
  }
1712
0
  build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label);
1713
0
}
1714
1715
template <typename T>
1716
id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset<T> &keyset,
1717
0
    std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) {
1718
0
  labels_.resize(0);
1719
1720
0
  value_type value = -1;
1721
0
  for (std::size_t i = begin; i < end; ++i) {
1722
0
    uchar_type label = keyset.keys(i, depth);
1723
0
    if (label == '\0') {
1724
0
      if (keyset.has_lengths() && depth < keyset.lengths(i)) {
1725
0
        DARTS_THROW("failed to build double-array: "
1726
0
            "invalid null character");
1727
0
      } else if (keyset.values(i) < 0) {
1728
0
        DARTS_THROW("failed to build double-array: negative value");
1729
0
      }
1730
1731
0
      if (value == -1) {
1732
0
        value = keyset.values(i);
1733
0
      }
1734
0
      if (progress_func_ != NULL) {
1735
0
        progress_func_(i + 1, keyset.num_keys() + 1);
1736
0
      }
1737
0
    }
1738
1739
0
    if (labels_.empty()) {
1740
0
      labels_.append(label);
1741
0
    } else if (label != labels_[labels_.size() - 1]) {
1742
0
      if (label < labels_[labels_.size() - 1]) {
1743
0
        DARTS_THROW("failed to build double-array: wrong key order");
1744
0
      }
1745
0
      labels_.append(label);
1746
0
    }
1747
0
  }
1748
1749
0
  id_type offset = find_valid_offset(dic_id);
1750
0
  units_[dic_id].set_offset(dic_id ^ offset);
1751
1752
0
  for (std::size_t i = 0; i < labels_.size(); ++i) {
1753
0
    id_type dic_child_id = offset ^ labels_[i];
1754
0
    reserve_id(dic_child_id);
1755
0
    if (labels_[i] == '\0') {
1756
0
      units_[dic_id].set_has_leaf(true);
1757
0
      units_[dic_child_id].set_value(value);
1758
0
    } else {
1759
0
      units_[dic_child_id].set_label(labels_[i]);
1760
0
    }
1761
0
  }
1762
0
  extras(offset).set_is_used(true);
1763
1764
0
  return offset;
1765
0
}
1766
1767
0
inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const {
1768
0
  if (extras_head_ >= units_.size()) {
1769
0
    return units_.size() | (id & LOWER_MASK);
1770
0
  }
1771
1772
0
  id_type unfixed_id = extras_head_;
1773
0
  do {
1774
0
    id_type offset = unfixed_id ^ labels_[0];
1775
0
    if (is_valid_offset(id, offset)) {
1776
0
      return offset;
1777
0
    }
1778
0
    unfixed_id = extras(unfixed_id).next();
1779
0
  } while (unfixed_id != extras_head_);
1780
1781
0
  return units_.size() | (id & LOWER_MASK);
1782
0
}
1783
1784
inline bool DoubleArrayBuilder::is_valid_offset(id_type id,
1785
0
    id_type offset) const {
1786
0
  if (extras(offset).is_used()) {
1787
0
    return false;
1788
0
  }
1789
1790
0
  id_type rel_offset = id ^ offset;
1791
0
  if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) {
1792
0
    return false;
1793
0
  }
1794
1795
0
  for (std::size_t i = 1; i < labels_.size(); ++i) {
1796
0
    if (extras(offset ^ labels_[i]).is_fixed()) {
1797
0
      return false;
1798
0
    }
1799
0
  }
1800
1801
0
  return true;
1802
0
}
1803
1804
0
inline void DoubleArrayBuilder::reserve_id(id_type id) {
1805
0
  if (id >= units_.size()) {
1806
0
    expand_units();
1807
0
  }
1808
1809
0
  if (id == extras_head_) {
1810
0
    extras_head_ = extras(id).next();
1811
0
    if (extras_head_ == id) {
1812
0
      extras_head_ = units_.size();
1813
0
    }
1814
0
  }
1815
0
  extras(extras(id).prev()).set_next(extras(id).next());
1816
0
  extras(extras(id).next()).set_prev(extras(id).prev());
1817
0
  extras(id).set_is_fixed(true);
1818
0
}
1819
1820
0
inline void DoubleArrayBuilder::expand_units() {
1821
0
  id_type src_num_units = units_.size();
1822
0
  id_type src_num_blocks = num_blocks();
1823
1824
0
  id_type dest_num_units = src_num_units + BLOCK_SIZE;
1825
0
  id_type dest_num_blocks = src_num_blocks + 1;
1826
1827
0
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1828
0
    fix_block(src_num_blocks - NUM_EXTRA_BLOCKS);
1829
0
  }
1830
1831
0
  units_.resize(dest_num_units);
1832
1833
0
  if (dest_num_blocks > NUM_EXTRA_BLOCKS) {
1834
0
    for (std::size_t id = src_num_units; id < dest_num_units; ++id) {
1835
0
      extras(id).set_is_used(false);
1836
0
      extras(id).set_is_fixed(false);
1837
0
    }
1838
0
  }
1839
1840
0
  for (id_type i = src_num_units + 1; i < dest_num_units; ++i) {
1841
0
    extras(i - 1).set_next(i);
1842
0
    extras(i).set_prev(i - 1);
1843
0
  }
1844
1845
0
  extras(src_num_units).set_prev(dest_num_units - 1);
1846
0
  extras(dest_num_units - 1).set_next(src_num_units);
1847
1848
0
  extras(src_num_units).set_prev(extras(extras_head_).prev());
1849
0
  extras(dest_num_units - 1).set_next(extras_head_);
1850
1851
0
  extras(extras(extras_head_).prev()).set_next(src_num_units);
1852
0
  extras(extras_head_).set_prev(dest_num_units - 1);
1853
0
}
1854
1855
0
inline void DoubleArrayBuilder::fix_all_blocks() {
1856
0
  id_type begin = 0;
1857
0
  if (num_blocks() > NUM_EXTRA_BLOCKS) {
1858
0
    begin = num_blocks() - NUM_EXTRA_BLOCKS;
1859
0
  }
1860
0
  id_type end = num_blocks();
1861
1862
0
  for (id_type block_id = begin; block_id != end; ++block_id) {
1863
0
    fix_block(block_id);
1864
0
  }
1865
0
}
1866
1867
0
inline void DoubleArrayBuilder::fix_block(id_type block_id) {
1868
0
  id_type begin = block_id * BLOCK_SIZE;
1869
0
  id_type end = begin + BLOCK_SIZE;
1870
1871
0
  id_type unused_offset = 0;
1872
0
  for (id_type offset = begin; offset != end; ++offset) {
1873
0
    if (!extras(offset).is_used()) {
1874
0
      unused_offset = offset;
1875
0
      break;
1876
0
    }
1877
0
  }
1878
1879
0
  for (id_type id = begin; id != end; ++id) {
1880
0
    if (!extras(id).is_fixed()) {
1881
0
      reserve_id(id);
1882
0
      units_[id].set_label(static_cast<uchar_type>(id ^ unused_offset));
1883
0
    }
1884
0
  }
1885
0
}
1886
1887
}  // namespace Details
1888
1889
//
1890
// Member function build() of DoubleArrayImpl.
1891
//
1892
1893
template <typename A, typename B, typename T, typename C>
1894
int DoubleArrayImpl<A, B, T, C>::build(std::size_t num_keys,
1895
    const key_type * const *keys, const std::size_t *lengths,
1896
0
    const value_type *values, Details::progress_func_type progress_func) {
1897
0
  Details::Keyset<value_type> keyset(num_keys, keys, lengths, values);
1898
1899
0
  Details::DoubleArrayBuilder builder(progress_func);
1900
0
  builder.build(keyset);
1901
1902
0
  std::size_t size = 0;
1903
0
  unit_type *buf = NULL;
1904
0
  builder.copy(&size, &buf);
1905
1906
0
  clear();
1907
1908
0
  size_ = size;
1909
0
  array_ = buf;
1910
0
  buf_ = buf;
1911
1912
0
  if (progress_func != NULL) {
1913
0
    progress_func(num_keys + 1, num_keys + 1);
1914
0
  }
1915
1916
0
  return 0;
1917
0
}
1918
1919
}  // namespace Darts
1920
1921
#undef DARTS_INT_TO_STR
1922
#undef DARTS_LINE_TO_STR
1923
#undef DARTS_LINE_STR
1924
#undef DARTS_THROW
1925
1926
#endif  // DARTS_H_