Coverage Report

Created: 2026-05-30 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/sentencepiece/third_party/esaxx/esa.hxx
Line
Count
Source
1
/*
2
 * esa.hxx
3
 * Copyright (c) 2010 Daisuke Okanohara All Rights Reserved.
4
 *
5
 * Permission is hereby granted, free of charge, to any person
6
 * obtaining a copy of this software and associated documentation
7
 * files (the "Software"), to deal in the Software without
8
 * restriction, including without limitation the rights to use,
9
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following
12
 * conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be
15
 * included in all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
 * OTHER DEALINGS IN THE SOFTWARE.
25
 */
26
27
#ifndef _ESA_HXX
28
#define _ESA_HXX
29
30
#include <vector>
31
#include <utility>
32
#include <cassert>
33
#include "sais.hxx"
34
35
namespace esaxx_private {
36
template<typename string_type, typename sarray_type, typename index_type>
37
6.80k
index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){
38
6.80k
  if (n == 0){
39
0
    return 0;
40
0
  }
41
6.80k
  sarray_type Psi = L;
42
6.80k
  Psi[SA[0]] = SA[n-1];
43
6.34M
  for (index_type i = 1; i < n; ++i){
44
6.34M
    Psi[SA[i]] = SA[i-1];
45
6.34M
  }
46
47
  // Compare at most 2n log n charcters. Practically fastest
48
  // "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09
49
6.80k
  sarray_type PLCP = R;
50
6.80k
  index_type h = 0;
51
6.35M
  for (index_type i = 0; i < n; ++i){
52
6.34M
    index_type j = Psi[i];
53
12.5M
    while (i+h < n && j+h < n && 
54
12.4M
     T[i+h] == T[j+h]){
55
6.15M
      ++h;
56
6.15M
    }
57
6.34M
    PLCP[i] = h;
58
6.34M
    if (h > 0) --h;
59
6.34M
  }
60
61
6.80k
  sarray_type H = L;
62
6.35M
  for (index_type i = 0; i < n; ++i){
63
6.34M
    H[i] = PLCP[SA[i]];
64
6.34M
  }
65
6.80k
  H[0] = -1;
66
67
6.80k
  std::vector<std::pair<index_type, index_type> > S;
68
6.80k
  S.push_back(std::make_pair((index_type)-1, (index_type)-1));
69
6.80k
  size_t nodeNum = 0;
70
6.35M
  for (index_type i = 0; ; ++i){
71
6.35M
    std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]);
72
6.35M
    std::pair<index_type, index_type> cand(S.back());
73
16.4M
    while (cand.second > cur.second){
74
10.1M
      if (i - cand.first > 1){
75
3.79M
  L[nodeNum] = cand.first;
76
3.79M
  R[nodeNum] = i;
77
3.79M
  D[nodeNum] = cand.second;
78
3.79M
  ++nodeNum;
79
3.79M
      }
80
10.1M
      cur.first = cand.first;
81
10.1M
      S.pop_back();
82
10.1M
      cand = S.back();
83
10.1M
    }
84
6.35M
    if (cand.second < cur.second){
85
3.79M
      S.push_back(cur);
86
3.79M
    }
87
6.35M
    if (i == n) break;
88
6.34M
    S.push_back(std::make_pair(i, n - SA[i] + 1));
89
6.34M
  }
90
6.80k
  return nodeNum;
91
6.80k
}
Unexecuted instantiation: long esaxx_private::suffixtree<std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<long*>, long>(std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, long)
int esaxx_private::suffixtree<std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<int*>, int>(std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, int)
Line
Count
Source
37
6.80k
index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){
38
6.80k
  if (n == 0){
39
0
    return 0;
40
0
  }
41
6.80k
  sarray_type Psi = L;
42
6.80k
  Psi[SA[0]] = SA[n-1];
43
6.34M
  for (index_type i = 1; i < n; ++i){
44
6.34M
    Psi[SA[i]] = SA[i-1];
45
6.34M
  }
46
47
  // Compare at most 2n log n charcters. Practically fastest
48
  // "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09
49
6.80k
  sarray_type PLCP = R;
50
6.80k
  index_type h = 0;
51
6.35M
  for (index_type i = 0; i < n; ++i){
52
6.34M
    index_type j = Psi[i];
53
12.5M
    while (i+h < n && j+h < n && 
54
12.4M
     T[i+h] == T[j+h]){
55
6.15M
      ++h;
56
6.15M
    }
57
6.34M
    PLCP[i] = h;
58
6.34M
    if (h > 0) --h;
59
6.34M
  }
60
61
6.80k
  sarray_type H = L;
62
6.35M
  for (index_type i = 0; i < n; ++i){
63
6.34M
    H[i] = PLCP[SA[i]];
64
6.34M
  }
65
6.80k
  H[0] = -1;
66
67
6.80k
  std::vector<std::pair<index_type, index_type> > S;
68
6.80k
  S.push_back(std::make_pair((index_type)-1, (index_type)-1));
69
6.80k
  size_t nodeNum = 0;
70
6.35M
  for (index_type i = 0; ; ++i){
71
6.35M
    std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]);
72
6.35M
    std::pair<index_type, index_type> cand(S.back());
73
16.4M
    while (cand.second > cur.second){
74
10.1M
      if (i - cand.first > 1){
75
3.79M
  L[nodeNum] = cand.first;
76
3.79M
  R[nodeNum] = i;
77
3.79M
  D[nodeNum] = cand.second;
78
3.79M
  ++nodeNum;
79
3.79M
      }
80
10.1M
      cur.first = cand.first;
81
10.1M
      S.pop_back();
82
10.1M
      cand = S.back();
83
10.1M
    }
84
6.35M
    if (cand.second < cur.second){
85
3.79M
      S.push_back(cur);
86
3.79M
    }
87
6.35M
    if (i == n) break;
88
6.34M
    S.push_back(std::make_pair(i, n - SA[i] + 1));
89
6.34M
  }
90
6.80k
  return nodeNum;
91
6.80k
}
92
}
93
94
/**
95
 * @brief Build an enhanced suffix array of a given string in linear time
96
 * For an input text T, esaxx() builds an enhancd suffix array in linear time. 
97
 * i-th internal node is represented as a triple (L[i], R[i], D[i]); 
98
 *   L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1]
99
 *   D[i] is the depth of the internal node
100
 * The number of internal node is at most N-1 and return the actual number by 
101
 * @param T[0...n-1]  The input string. (random access iterator)
102
 * @param SA[0...n-1] The output suffix array (random access iterator)
103
 * @param L[0...n-1]  The output left boundary of internal node (random access iterator)
104
 * @param R[0...n-1]  The output right boundary of internal node (random access iterator)
105
 * @param D[0...n-1]  The output depth of internal node (random access iterator)
106
 * @param n The length of the input string
107
 * @param k The alphabet size
108
 * @pram nodeNum The output the number of internal node
109
 * @return 0 if succeded, -1 or -2 otherwise 
110
 */
111
112
template<typename string_type, typename sarray_type, typename index_type>
113
int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D,
114
6.80k
     index_type n, index_type k, index_type& nodeNum) {
115
6.80k
  if ((n < 0) || (k <= 0)) return -1;
116
6.80k
  int err = saisxx(T, SA, n, k);
117
6.80k
  if (err != 0){
118
0
    return err;
119
0
  }
120
6.80k
  nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n);
121
6.80k
  return 0;
122
6.80k
}
Unexecuted instantiation: int esaxx<std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<long*>, long>(std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, std::__1::__wrap_iter<long*>, long, long, long&)
int esaxx<std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<int*>, int>(std::__1::__wrap_iter<unsigned int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, std::__1::__wrap_iter<int*>, int, int, int&)
Line
Count
Source
114
6.80k
     index_type n, index_type k, index_type& nodeNum) {
115
6.80k
  if ((n < 0) || (k <= 0)) return -1;
116
6.80k
  int err = saisxx(T, SA, n, k);
117
6.80k
  if (err != 0){
118
0
    return err;
119
0
  }
120
6.80k
  nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n);
121
6.80k
  return 0;
122
6.80k
}
123
124
125
#endif // _ESA_HXX