/src/keystone/llvm/include/llvm/ADT/edit_distance.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines a Levenshtein distance function that works for any two |
11 | | // sequences, with each element of each sequence being analogous to a character |
12 | | // in a string. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #ifndef LLVM_ADT_EDIT_DISTANCE_H |
17 | | #define LLVM_ADT_EDIT_DISTANCE_H |
18 | | |
19 | | #include "llvm/ADT/ArrayRef.h" |
20 | | #include <algorithm> |
21 | | #include <memory> |
22 | | |
23 | | namespace llvm_ks { |
24 | | |
25 | | /// \brief Determine the edit distance between two sequences. |
26 | | /// |
27 | | /// \param FromArray the first sequence to compare. |
28 | | /// |
29 | | /// \param ToArray the second sequence to compare. |
30 | | /// |
31 | | /// \param AllowReplacements whether to allow element replacements (change one |
32 | | /// element into another) as a single operation, rather than as two operations |
33 | | /// (an insertion and a removal). |
34 | | /// |
35 | | /// \param MaxEditDistance If non-zero, the maximum edit distance that this |
36 | | /// routine is allowed to compute. If the edit distance will exceed that |
37 | | /// maximum, returns \c MaxEditDistance+1. |
38 | | /// |
39 | | /// \returns the minimum number of element insertions, removals, or (if |
40 | | /// \p AllowReplacements is \c true) replacements needed to transform one of |
41 | | /// the given sequences into the other. If zero, the sequences are identical. |
42 | | template<typename T> |
43 | | unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray, |
44 | | bool AllowReplacements = true, |
45 | 0 | unsigned MaxEditDistance = 0) { |
46 | | // The algorithm implemented below is the "classic" |
47 | | // dynamic-programming algorithm for computing the Levenshtein |
48 | | // distance, which is described here: |
49 | | // |
50 | | // http://en.wikipedia.org/wiki/Levenshtein_distance |
51 | | // |
52 | | // Although the algorithm is typically described using an m x n |
53 | | // array, only one row plus one element are used at a time, so this |
54 | | // implementation just keeps one vector for the row. To update one entry, |
55 | | // only the entries to the left, top, and top-left are needed. The left |
56 | | // entry is in Row[x-1], the top entry is what's in Row[x] from the last |
57 | | // iteration, and the top-left entry is stored in Previous. |
58 | 0 | typename ArrayRef<T>::size_type m = FromArray.size(); |
59 | 0 | typename ArrayRef<T>::size_type n = ToArray.size(); |
60 | |
|
61 | 0 | const unsigned SmallBufferSize = 64; |
62 | 0 | unsigned SmallBuffer[SmallBufferSize]; |
63 | 0 | std::unique_ptr<unsigned[]> Allocated; |
64 | 0 | unsigned *Row = SmallBuffer; |
65 | 0 | if (n + 1 > SmallBufferSize) { |
66 | 0 | Row = new unsigned[n + 1]; |
67 | 0 | Allocated.reset(Row); |
68 | 0 | } |
69 | |
|
70 | 0 | for (unsigned i = 1; i <= n; ++i) |
71 | 0 | Row[i] = i; |
72 | |
|
73 | 0 | for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) { |
74 | 0 | Row[0] = y; |
75 | 0 | unsigned BestThisRow = Row[0]; |
76 | |
|
77 | 0 | unsigned Previous = y - 1; |
78 | 0 | for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) { |
79 | 0 | int OldRow = Row[x]; |
80 | 0 | if (AllowReplacements) { |
81 | 0 | Row[x] = std::min( |
82 | 0 | Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u), |
83 | 0 | std::min(Row[x-1], Row[x])+1); |
84 | 0 | } |
85 | 0 | else { |
86 | 0 | if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous; |
87 | 0 | else Row[x] = std::min(Row[x-1], Row[x]) + 1; |
88 | 0 | } |
89 | 0 | Previous = OldRow; |
90 | 0 | BestThisRow = std::min(BestThisRow, Row[x]); |
91 | 0 | } |
92 | |
|
93 | 0 | if (MaxEditDistance && BestThisRow > MaxEditDistance) |
94 | 0 | return MaxEditDistance + 1; |
95 | 0 | } |
96 | | |
97 | 0 | unsigned Result = Row[n]; |
98 | 0 | return Result; |
99 | 0 | } |
100 | | |
101 | | } // End llvm_ks namespace |
102 | | |
103 | | #endif |