/src/aspell/modules/speller/default/leditdist.hpp
Line | Count | Source |
1 | | |
2 | | #ifndef __aspeller_leditdist_hh__ |
3 | | #define __aspeller_leditdist_hh__ |
4 | | |
5 | | #include "weights.hpp" |
6 | | |
7 | | namespace aspeller { |
8 | | |
9 | | // limit_edit_distance finds the shortest edit distance but will |
10 | | // stop and return a number at least as large as LARGE_NUM if it has |
11 | | // to do more edits than a set limit. |
12 | | // Note that this does NOT mean that the score returned is <= limit*w.max |
13 | | // as "sub" vs "submarine" will return 6*(cost of insertion) no matter what |
14 | | // the limit is. |
15 | | // The edit distance is |
16 | | // (cost of swap)(# of swaps) + (cost of deletion)(# of deletions) |
17 | | // + (cost of insertion)(# of insertions) |
18 | | // + (cost of substitutions)(# of substitutions) |
19 | | |
20 | | // Preconditions: |
21 | | // max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15 |
22 | | // if violated than an incorrect result may be returned (which may be negative) |
23 | | // due to overflow of a short integer |
24 | | // (limit+1)*w.min < limit*w.max |
25 | | // limit <= 5 (use edit_distance if limit > 5) |
26 | | // where w.min and w.max is the minimum and maximum cost of an edit |
27 | | // respectfully. |
28 | | |
29 | | // The running time is asymptotically bounded above by |
30 | | // (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b) |
31 | | // Based on my informal tests, however, the n does not really matter |
32 | | // and the running time is more like (3^l). |
33 | | |
34 | | // limit_edit_distance, based on my informal tests, turns out to be |
35 | | // faster than edit_dist for l < 5. For l == 5 it is about the |
36 | | // smaller for short strings (<= 5) and less than for longer strings |
37 | | |
38 | | // limit2_edit_distance(a,b,w) = limit_edit_distance(a,b,2,w) |
39 | | // but is roughly 2/3's faster |
40 | | |
41 | | struct EditDist { |
42 | | int score; |
43 | | const char * stopped_at; |
44 | 10.2k | EditDist() {} |
45 | | EditDist(int s, const char * p) |
46 | 32.8M | : score(s), stopped_at(p) {} |
47 | 35.0M | operator int () const {return score;} |
48 | | }; |
49 | | |
50 | | static const int LARGE_NUM = 0xFFFFF; |
51 | | // this needs to be SMALLER than INT_MAX since it may be incremented |
52 | | // a few times |
53 | | |
54 | | int limit_edit_distance(const char * a, const char * b, int limit, |
55 | | const EditDistanceWeights & w |
56 | | = EditDistanceWeights()); |
57 | | |
58 | | EditDist limit0_edit_distance(const char * a, const char * b, |
59 | | const EditDistanceWeights & w |
60 | | = EditDistanceWeights()); |
61 | | |
62 | | EditDist limit1_edit_distance(const char * a, const char * b, |
63 | | const EditDistanceWeights & w |
64 | | = EditDistanceWeights()); |
65 | | |
66 | | EditDist limit2_edit_distance(const char * a, const char * b, |
67 | | const EditDistanceWeights & w |
68 | | = EditDistanceWeights()); |
69 | | |
70 | | } |
71 | | |
72 | | #endif |