/src/php-src/ext/standard/levenshtein.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Hartmut Holzgraefe <hholzgra@php.net> | |
14 | | +----------------------------------------------------------------------+ |
15 | | */ |
16 | | |
17 | | #include "php.h" |
18 | | |
19 | | /* {{{ reference_levdist |
20 | | * reference implementation, only optimized for memory usage, not speed */ |
21 | | static zend_long reference_levdist(const zend_string *string1, const zend_string *string2, zend_long cost_ins, zend_long cost_rep, zend_long cost_del ) |
22 | 0 | { |
23 | 0 | zend_long *p1, *p2, *tmp; |
24 | 0 | zend_long c0, c1, c2; |
25 | 0 | size_t i1, i2; |
26 | |
|
27 | 0 | if (ZSTR_LEN(string1) == 0) { |
28 | 0 | return ZSTR_LEN(string2) * cost_ins; |
29 | 0 | } |
30 | 0 | if (ZSTR_LEN(string2) == 0) { |
31 | 0 | return ZSTR_LEN(string1) * cost_del; |
32 | 0 | } |
33 | | |
34 | | /* When all costs are equal, levenshtein fulfills the requirements of a metric, which means |
35 | | * that the distance is symmetric. If string1 is shorter than string 2 we can save memory (and CPU time) |
36 | | * by having shorter rows (p1 & p2). */ |
37 | 0 | if (ZSTR_LEN(string1) < ZSTR_LEN(string2) && cost_ins == cost_rep && cost_rep == cost_del) { |
38 | 0 | const zend_string *tmp = string1; |
39 | 0 | string1 = string2; |
40 | 0 | string2 = tmp; |
41 | 0 | } |
42 | |
|
43 | 0 | p1 = safe_emalloc((ZSTR_LEN(string2) + 1), sizeof(zend_long), 0); |
44 | 0 | p2 = safe_emalloc((ZSTR_LEN(string2) + 1), sizeof(zend_long), 0); |
45 | |
|
46 | 0 | for (i2 = 0; i2 <= ZSTR_LEN(string2); i2++) { |
47 | 0 | p1[i2] = i2 * cost_ins; |
48 | 0 | } |
49 | 0 | for (i1 = 0; i1 < ZSTR_LEN(string1) ; i1++) { |
50 | 0 | p2[0] = p1[0] + cost_del; |
51 | |
|
52 | 0 | for (i2 = 0; i2 < ZSTR_LEN(string2); i2++) { |
53 | 0 | c0 = p1[i2] + ((ZSTR_VAL(string1)[i1] == ZSTR_VAL(string2)[i2]) ? 0 : cost_rep); |
54 | 0 | c1 = p1[i2 + 1] + cost_del; |
55 | 0 | if (c1 < c0) { |
56 | 0 | c0 = c1; |
57 | 0 | } |
58 | 0 | c2 = p2[i2] + cost_ins; |
59 | 0 | if (c2 < c0) { |
60 | 0 | c0 = c2; |
61 | 0 | } |
62 | 0 | p2[i2 + 1] = c0; |
63 | 0 | } |
64 | 0 | tmp = p1; |
65 | 0 | p1 = p2; |
66 | 0 | p2 = tmp; |
67 | 0 | } |
68 | 0 | c0 = p1[ZSTR_LEN(string2)]; |
69 | |
|
70 | 0 | efree(p1); |
71 | 0 | efree(p2); |
72 | |
|
73 | 0 | return c0; |
74 | 0 | } |
75 | | /* }}} */ |
76 | | |
77 | | /* {{{ Calculate Levenshtein distance between two strings */ |
78 | | PHP_FUNCTION(levenshtein) |
79 | 0 | { |
80 | 0 | zend_string *string1, *string2; |
81 | 0 | zend_long cost_ins = 1; |
82 | 0 | zend_long cost_rep = 1; |
83 | 0 | zend_long cost_del = 1; |
84 | |
|
85 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|lll", &string1, &string2, &cost_ins, &cost_rep, &cost_del) == FAILURE) { |
86 | 0 | RETURN_THROWS(); |
87 | 0 | } |
88 | | |
89 | | |
90 | 0 | RETURN_LONG(reference_levdist(string1, string2, cost_ins, cost_rep, cost_del)); |
91 | 0 | } |
92 | | /* }}} */ |