/src/php-src/ext/standard/levenshtein.c
Line | Count | Source |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright © The PHP Group and Contributors. | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to the Modified BSD License that is | |
6 | | | bundled with this package in the file LICENSE, and is available | |
7 | | | through the World Wide Web at <https://www.php.net/license/>. | |
8 | | | | |
9 | | | SPDX-License-Identifier: BSD-3-Clause | |
10 | | +----------------------------------------------------------------------+ |
11 | | | Author: Hartmut Holzgraefe <hholzgra@php.net> | |
12 | | +----------------------------------------------------------------------+ |
13 | | */ |
14 | | |
15 | | #include "php.h" |
16 | | |
17 | | /* {{{ reference_levdist |
18 | | * reference implementation, only optimized for memory usage, not speed */ |
19 | | static zend_long reference_levdist(const zend_string *string1, const zend_string *string2, zend_long cost_ins, zend_long cost_rep, zend_long cost_del ) |
20 | 0 | { |
21 | 0 | zend_long *p1, *p2, *tmp; |
22 | 0 | zend_long c0, c1, c2; |
23 | 0 | size_t i1, i2; |
24 | |
|
25 | 0 | if (ZSTR_LEN(string1) == 0) { |
26 | 0 | return ZSTR_LEN(string2) * cost_ins; |
27 | 0 | } |
28 | 0 | if (ZSTR_LEN(string2) == 0) { |
29 | 0 | return ZSTR_LEN(string1) * cost_del; |
30 | 0 | } |
31 | | |
32 | | /* When all costs are equal, levenshtein fulfills the requirements of a metric, which means |
33 | | * that the distance is symmetric. If string1 is shorter than string 2 we can save memory (and CPU time) |
34 | | * by having shorter rows (p1 & p2). */ |
35 | 0 | if (ZSTR_LEN(string1) < ZSTR_LEN(string2) && cost_ins == cost_rep && cost_rep == cost_del) { |
36 | 0 | const zend_string *tmp = string1; |
37 | 0 | string1 = string2; |
38 | 0 | string2 = tmp; |
39 | 0 | } |
40 | |
|
41 | 0 | p1 = safe_emalloc((ZSTR_LEN(string2) + 1), sizeof(zend_long), 0); |
42 | 0 | p2 = safe_emalloc((ZSTR_LEN(string2) + 1), sizeof(zend_long), 0); |
43 | |
|
44 | 0 | for (i2 = 0; i2 <= ZSTR_LEN(string2); i2++) { |
45 | 0 | p1[i2] = i2 * cost_ins; |
46 | 0 | } |
47 | 0 | for (i1 = 0; i1 < ZSTR_LEN(string1) ; i1++) { |
48 | 0 | p2[0] = p1[0] + cost_del; |
49 | |
|
50 | 0 | for (i2 = 0; i2 < ZSTR_LEN(string2); i2++) { |
51 | 0 | c0 = p1[i2] + ((ZSTR_VAL(string1)[i1] == ZSTR_VAL(string2)[i2]) ? 0 : cost_rep); |
52 | 0 | c1 = p1[i2 + 1] + cost_del; |
53 | 0 | if (c1 < c0) { |
54 | 0 | c0 = c1; |
55 | 0 | } |
56 | 0 | c2 = p2[i2] + cost_ins; |
57 | 0 | if (c2 < c0) { |
58 | 0 | c0 = c2; |
59 | 0 | } |
60 | 0 | p2[i2 + 1] = c0; |
61 | 0 | } |
62 | 0 | tmp = p1; |
63 | 0 | p1 = p2; |
64 | 0 | p2 = tmp; |
65 | 0 | } |
66 | 0 | c0 = p1[ZSTR_LEN(string2)]; |
67 | |
|
68 | 0 | efree(p1); |
69 | 0 | efree(p2); |
70 | |
|
71 | 0 | return c0; |
72 | 0 | } |
73 | | /* }}} */ |
74 | | |
75 | | /* {{{ Calculate Levenshtein distance between two strings */ |
76 | | PHP_FUNCTION(levenshtein) |
77 | 0 | { |
78 | 0 | zend_string *string1, *string2; |
79 | 0 | zend_long cost_ins = 1; |
80 | 0 | zend_long cost_rep = 1; |
81 | 0 | zend_long cost_del = 1; |
82 | |
|
83 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|lll", &string1, &string2, &cost_ins, &cost_rep, &cost_del) == FAILURE) { |
84 | 0 | RETURN_THROWS(); |
85 | 0 | } |
86 | | |
87 | | |
88 | 0 | RETURN_LONG(reference_levdist(string1, string2, cost_ins, cost_rep, cost_del)); |
89 | 0 | } |
90 | | /* }}} */ |