/src/php-src/ext/standard/soundex.c
Line | Count | Source |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright © The PHP Group and Contributors. | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to the Modified BSD License that is | |
6 | | | bundled with this package in the file LICENSE, and is available | |
7 | | | through the World Wide Web at <https://www.php.net/license/>. | |
8 | | | | |
9 | | | SPDX-License-Identifier: BSD-3-Clause | |
10 | | +----------------------------------------------------------------------+ |
11 | | | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> | |
12 | | +----------------------------------------------------------------------+ |
13 | | */ |
14 | | |
15 | | #include "php.h" |
16 | | #include <stdlib.h> |
17 | | #include <ctype.h> |
18 | | |
19 | | /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */ |
20 | | /* {{{ Calculate the soundex key of a string */ |
21 | | PHP_FUNCTION(soundex) |
22 | 0 | { |
23 | 0 | char *str; |
24 | 0 | size_t i, _small, str_len, code, last; |
25 | 0 | char soundex[4 + 1]; |
26 | |
|
27 | 0 | static const char soundex_table[26] = |
28 | 0 | {0, /* A */ |
29 | 0 | '1', /* B */ |
30 | 0 | '2', /* C */ |
31 | 0 | '3', /* D */ |
32 | 0 | 0, /* E */ |
33 | 0 | '1', /* F */ |
34 | 0 | '2', /* G */ |
35 | 0 | 0, /* H */ |
36 | 0 | 0, /* I */ |
37 | 0 | '2', /* J */ |
38 | 0 | '2', /* K */ |
39 | 0 | '4', /* L */ |
40 | 0 | '5', /* M */ |
41 | 0 | '5', /* N */ |
42 | 0 | 0, /* O */ |
43 | 0 | '1', /* P */ |
44 | 0 | '2', /* Q */ |
45 | 0 | '6', /* R */ |
46 | 0 | '2', /* S */ |
47 | 0 | '3', /* T */ |
48 | 0 | 0, /* U */ |
49 | 0 | '1', /* V */ |
50 | 0 | 0, /* W */ |
51 | 0 | '2', /* X */ |
52 | 0 | 0, /* Y */ |
53 | 0 | '2'}; /* Z */ |
54 | |
|
55 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
56 | 0 | Z_PARAM_STRING(str, str_len) |
57 | 0 | ZEND_PARSE_PARAMETERS_END(); |
58 | | |
59 | | /* build soundex string */ |
60 | 0 | last = -1; |
61 | 0 | for (i = 0, _small = 0; i < str_len && _small < 4; i++) { |
62 | | /* convert chars to upper case and strip non-letter chars */ |
63 | | /* BUG: should also map here accented letters used in non */ |
64 | | /* English words or names (also found in English text!): */ |
65 | | /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */ |
66 | 0 | code = toupper((unsigned char)str[i]); |
67 | 0 | if (code >= 'A' && code <= 'Z') { |
68 | 0 | if (_small == 0) { |
69 | | /* remember first valid char */ |
70 | 0 | soundex[_small++] = (char)code; |
71 | 0 | last = soundex_table[code - 'A']; |
72 | 0 | } |
73 | 0 | else { |
74 | | /* ignore sequences of consonants with same soundex */ |
75 | | /* code in trail, and vowels unless they separate */ |
76 | | /* consonant letters */ |
77 | 0 | code = soundex_table[code - 'A']; |
78 | 0 | if (code != last) { |
79 | 0 | if (code != 0) { |
80 | 0 | soundex[_small++] = (char)code; |
81 | 0 | } |
82 | 0 | last = code; |
83 | 0 | } |
84 | 0 | } |
85 | 0 | } |
86 | 0 | } |
87 | | /* pad with '0' and terminate with 0 ;-) */ |
88 | 0 | while (_small < 4) { |
89 | 0 | soundex[_small++] = '0'; |
90 | 0 | } |
91 | 0 | soundex[_small] = '\0'; |
92 | |
|
93 | 0 | RETURN_STRINGL(soundex, _small); |
94 | 0 | } |
95 | | /* }}} */ |