/src/php-src/ext/standard/soundex.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> | |
14 | | +----------------------------------------------------------------------+ |
15 | | */ |
16 | | |
17 | | #include "php.h" |
18 | | #include <stdlib.h> |
19 | | #include <ctype.h> |
20 | | |
21 | | /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */ |
22 | | /* {{{ Calculate the soundex key of a string */ |
23 | | PHP_FUNCTION(soundex) |
24 | 0 | { |
25 | 0 | char *str; |
26 | 0 | size_t i, _small, str_len, code, last; |
27 | 0 | char soundex[4 + 1]; |
28 | |
|
29 | 0 | static const char soundex_table[26] = |
30 | 0 | {0, /* A */ |
31 | 0 | '1', /* B */ |
32 | 0 | '2', /* C */ |
33 | 0 | '3', /* D */ |
34 | 0 | 0, /* E */ |
35 | 0 | '1', /* F */ |
36 | 0 | '2', /* G */ |
37 | 0 | 0, /* H */ |
38 | 0 | 0, /* I */ |
39 | 0 | '2', /* J */ |
40 | 0 | '2', /* K */ |
41 | 0 | '4', /* L */ |
42 | 0 | '5', /* M */ |
43 | 0 | '5', /* N */ |
44 | 0 | 0, /* O */ |
45 | 0 | '1', /* P */ |
46 | 0 | '2', /* Q */ |
47 | 0 | '6', /* R */ |
48 | 0 | '2', /* S */ |
49 | 0 | '3', /* T */ |
50 | 0 | 0, /* U */ |
51 | 0 | '1', /* V */ |
52 | 0 | 0, /* W */ |
53 | 0 | '2', /* X */ |
54 | 0 | 0, /* Y */ |
55 | 0 | '2'}; /* Z */ |
56 | |
|
57 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
58 | 0 | Z_PARAM_STRING(str, str_len) |
59 | 0 | ZEND_PARSE_PARAMETERS_END(); |
60 | | |
61 | | /* build soundex string */ |
62 | 0 | last = -1; |
63 | 0 | for (i = 0, _small = 0; i < str_len && _small < 4; i++) { |
64 | | /* convert chars to upper case and strip non-letter chars */ |
65 | | /* BUG: should also map here accented letters used in non */ |
66 | | /* English words or names (also found in English text!): */ |
67 | | /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */ |
68 | 0 | code = toupper((int)(unsigned char)str[i]); |
69 | 0 | if (code >= 'A' && code <= 'Z') { |
70 | 0 | if (_small == 0) { |
71 | | /* remember first valid char */ |
72 | 0 | soundex[_small++] = (char)code; |
73 | 0 | last = soundex_table[code - 'A']; |
74 | 0 | } |
75 | 0 | else { |
76 | | /* ignore sequences of consonants with same soundex */ |
77 | | /* code in trail, and vowels unless they separate */ |
78 | | /* consonant letters */ |
79 | 0 | code = soundex_table[code - 'A']; |
80 | 0 | if (code != last) { |
81 | 0 | if (code != 0) { |
82 | 0 | soundex[_small++] = (char)code; |
83 | 0 | } |
84 | 0 | last = code; |
85 | 0 | } |
86 | 0 | } |
87 | 0 | } |
88 | 0 | } |
89 | | /* pad with '0' and terminate with 0 ;-) */ |
90 | 0 | while (_small < 4) { |
91 | 0 | soundex[_small++] = '0'; |
92 | 0 | } |
93 | 0 | soundex[_small] = '\0'; |
94 | |
|
95 | 0 | RETURN_STRINGL(soundex, _small); |
96 | 0 | } |
97 | | /* }}} */ |