/src/aspell/modules/speller/default/phonetic.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2000 by Kevin Atkinson under the terms of the LGPL |
2 | | |
3 | | #include "language.hpp" |
4 | | #include "phonetic.hpp" |
5 | | #include "phonet.hpp" |
6 | | |
7 | | #include "file_util.hpp" |
8 | | #include "file_data_util.hpp" |
9 | | #include "clone_ptr-t.hpp" |
10 | | |
11 | | namespace aspeller { |
12 | | |
13 | | class SimpileSoundslike : public Soundslike { |
14 | | private: |
15 | | const Language * lang; |
16 | | char first[256]; |
17 | | char rest[256]; |
18 | | public: |
19 | 20 | SimpileSoundslike(const Language * l) : lang(l) {} |
20 | | |
21 | 20 | PosibErr<void> setup(Conv &) { |
22 | 20 | memcpy(first, lang->sl_first_, 256); |
23 | 20 | memcpy(rest, lang->sl_rest_, 256); |
24 | 20 | return no_err; |
25 | 20 | } |
26 | | |
27 | 20 | String soundslike_chars() const { |
28 | 20 | bool chars_set[256] = {0}; |
29 | 5.14k | for (int i = 0; i != 256; ++i) |
30 | 5.12k | { |
31 | 5.12k | char c = first[i]; |
32 | 5.12k | if (c) chars_set[static_cast<unsigned char>(c)] = true; |
33 | 5.12k | c = rest[i]; |
34 | 5.12k | if (c) chars_set[static_cast<unsigned char>(c)] = true; |
35 | 5.12k | } |
36 | 20 | String chars_list; |
37 | 5.14k | for (int i = 0; i != 256; ++i) |
38 | 5.12k | { |
39 | 5.12k | if (chars_set[i]) |
40 | 500 | chars_list += static_cast<char>(i); |
41 | 5.12k | } |
42 | 20 | return chars_list; |
43 | 20 | } |
44 | | |
45 | | char * to_soundslike(char * res, const char * str, int size) const |
46 | 623k | { |
47 | 623k | char prev, cur = '\0'; |
48 | | |
49 | 623k | const char * i = str; |
50 | 688k | while (*i) { |
51 | 687k | cur = first[static_cast<unsigned char>(*i++)]; |
52 | 687k | if (cur) {*res++ = cur; break;} |
53 | 687k | } |
54 | 623k | prev = cur; |
55 | | |
56 | 3.02M | while (*i) { |
57 | 2.39M | cur = rest[static_cast<unsigned char>(*i++)]; |
58 | 2.39M | if (cur && cur != prev) *res++ = cur; |
59 | 2.39M | prev = cur; |
60 | 2.39M | } |
61 | 623k | *res = '\0'; |
62 | 623k | return res; |
63 | 623k | } |
64 | | |
65 | 40 | const char * name () const { |
66 | 40 | return "simple"; |
67 | 40 | } |
68 | 20 | const char * version() const { |
69 | 20 | return "2.0"; |
70 | 20 | } |
71 | | }; |
72 | | |
73 | | class NoSoundslike : public Soundslike { |
74 | | private: |
75 | | const Language * lang; |
76 | | public: |
77 | 0 | NoSoundslike(const Language * l) : lang(l) {} |
78 | | |
79 | 0 | PosibErr<void> setup(Conv &) {return no_err;} |
80 | | |
81 | 0 | String soundslike_chars() const { |
82 | 0 | return get_clean_chars(*lang); |
83 | 0 | } |
84 | | |
85 | | char * to_soundslike(char * res, const char * str, int size) const |
86 | 0 | { |
87 | 0 | return lang->LangImpl::to_clean(res, str); |
88 | 0 | } |
89 | | |
90 | 0 | const char * name() const { |
91 | 0 | return "none"; |
92 | 0 | } |
93 | 0 | const char * version() const { |
94 | 0 | return "1.0"; |
95 | 0 | } |
96 | | }; |
97 | | |
98 | | class StrippedSoundslike : public Soundslike { |
99 | | private: |
100 | | const Language * lang; |
101 | | public: |
102 | 0 | StrippedSoundslike(const Language * l) : lang(l) {} |
103 | | |
104 | 0 | PosibErr<void> setup(Conv &) {return no_err;} |
105 | | |
106 | 0 | String soundslike_chars() const { |
107 | 0 | return get_stripped_chars(*lang); |
108 | 0 | } |
109 | | |
110 | | char * to_soundslike(char * res, const char * str, int size) const |
111 | 0 | { |
112 | 0 | return lang->LangImpl::to_stripped(res, str); |
113 | 0 | } |
114 | | |
115 | 0 | const char * name() const { |
116 | 0 | return "stripped"; |
117 | 0 | } |
118 | 0 | const char * version() const { |
119 | 0 | return "1.0"; |
120 | 0 | } |
121 | | }; |
122 | | |
123 | | class PhonetSoundslike : public Soundslike { |
124 | | |
125 | | const Language * lang; |
126 | | StackPtr<PhonetParms> phonet_parms; |
127 | | |
128 | | public: |
129 | | |
130 | 698 | PhonetSoundslike(const Language * l) : lang(l) {} |
131 | | |
132 | 698 | PosibErr<void> setup(Conv & iconv) { |
133 | 698 | String file; |
134 | 698 | file += lang->data_dir(); |
135 | 698 | file += '/'; |
136 | 698 | file += lang->name(); |
137 | 698 | file += "_phonet.dat"; |
138 | 698 | PosibErr<PhonetParms *> pe = new_phonet(file, iconv, lang); |
139 | 698 | if (pe.has_err()) return pe; |
140 | 698 | phonet_parms.reset(pe); |
141 | 698 | return no_err; |
142 | 698 | } |
143 | | |
144 | | |
145 | | String soundslike_chars() const |
146 | 698 | { |
147 | 698 | bool chars_set[256] = {0}; |
148 | 698 | String chars_list; |
149 | 698 | for (const char * * i = phonet_parms->rules + 1; |
150 | 73.9k | *(i-1) != PhonetParms::rules_end; |
151 | 73.2k | i += 2) |
152 | 73.2k | { |
153 | 146k | for (const char * j = *i; *j; ++j) |
154 | 73.2k | { |
155 | 73.2k | chars_set[static_cast<unsigned char>(*j)] = true; |
156 | 73.2k | } |
157 | 73.2k | } |
158 | 179k | for (int i = 0; i != 256; ++i) |
159 | 178k | { |
160 | 178k | if (chars_set[i]) |
161 | 12.5k | chars_list += static_cast<char>(i); |
162 | 178k | } |
163 | 698 | return chars_list; |
164 | 698 | } |
165 | | |
166 | | char * to_soundslike(char * res, const char * str, int size) const |
167 | 232k | { |
168 | 232k | int new_size = phonet(str, res, size, *phonet_parms); |
169 | 232k | return res + new_size; |
170 | 232k | } |
171 | | |
172 | | const char * name() const |
173 | 2.09k | { |
174 | 2.09k | return "phonet"; |
175 | 2.09k | } |
176 | | const char * version() const |
177 | 1.39k | { |
178 | 1.39k | return phonet_parms->version.c_str(); |
179 | 1.39k | } |
180 | | }; |
181 | | |
182 | | |
183 | | PosibErr<Soundslike *> new_soundslike(ParmString name, |
184 | | Conv & iconv, |
185 | | const Language * lang) |
186 | 718 | { |
187 | 718 | Soundslike * sl; |
188 | 718 | if (name == "simple" || name == "generic") { |
189 | 20 | sl = new SimpileSoundslike(lang); |
190 | 698 | } else if (name == "stripped") { |
191 | 0 | sl = new StrippedSoundslike(lang); |
192 | 698 | } else if (name == "none") { |
193 | 0 | sl = new NoSoundslike(lang); |
194 | 698 | } else if (name == lang->name()) { |
195 | 698 | sl = new PhonetSoundslike(lang); |
196 | 698 | } else { |
197 | 0 | abort(); // FIXME |
198 | 0 | } |
199 | 718 | PosibErrBase pe = sl->setup(iconv); |
200 | 718 | if (pe.has_err()) { |
201 | 0 | delete sl; |
202 | 0 | return pe; |
203 | 718 | } else { |
204 | 718 | return sl; |
205 | 718 | } |
206 | 718 | } |
207 | | } |
208 | | |