/src/aspell/lib/find_speller.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of The New Aspell |
2 | | // Copyright (C) 2000-2001 by Kevin Atkinson under the GNU LGPL |
3 | | // license version 2.0 or 2.1. You should have received a copy of the |
4 | | // LGPL license along with this library if you did not you can find it |
5 | | // at http://www.gnu.org/. |
6 | | |
7 | | #include <assert.h> |
8 | | #include <string.h> |
9 | | |
10 | | // POSIX includes |
11 | | #include <sys/types.h> |
12 | | #include <dirent.h> |
13 | | |
14 | | #include "asc_ctype.hpp" |
15 | | #include "can_have_error.hpp" |
16 | | #include "config.hpp" |
17 | | #include "convert.hpp" |
18 | | #include "enumeration.hpp" |
19 | | #include "errors.hpp" |
20 | | #include "filter.hpp" |
21 | | #include "fstream.hpp" |
22 | | #include "getdata.hpp" |
23 | | #include "info.hpp" |
24 | | #include "speller.hpp" |
25 | | #include "stack_ptr.hpp" |
26 | | #include "string_enumeration.hpp" |
27 | | #include "string_list.hpp" |
28 | | #include "string_map.hpp" |
29 | | |
30 | | #include "gettext.h" |
31 | | |
32 | | #if 0 |
33 | | #include "preload.h" |
34 | | #define LT_NON_POSIX_NAMESPACE 1 |
35 | | #ifdef USE_LTDL |
36 | | #include <ltdl.h> |
37 | | #endif |
38 | | #endif |
39 | | |
40 | | using namespace acommon; |
41 | | |
42 | | namespace acommon { |
43 | | |
44 | | static void free_lt_handle(SpellerLtHandle h) |
45 | 0 | { |
46 | | #ifdef USE_LTDL |
47 | | int s; |
48 | | s = lt_dlclose((lt_dlhandle)h); |
49 | | assert (s == 0); |
50 | | s = lt_dlexit(); |
51 | | assert (s == 0); |
52 | | #endif |
53 | 0 | } |
54 | | |
55 | | extern "C" |
56 | | Speller * libaspell_speller_default_LTX_new_speller_class(SpellerLtHandle); |
57 | | |
58 | | PosibErr<Speller *> get_speller_class(Config * config) |
59 | 470 | { |
60 | 470 | String name = config->retrieve("module"); |
61 | 470 | assert(name == "default"); |
62 | 470 | return libaspell_speller_default_LTX_new_speller_class(0); |
63 | | #if 0 |
64 | | unsigned int i; |
65 | | for (i = 0; i != aspell_speller_funs_size; ++i) { |
66 | | if (strcmp(name.c_str(), aspell_speller_funs[i].name) == 0) { |
67 | | return (*aspell_speller_funs[i].fun)(config, 0); |
68 | | } |
69 | | } |
70 | | |
71 | | #ifdef USE_LTDL |
72 | | int s = lt_dlinit(); |
73 | | assert(s == 0); |
74 | | String libname; |
75 | | libname = LIBDIR "/libaspell_"; |
76 | | libname += name; |
77 | | libname += ".la"; |
78 | | lt_dlhandle h = lt_dlopen (libname.c_str()); |
79 | | if (h == 0) |
80 | | return (new CanHaveErrorImpl()) |
81 | | ->set_error(cant_load_module, name.c_str()); |
82 | | lt_ptr_t fun = lt_dlsym (h, "new_aspell_speller_class"); |
83 | | assert (fun != 0); |
84 | | CanHaveError * m = (*(NewSpellerClass)(fun))(config, h); |
85 | | assert (m != 0); |
86 | | if (m->error_number() != 0) |
87 | | free_lt_handle(h); |
88 | | return m; |
89 | | #else |
90 | | return (new CanHaveErrorImpl()) |
91 | | ->set_error(cant_load_module, name.c_str()); |
92 | | #endif |
93 | | #endif |
94 | 470 | } |
95 | | |
96 | | // Note this writes all over str |
97 | | static void split_string_list(StringList & list, ParmString str) |
98 | 972 | { |
99 | 972 | const char * s0 = str; |
100 | 972 | const char * s1; |
101 | 1.92k | while (true) { |
102 | 1.93k | while (*s0 != '\0' && asc_isspace(*s0)) ++s0; |
103 | 1.92k | if (*s0 == '\0') break; |
104 | 954 | s1 = s0; |
105 | 4.28k | while (!asc_isspace(*s1)) ++s1; |
106 | 954 | String temp(s0,s1-s0); |
107 | 954 | list.add(temp); |
108 | 954 | if (*s1 != '\0') |
109 | 954 | s0 = s1 + 1; |
110 | 954 | } |
111 | 972 | } |
112 | | |
// Outcome of comparing a candidate against the best candidate so far.
enum IsBetter {BetterMatch, WorseMatch, SameMatch};

// One ranking criterion.  Lower ranks are better; a rank greater than or
// equal to worst_rank means the candidate is unacceptable for this
// criterion.
struct Better
{
  unsigned int cur_rank;    // rank of the candidate being examined
  unsigned int best_rank;   // rank of the best candidate accepted so far
  unsigned int worst_rank;  // first rank that counts as "no match"
  // Set up worst_rank and best_rank before any candidate is examined.
  virtual void init() = 0;
  // Record the current candidate as the new best one.
  virtual void set_best_from_cur() = 0;
  // Compute cur_rank for the current candidate.
  virtual void set_cur_rank() = 0;
  IsBetter better_match(IsBetter prev);
  virtual ~Better();
};

Better::~Better() {}

// Fold this criterion into the verdict prev reached by the criteria that
// were checked before it.
//
// A WorseMatch verdict is final and is passed through without ranking the
// candidate.  Otherwise the candidate is ranked: an unacceptable rank gives
// WorseMatch, a rank better than the best so far gives BetterMatch, and an
// equal rank leaves prev unchanged.  A rank that is acceptable but worse
// than the best gives WorseMatch when prev is SameMatch and BetterMatch
// when an earlier criterion already reported BetterMatch.
IsBetter Better::better_match (IsBetter prev)
{
  if (prev == WorseMatch)
    return WorseMatch;

  set_cur_rank();

  if (cur_rank >= worst_rank)
    return WorseMatch;
  if (cur_rank < best_rank)
    return BetterMatch;
  if (cur_rank == best_rank)
    return prev;

  // Here best_rank < cur_rank < worst_rank.
  return prev == SameMatch ? WorseMatch : BetterMatch;
}
146 | | |
147 | | struct BetterList : public Better |
148 | | { |
149 | | const char * cur; |
150 | | StringList list; |
151 | | const char * best; |
152 | | BetterList(); |
153 | | void init(); |
154 | | void set_best_from_cur(); |
155 | | void set_cur_rank(); |
156 | | }; |
157 | | |
158 | | BetterList::BetterList() |
159 | 972 | { |
160 | 972 | } |
161 | | |
162 | 969 | void BetterList::init() { |
163 | 969 | StringListEnumeration es = list.elements_obj(); |
164 | 969 | worst_rank = 0; |
165 | 2.40k | while ( (es.next()) != 0) |
166 | 1.43k | ++worst_rank; |
167 | 969 | best_rank = worst_rank; |
168 | 969 | } |
169 | | |
170 | | void BetterList::set_best_from_cur() |
171 | 1.80k | { |
172 | 1.80k | best_rank = cur_rank; |
173 | 1.80k | best = cur; |
174 | 1.80k | } |
175 | | |
176 | | void BetterList::set_cur_rank() |
177 | 26.7k | { |
178 | 26.7k | StringListEnumeration es = list.elements_obj(); |
179 | 26.7k | const char * m; |
180 | 26.7k | cur_rank = 0; |
181 | 59.5k | while ( (m = es.next()) != 0 && strcmp(m, cur) != 0) |
182 | 32.8k | ++cur_rank; |
183 | 26.7k | } |
184 | | |
185 | | struct BetterSize : public Better |
186 | | { |
187 | | unsigned int cur; |
188 | | const char * cur_str; |
189 | | char req_type; |
190 | | unsigned int requested; |
191 | | unsigned int size; |
192 | | unsigned int best; |
193 | | const char * best_str; |
194 | | void init(); |
195 | | void set_best_from_cur(); |
196 | | void set_cur_rank(); |
197 | | }; |
198 | | |
199 | | |
200 | 483 | void BetterSize::init() { |
201 | 483 | worst_rank = 0xFFF; |
202 | 483 | best_rank = worst_rank; |
203 | 483 | } |
204 | | |
205 | | void BetterSize::set_best_from_cur() |
206 | 904 | { |
207 | 904 | best_rank = cur_rank; |
208 | 904 | best = cur; |
209 | 904 | best_str = cur_str; |
210 | 904 | } |
211 | | |
212 | | void BetterSize::set_cur_rank() |
213 | 4.17k | { |
214 | 4.17k | int diff = cur - requested; |
215 | 4.17k | int sign; |
216 | 4.17k | if (diff < 0) { |
217 | 216 | cur_rank = -diff; |
218 | 216 | sign = -1; |
219 | 3.95k | } else { |
220 | 3.95k | cur_rank = diff; |
221 | 3.95k | sign = 1; |
222 | 3.95k | } |
223 | 4.17k | cur_rank <<= 1; |
224 | 4.17k | if ((sign == -1 && req_type == '+') || (sign == 1 && req_type == '-')) |
225 | 51 | cur_rank |= 0x1; |
226 | 4.12k | else if ((sign == -1 && req_type == '>') || (sign == 1 && req_type == '<')) |
227 | 250 | cur_rank |= 0x100; |
228 | 4.17k | } |
229 | | |
230 | | struct BetterVariety : public Better |
231 | | { |
232 | | const char * cur; |
233 | | StringList list; |
234 | | const char * best; |
235 | 486 | BetterVariety() {} |
236 | | void init(); |
237 | | void set_best_from_cur(); |
238 | | void set_cur_rank(); |
239 | | }; |
240 | | |
241 | 486 | void BetterVariety::init() { |
242 | 486 | worst_rank = 3; |
243 | 486 | best_rank = 3; |
244 | 486 | } |
245 | | |
246 | | void BetterVariety::set_best_from_cur() |
247 | 904 | { |
248 | 904 | best_rank = cur_rank; |
249 | 904 | best = cur; |
250 | 904 | } |
251 | | |
252 | | void BetterVariety::set_cur_rank() |
253 | 8.34k | { |
254 | 8.34k | if (strlen(cur) == 0) { |
255 | 4.17k | cur_rank = 2; |
256 | 4.17k | } else { |
257 | 4.17k | StringListEnumeration es = list.elements_obj(); |
258 | 4.17k | const char * m; |
259 | 4.17k | cur_rank = 3; |
260 | 4.17k | unsigned list_size = 0, num = 0; |
261 | 4.17k | while ( (m = es.next()) != 0 ) { |
262 | 384 | ++list_size; |
263 | 384 | unsigned s = strlen(m); |
264 | 384 | const char * c = cur; |
265 | 384 | unsigned p; |
266 | 384 | bool match = false; |
267 | 384 | num = 0; |
268 | 1.19k | for (; *c != '\0'; c += p) { |
269 | 806 | ++num; |
270 | 806 | p = strcspn(c, "-"); |
271 | 806 | if (p == s && memcmp(m, c, s) == 0) {match = true; break;} |
272 | 806 | if (c[p] == '-') p++; |
273 | 806 | } |
274 | 384 | if (!match) goto fail; |
275 | 0 | cur_rank = 0; |
276 | 0 | } |
277 | 3.78k | if (cur_rank == 0 && num != list_size) cur_rank = 1; |
278 | 3.78k | } |
279 | 7.95k | return; |
280 | 7.95k | fail: |
281 | 384 | cur_rank = 3; |
282 | 384 | } |
283 | | |
284 | | PosibErr<Config *> find_word_list(Config * c) |
285 | 523 | { |
286 | 523 | StackPtr<Config> config(new_config()); |
287 | 523 | RET_ON_ERR(config->read_in_settings(c)); |
288 | 489 | String dict_name; |
289 | | |
290 | 489 | if (config->have("master")) { |
291 | 3 | dict_name = config->retrieve("master"); |
292 | | |
293 | 486 | } else { |
294 | | |
295 | | //////////////////////////////////////////////////////////////////// |
296 | | // |
297 | | // Give first preference to an exact match for the language-country |
298 | | // code, then give preference to those in the alternate code list |
299 | | // in the order they are presented, then if there is no match |
300 | | // look for one for just language. If that fails give up. |
301 | | // Once the best matching code is found, try to find a matching |
302 | | // variety if one exists, other wise look for one with no variety. |
303 | | // |
304 | | |
305 | 486 | BetterList b_code; |
306 | | //BetterList b_jargon; |
307 | 486 | BetterVariety b_variety; |
308 | 486 | BetterList b_module; |
309 | 486 | BetterSize b_size; |
310 | 486 | Better * better[4] = {&b_code,&b_variety,&b_module,&b_size}; |
311 | 486 | const DictInfo * best = 0; |
312 | | |
313 | | // |
314 | | // retrieve and normalize code |
315 | | // |
316 | 486 | const char * p; |
317 | 486 | String code; |
318 | 486 | PosibErr<String> str = config->retrieve("lang"); |
319 | 486 | p = str.data.c_str(); |
320 | 1.45k | while (asc_isalpha(*p)) |
321 | 965 | code += asc_tolower(*p++); |
322 | 486 | String lang = code; |
323 | 486 | bool have_country = false; |
324 | 486 | if (*p == '-' || *p == '_') { |
325 | 472 | ++p; |
326 | 472 | have_country = true; |
327 | 472 | code += '_'; |
328 | 1.41k | while (asc_isalpha(*p)) |
329 | 945 | code += asc_toupper(*p++); |
330 | 472 | } |
331 | | |
332 | | // |
333 | | // Retrieve acceptable code search orders |
334 | | // |
335 | 486 | String lang_country_list; |
336 | 486 | if (have_country) { |
337 | 472 | lang_country_list = code; |
338 | 472 | lang_country_list += ' '; |
339 | 472 | } |
340 | 486 | String lang_only_list = lang; |
341 | 486 | lang_only_list += ' '; |
342 | | |
343 | | // read retrieve lang_country_list and lang_only_list from file(s) |
344 | | // FIXME: Write Me |
345 | | |
346 | | // |
347 | 486 | split_string_list(b_code.list, lang_country_list); |
348 | 486 | split_string_list(b_code.list, lang_only_list); |
349 | 486 | b_code.init(); |
350 | | |
351 | | // |
352 | | // Retrieve Variety |
353 | | // |
354 | 486 | config->retrieve_list("variety", &b_variety.list); |
355 | 486 | if (b_variety.list.empty() && config->have("jargon")) |
356 | 44 | b_variety.list.add(config->retrieve("jargon")); |
357 | 486 | b_variety.init(); |
358 | 486 | str.data.clear(); |
359 | | |
360 | | // |
361 | | // Retrieve module list |
362 | | // |
363 | 486 | if (config->have("module")) |
364 | 2 | b_module.list.add(config->retrieve("module")); |
365 | 484 | else if (config->have("module-search-order")) |
366 | 0 | config->retrieve_list("module-search-order", &b_module.list); |
367 | 486 | { |
368 | 486 | RET_ON_ERR_SET(get_module_info_list(config), const ModuleInfoList *, modules); |
369 | 483 | StackPtr<ModuleInfoEnumeration> els(modules->elements()); |
370 | 483 | const ModuleInfo * entry; |
371 | 966 | while ( (entry = els->next()) != 0) |
372 | 483 | b_module.list.add(entry->name); |
373 | 483 | } |
374 | 0 | b_module.init(); |
375 | | |
376 | | // |
377 | | // Retrieve size |
378 | | // |
379 | 483 | str = config->retrieve("size"); |
380 | 483 | p = str.data.c_str(); |
381 | 483 | if (p[0] == '+' || p[0] == '-' || p[0] == '<' || p[0] == '>') { |
382 | 483 | b_size.req_type = p[0]; |
383 | 483 | ++p; |
384 | 483 | } else { |
385 | 0 | b_size.req_type = '+'; |
386 | 0 | } |
387 | 483 | if (!asc_isdigit(p[0]) || !asc_isdigit(p[1]) || p[2] != '\0') |
388 | 0 | return make_err(aerror_bad_value, "size", str, "valid"); |
389 | 483 | b_size.requested = atoi(p); |
390 | 483 | b_size.init(); |
391 | | |
392 | | // |
393 | | // |
394 | | // |
395 | | |
396 | 483 | const DictInfoList * dlist = get_dict_info_list(config); |
397 | 483 | DictInfoEnumeration * dels = dlist->elements(); |
398 | 483 | const DictInfo * entry; |
399 | | |
400 | 23.0k | while ( (entry = dels->next()) != 0) { |
401 | | |
402 | 22.5k | b_code .cur = entry->code; |
403 | 22.5k | b_module.cur = entry->module->name; |
404 | | |
405 | 22.5k | b_variety.cur = entry->variety; |
406 | | |
407 | 22.5k | b_size.cur_str = entry->size_str; |
408 | 22.5k | b_size.cur = entry->size; |
409 | | |
410 | | // |
411 | | // check to see if we got a better match than the current |
412 | | // best_match if any |
413 | | // |
414 | | |
415 | 22.5k | IsBetter is_better = SameMatch; |
416 | 112k | for (int i = 0; i != 4; ++i) |
417 | 90.1k | is_better = better[i]->better_match(is_better); |
418 | | |
419 | 22.5k | if (is_better == BetterMatch) { |
420 | 4.52k | for (int i = 0; i != 4; ++i) |
421 | 3.61k | better[i]->set_best_from_cur(); |
422 | 904 | best = entry; |
423 | 904 | } |
424 | 22.5k | } |
425 | | |
426 | 483 | delete dels; |
427 | | |
428 | | // |
429 | | // set config to best match |
430 | | // |
431 | 483 | if (best != 0) { |
432 | 467 | String main_wl,flags; |
433 | 467 | RET_ON_ERR(get_dict_file_name(best, main_wl, flags)); |
434 | 467 | dict_name = best->name; |
435 | 467 | config->replace("lang", b_code.best); |
436 | 467 | config->replace("language-tag", b_code.best); |
437 | 467 | config->replace("master", main_wl.c_str()); |
438 | 467 | config->replace("master-flags", flags.c_str()); |
439 | 467 | config->replace("module", b_module.best); |
440 | 467 | config->replace("jargon", b_variety.best); |
441 | 467 | config->replace("clear-variety", ""); |
442 | 467 | unsigned p; |
443 | 934 | for (const char * c = b_module.best; *c != '\0'; c += p) { |
444 | 467 | p = strcspn(c, "-"); |
445 | 467 | config->replace("add-variety", String(c, p)); |
446 | 467 | } |
447 | 467 | config->replace("size", b_size.best_str); |
448 | 467 | } else { |
449 | 16 | return make_err(no_wordlist_for_lang, code); |
450 | 16 | } |
451 | 483 | } |
452 | | |
453 | 470 | RET_ON_ERR_SET(get_dict_aliases(config), const StringMap *, dict_aliases); |
454 | 470 | const char * val = dict_aliases->lookup(dict_name); |
455 | 470 | if (val) config->replace("master", val); |
456 | 470 | return config.release(); |
457 | 470 | } |
458 | | |
459 | | PosibErr<void> reload_filters(Speller * m) |
460 | 457 | { |
461 | 457 | m->to_internal_->filter.clear(); |
462 | 457 | m->from_internal_->filter.clear(); |
463 | | // Add enocder and decoder filters if any |
464 | 457 | RET_ON_ERR(setup_filter(m->to_internal_->filter, m->config(), |
465 | 457 | true, false, false)); |
466 | 454 | RET_ON_ERR(setup_filter(m->from_internal_->filter, m->config(), |
467 | 454 | false, false, true)); |
468 | 454 | return no_err; |
469 | 454 | } |
470 | | |
471 | | PosibErr<Speller *> new_speller(Config * c0) |
472 | 523 | { |
473 | 523 | aspell_gettext_init(); |
474 | | |
475 | 523 | RET_ON_ERR_SET(find_word_list(c0), Config *, c); |
476 | 470 | StackPtr<Speller> m(get_speller_class(c)); |
477 | 470 | RET_ON_ERR(m->setup(c)); |
478 | | |
479 | 457 | RET_ON_ERR(reload_filters(m)); |
480 | | |
481 | 454 | return m.release(); |
482 | 457 | } |
483 | | |
484 | | void delete_speller(Speller * m) |
485 | 0 | { |
486 | 0 | SpellerLtHandle h = ((Speller *)(m))->lt_handle(); |
487 | 0 | delete m; |
488 | 0 | if (h != 0) free_lt_handle(h); |
489 | 0 | } |
490 | | } |