Coverage Report

Created: 2025-08-26 06:57

/src/aspell/lib/find_speller.cpp
Line
Count
Source (jump to first uncovered line)
1
// This file is part of The New Aspell
2
// Copyright (C) 2000-2001 by Kevin Atkinson under the GNU LGPL
3
// license version 2.0 or 2.1.  You should have received a copy of the
4
// LGPL license along with this library if you did not you can find it
5
// at http://www.gnu.org/.
6
7
#include <assert.h>
8
#include <string.h>
9
10
// POSIX includes
11
#include <sys/types.h>
12
#include <dirent.h>
13
14
#include "asc_ctype.hpp"
15
#include "can_have_error.hpp"
16
#include "config.hpp"
17
#include "convert.hpp"
18
#include "enumeration.hpp"
19
#include "errors.hpp"
20
#include "filter.hpp"
21
#include "fstream.hpp"
22
#include "getdata.hpp"
23
#include "info.hpp"
24
#include "speller.hpp"
25
#include "stack_ptr.hpp"
26
#include "string_enumeration.hpp"
27
#include "string_list.hpp"
28
#include "string_map.hpp"
29
30
#include "gettext.h"
31
32
#if 0
33
#include "preload.h"
34
#define LT_NON_POSIX_NAMESPACE 1
35
#ifdef USE_LTDL
36
#include <ltdl.h>
37
#endif
38
#endif
39
40
using namespace acommon;
41
42
namespace acommon {
43
44
  // Close the libltdl handle h and then shut libltdl down.  Both calls
  // are asserted to succeed.  When USE_LTDL is not defined this function
  // does nothing.
  static void free_lt_handle(SpellerLtHandle h)
  {
#ifdef USE_LTDL
    int status = lt_dlclose((lt_dlhandle)h);
    assert (status == 0);
    status = lt_dlexit();
    assert (status == 0);
#endif
  }
54
55
  extern "C" 
56
  Speller * libaspell_speller_default_LTX_new_speller_class(SpellerLtHandle);
57
  
58
  // Create the speller implementation named by the "module" setting of
  // config.  Only the statically linked "default" module is handled; any
  // other name trips the assert.  The code under "#if 0" is the disabled
  // lookup-table / libltdl loader and is never compiled.
  PosibErr<Speller *> get_speller_class(Config * config)
  {
    String name = config->retrieve("module");
    assert(name == "default");
    // the built-in module is created with a null module handle
    Speller * impl = libaspell_speller_default_LTX_new_speller_class(0);
    return impl;
#if 0
    unsigned int i; 
    for (i = 0; i != aspell_speller_funs_size; ++i) {
      if (strcmp(name.c_str(), aspell_speller_funs[i].name) == 0) {
        return (*aspell_speller_funs[i].fun)(config, 0);
      }
    }
  
#ifdef USE_LTDL
    int s = lt_dlinit();
    assert(s == 0);
    String libname;
    libname  = LIBDIR "/libaspell_";
    libname += name;
    libname += ".la";
    lt_dlhandle h = lt_dlopen (libname.c_str());
    if (h == 0)
      return (new CanHaveErrorImpl())
        ->set_error(cant_load_module, name.c_str());
    lt_ptr_t fun = lt_dlsym (h, "new_aspell_speller_class");
    assert (fun != 0);
    CanHaveError * m = (*(NewSpellerClass)(fun))(config, h);
    assert (m != 0);
    if (m->error_number() != 0)
      free_lt_handle(h);
    return m;
#else
    return (new CanHaveErrorImpl())
      ->set_error(cant_load_module, name.c_str());
#endif
#endif
  }
95
96
  // Note this writes all over str
97
  static void split_string_list(StringList & list, ParmString str)
98
972
  {
99
972
    const char * s0 = str;
100
972
    const char * s1;
101
1.92k
    while (true) {
102
1.93k
      while (*s0 != '\0' && asc_isspace(*s0)) ++s0;
103
1.92k
      if (*s0 == '\0') break;
104
954
      s1 = s0;
105
4.28k
      while (!asc_isspace(*s1)) ++s1;
106
954
      String temp(s0,s1-s0);
107
954
      list.add(temp);
108
954
      if (*s1 != '\0')
109
954
  s0 = s1 + 1;
110
954
    }
111
972
  }
112
113
  // Verdict of comparing the current candidate with the best one so far.
  enum IsBetter {BetterMatch, WorseMatch, SameMatch};

  // One ranking criterion.  A lower rank is a better match.  Subclasses
  // supply the rank of the current candidate (set_cur_rank), the initial
  // best/worst ranks (init) and how to remember the current candidate as
  // the new best (set_best_from_cur).
  struct Better
  {
    unsigned int cur_rank;    // rank of the candidate being examined
    unsigned int best_rank;   // rank of the best candidate so far
    unsigned int worst_rank;  // ranks >= this are rejected
    virtual void init() = 0;
    virtual void set_best_from_cur() = 0;
    virtual void set_cur_rank() = 0;
    IsBetter better_match(IsBetter prev);
    virtual ~Better();
  };

  Better::~Better() {}

  // Fold this criterion into the verdict prev produced by the criteria
  // checked before it.  A WorseMatch verdict is passed through without
  // ranking the candidate; otherwise set_cur_rank() is called and
  // cur_rank is compared with worst_rank and best_rank.
  IsBetter Better::better_match (IsBetter prev)
  {
    if (prev == WorseMatch) return prev;

    set_cur_rank();

    if (cur_rank >= worst_rank) return WorseMatch;
    if (cur_rank <  best_rank)  return BetterMatch;
    if (cur_rank == best_rank)  return prev;

    // here best_rank < cur_rank < worst_rank: worse on this criterion,
    // which only matters if the earlier criteria were all ties
    return prev == SameMatch ? WorseMatch : BetterMatch;
  }
146
147
  struct BetterList : public Better
148
  {
149
    const char *         cur;
150
    StringList           list;
151
    const char *         best;
152
    BetterList();
153
    void init();
154
    void set_best_from_cur();
155
    void set_cur_rank();
156
  };
157
158
  BetterList::BetterList() 
159
972
  {
160
972
  }
161
162
969
  void BetterList::init() {
163
969
    StringListEnumeration es = list.elements_obj();
164
969
    worst_rank = 0;
165
2.40k
    while ( (es.next()) != 0)
166
1.43k
      ++worst_rank;
167
969
    best_rank = worst_rank;
168
969
  }
169
170
  void BetterList::set_best_from_cur() 
171
1.80k
  {
172
1.80k
    best_rank = cur_rank;
173
1.80k
    best = cur;
174
1.80k
  }
175
176
  void BetterList::set_cur_rank() 
177
26.7k
  {
178
26.7k
    StringListEnumeration es = list.elements_obj();
179
26.7k
    const char * m;
180
26.7k
    cur_rank = 0;
181
59.5k
    while ( (m = es.next()) != 0 && strcmp(m, cur) != 0)
182
32.8k
      ++cur_rank;
183
26.7k
  }
184
185
  struct BetterSize : public Better
186
  {
187
    unsigned int         cur;
188
    const char *         cur_str;
189
    char                 req_type;
190
    unsigned int         requested;
191
    unsigned int         size;
192
    unsigned int         best;
193
    const char *         best_str;
194
    void init();
195
    void set_best_from_cur();
196
    void set_cur_rank();
197
  };
198
199
200
483
  void BetterSize::init() {
201
483
    worst_rank = 0xFFF;
202
483
    best_rank = worst_rank;
203
483
  }
204
205
  void BetterSize::set_best_from_cur() 
206
904
  {
207
904
    best_rank = cur_rank;
208
904
    best = cur;
209
904
    best_str = cur_str;
210
904
  }
211
212
  void BetterSize::set_cur_rank() 
213
4.17k
  {
214
4.17k
    int diff = cur - requested;
215
4.17k
    int sign;
216
4.17k
    if (diff < 0) {
217
216
      cur_rank = -diff;
218
216
      sign = -1;
219
3.95k
    } else {
220
3.95k
      cur_rank = diff;
221
3.95k
      sign = 1;
222
3.95k
    }
223
4.17k
    cur_rank <<= 1;
224
4.17k
    if ((sign == -1 && req_type == '+') || (sign == 1 && req_type == '-'))
225
51
      cur_rank |= 0x1;
226
4.12k
    else if ((sign == -1 && req_type == '>') || (sign == 1 && req_type == '<'))
227
250
      cur_rank |= 0x100;
228
4.17k
  }
229
230
  struct BetterVariety : public Better
231
  {
232
    const char *         cur;
233
    StringList           list;
234
    const char *         best;
235
486
    BetterVariety() {}
236
    void init();
237
    void set_best_from_cur();
238
    void set_cur_rank();
239
  };
240
241
486
  void BetterVariety::init() {
242
486
    worst_rank = 3;
243
486
    best_rank = 3;
244
486
  }
245
246
  void BetterVariety::set_best_from_cur() 
247
904
  {
248
904
    best_rank = cur_rank;
249
904
    best = cur;
250
904
  }
251
252
  void BetterVariety::set_cur_rank() 
253
8.34k
  {
254
8.34k
    if (strlen(cur) == 0) {
255
4.17k
      cur_rank = 2; 
256
4.17k
    } else {
257
4.17k
      StringListEnumeration es = list.elements_obj();
258
4.17k
      const char * m;
259
4.17k
      cur_rank = 3;
260
4.17k
      unsigned list_size = 0, num = 0;
261
4.17k
      while ( (m = es.next()) != 0 ) {
262
384
        ++list_size;
263
384
        unsigned s = strlen(m);
264
384
        const char * c = cur;
265
384
        unsigned p;
266
384
        bool match = false;
267
384
        num = 0;
268
1.19k
        for (; *c != '\0'; c += p) {
269
806
          ++num;
270
806
          p = strcspn(c, "-");
271
806
          if (p == s && memcmp(m, c, s) == 0) {match = true; break;}
272
806
          if (c[p] == '-') p++;
273
806
        }
274
384
        if (!match) goto fail;
275
0
        cur_rank = 0;
276
0
      }
277
3.78k
      if (cur_rank == 0 && num != list_size) cur_rank = 1;
278
3.78k
    }
279
7.95k
    return;
280
7.95k
  fail:
281
384
    cur_rank = 3;
282
384
  }
283
284
  // Work out which master word list to use for the settings in c.
  //
  // A new Config is created and loaded from c.  If it already has a
  // "master" value, that is taken as the dictionary name.  Otherwise
  // every entry of the dictionary info list is ranked by language code,
  // variety, module and size (checked in that order; see
  // Better::better_match) and the best entry is written back into the
  // new config as "lang", "language-tag", "master", "master-flags",
  // "module", "jargon", the "variety" list and "size".  Finally the
  // dictionary name is looked up in the dictionary alias map and, when
  // it has an alias, "master" is replaced by it.
  //
  // Returns the new Config, released from its StackPtr, or an error when
  // the settings cannot be read, the "size" value is malformed, the
  // module/alias information cannot be loaded, or no dictionary matches
  // the requested language.
  PosibErr<Config *> find_word_list(Config * c) 
  {
    StackPtr<Config> config(new_config());
    RET_ON_ERR(config->read_in_settings(c));
    String dict_name;

    if (config->have("master")) {
      dict_name = config->retrieve("master");

    } else {

      ////////////////////////////////////////////////////////////////////
      //
      // Give first preference to an exact match for the language-country
      // code, then give preference to those in the alternate code list
      // in the order they are presented, then if there is no match
      // look for one for just language.  If that fails give up.
      // Once the best matching code is found, try to find a matching
      // variety if one exists, otherwise look for one with no variety.
      //

      BetterList b_code;
      //BetterList b_jargon;
      BetterVariety b_variety;
      BetterList b_module;
      BetterSize b_size;
      // criteria in the order they are applied to each dictionary
      Better * better[4] = {&b_code,&b_variety,&b_module,&b_size};
      const DictInfo * best = 0;

      //
      // retrieve and normalize code: lower-case language, optionally
      // followed by '_' and an upper-case country
      //
      const char * p;
      String code;
      PosibErr<String> str = config->retrieve("lang");
      p = str.data.c_str();
      while (asc_isalpha(*p))
        code += asc_tolower(*p++);
      String lang = code;
      bool have_country = false;
      if (*p == '-' || *p == '_') {
        ++p;
        have_country = true;
        code += '_'; 
        while (asc_isalpha(*p))
          code += asc_toupper(*p++);
      }
  
      //
      // Retrieve acceptable code search orders
      //
      String lang_country_list;
      if (have_country) {
        lang_country_list = code;
        lang_country_list += ' ';
      }
      String lang_only_list = lang;
      lang_only_list += ' ';

      // read retrieve lang_country_list and lang_only_list from file(s)
      // FIXME: Write Me

      //
      split_string_list(b_code.list, lang_country_list);
      split_string_list(b_code.list, lang_only_list);
      b_code.init();

      //
      // Retrieve Variety ("jargon" is used when no variety is given)
      // 
      config->retrieve_list("variety", &b_variety.list);
      if (b_variety.list.empty() && config->have("jargon")) 
        b_variety.list.add(config->retrieve("jargon"));
      b_variety.init();
      str.data.clear();

      //
      // Retrieve module list: the explicit choice (or search order)
      // first, followed by every known module
      //
      if (config->have("module"))
        b_module.list.add(config->retrieve("module"));
      else if (config->have("module-search-order"))
        config->retrieve_list("module-search-order", &b_module.list);
      {
        RET_ON_ERR_SET(get_module_info_list(config), const ModuleInfoList *, modules);
        StackPtr<ModuleInfoEnumeration> els(modules->elements());
        const ModuleInfo * entry;
        while ( (entry = els->next()) != 0)
          b_module.list.add(entry->name);
      }
      b_module.init();

      //
      // Retrieve size: an optional '+', '-', '<' or '>' followed by
      // exactly two digits
      //
      str = config->retrieve("size");
      p = str.data.c_str();
      if (p[0] == '+' || p[0] == '-' || p[0] == '<' || p[0] == '>') {
        b_size.req_type = p[0];
        ++p;
      } else {
        b_size.req_type = '+';
      }
      if (!asc_isdigit(p[0]) || !asc_isdigit(p[1]) || p[2] != '\0')
        return make_err(aerror_bad_value, "size", str, "valid");
      b_size.requested = atoi(p);
      b_size.init();

      //
      // Rank every known dictionary
      //

      const DictInfoList * dlist = get_dict_info_list(config);
      DictInfoEnumeration * dels = dlist->elements();
      const DictInfo * entry;

      while ( (entry = dels->next()) != 0) {

        b_code  .cur = entry->code;
        b_module.cur = entry->module->name;

        b_variety.cur = entry->variety;
    
        b_size.cur_str = entry->size_str;
        b_size.cur     = entry->size;

        //
        // check to see if we got a better match than the current
        // best_match if any
        //

        IsBetter is_better = SameMatch;
        for (int i = 0; i != 4; ++i)
          is_better = better[i]->better_match(is_better);
    
        if (is_better == BetterMatch) {
          for (int i = 0; i != 4; ++i)
            better[i]->set_best_from_cur();
          best = entry;
        }
      }

      delete dels;

      //
      // set config to best match
      //
      if (best != 0) {
        String main_wl,flags;
        RET_ON_ERR(get_dict_file_name(best, main_wl, flags));
        dict_name = best->name;
        config->replace("lang", b_code.best);
        config->replace("language-tag", b_code.best);
        config->replace("master", main_wl.c_str());
        config->replace("master-flags", flags.c_str());
        config->replace("module", b_module.best);
        config->replace("jargon", b_variety.best);
        config->replace("clear-variety", "");
        // Rebuild the variety list from the '-'-separated components of
        // the chosen dictionary's variety.  The separator is stepped
        // over explicitly: strcspn() returns 0 when positioned on a '-',
        // so without the skip the loop would never advance.
        for (const char * v = b_variety.best; *v != '\0'; ) {
          unsigned len = strcspn(v, "-");
          if (len != 0)
            config->replace("add-variety", String(v, len));
          v += len;
          if (*v == '-') ++v;
        }
        config->replace("size", b_size.best_str);
      } else {
        return make_err(no_wordlist_for_lang, code);
      }
    }

    RET_ON_ERR_SET(get_dict_aliases(config), const StringMap *, dict_aliases);
    const char * val = dict_aliases->lookup(dict_name);
    if (val) config->replace("master", val);
    return config.release();
  }
458
459
  // Empty both conversion filter chains of m and set them up again from
  // m's configuration.  Stops at, and returns, the first error reported
  // by setup_filter; otherwise returns no_err.
  PosibErr<void> reload_filters(Speller * m)
  {
    m->from_internal_->filter.clear();
    m->to_internal_->filter.clear();

    // Add encoder and decoder filters if any
    RET_ON_ERR(setup_filter(m->to_internal_->filter,
                            m->config(),
                            true, false, false));
    RET_ON_ERR(setup_filter(m->from_internal_->filter,
                            m->config(),
                            false, false, true));

    return no_err;
  }
470
471
  // Create a speller for the settings in c0: initialize gettext, resolve
  // the word list with find_word_list, create the speller class, run its
  // setup() with the resolved config and load its filters.  The first
  // error from any step is returned; on success the speller is released
  // from its StackPtr and returned.
  PosibErr<Speller *> new_speller(Config * c0)
  {
    aspell_gettext_init();

    RET_ON_ERR_SET(find_word_list(c0), Config *, resolved);

    // NOTE(review): presumably StackPtr deletes the speller on the early
    // returns below -- confirm against stack_ptr.hpp
    StackPtr<Speller> speller(get_speller_class(resolved));
    RET_ON_ERR(speller->setup(resolved));

    RET_ON_ERR(reload_filters(speller));

    return speller.release();
  }
483
484
  // Destroy the speller m and, when it carries a non-null module handle,
  // release that handle afterwards.  A null m is ignored, matching the
  // behaviour of delete on a null pointer.
  void delete_speller(Speller * m)
  {
    if (m == 0) return;
    // fetch the handle first: m cannot be used once it is deleted
    SpellerLtHandle h = m->lt_handle();
    delete m;
    if (h != 0) free_lt_handle(h);
  }
490
}