Coverage Report

Created: 2024-09-08 06:23

/src/aspell/modules/speller/default/suggest.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2000-2005 by Kevin Atkinson under the terms of the LGPL
2
3
// suggest.cpp Suggestion code for Aspell
4
5
// The magic behind my spell checker comes from merging Lawrence
6
// Philips excellent metaphone algorithm and Ispell's near miss
7
// strategy which is inserting a space or hyphen, interchanging two
8
// adjacent letters, changing one letter, deleting a letter, or adding
9
// a letter.
10
// 
11
// The process goes something like this.
12
// 
13
// 1.     Convert the misspelled word to its soundslike equivalent (its
14
//        metaphone for English words).
15
// 
16
// 2.     Find words that have the same soundslike pattern.
17
//
18
// 3.     Find words that have similar soundslike patterns. A similar
19
//        soundlike pattern is a pattern that is obtained by
20
//        interchanging two adjacent letters, changing one letter,
21
//        deleting a letter, or adding a letter.
22
//
23
// 4.     Score the result list and return the words with the lowest
24
//        score. The score is roughly the weighed average of the edit
25
//        distance of the word to the misspelled word, the soundslike
26
//        equivalent of the two words, and the phoneme of the two words.
27
//        The edit distance is the weighed total of the number of
28
//        deletions, insertions, exchanges, or adjacent swaps needed to
29
//        make one string equivalent to the other.
30
//
31
// Please note that the soundlike equivalent is a rough approximation
32
// of how the words sounds. It is not the phoneme of the word by any
33
// means.  For more information on the metaphone algorithm please see
34
// the file metaphone.cc which included a detailed description of it.
35
//
36
// NOTE: It is assumed that that strlen(soundslike) <= strlen(word)
37
//       for any possible word
38
39
// POSSIBLE OPTIMIZATION:
40
//   store the number of letters that are the same as the previous 
41
//     soundslike so that it can possible be skipped
42
43
#include "getdata.hpp"
44
45
#include "fstream.hpp"
46
47
#include "speller_impl.hpp"
48
#include "asuggest.hpp"
49
#include "basic_list.hpp"
50
#include "clone_ptr-t.hpp"
51
#include "config.hpp"
52
#include "data.hpp"
53
#include "editdist.hpp"
54
#include "editdist2.hpp"
55
#include "errors.hpp"
56
#include "file_data_util.hpp"
57
#include "hash-t.hpp"
58
#include "language.hpp"
59
#include "leditdist.hpp"
60
#include "speller_impl.hpp"
61
#include "stack_ptr.hpp"
62
#include "suggest.hpp"
63
#include "vararray.hpp"
64
#include "string_list.hpp"
65
66
#include "gettext.h"
67
68
//#include "iostream.hpp"
69
//#define DEBUG_SUGGEST
70
71
using namespace aspeller;
72
using namespace acommon;
73
using std::pair;
74
75
namespace {
76
77
  template <class Iterator>
78
655k
  inline Iterator preview_next (Iterator i) {
79
655k
    return ++i;
80
655k
  }
81
  
82
  //
83
  // OriginalWord stores information about the original misspelled word
84
  //   for convince and speed.
85
  //
86
  struct OriginalWord {
87
    String   word;
88
    String   lower;
89
    String   clean;
90
    String   soundslike;
91
    CasePattern  case_pattern;
92
8.21k
    OriginalWord() {}
93
  };
94
95
  //
96
  // struct ScoreWordSound - used for storing the possible words while
97
  //   they are being processed.
98
  //
99
100
  static const char * NO_SOUNDSLIKE = "";
101
102
  class Working;
103
104
  enum SpecialEdit {None, Split, CamelSplit, CamelJoin, CamelOffByOne};
105
106
23.5k
  static inline int special_score(const EditDistanceWeights & w, SpecialEdit e) {
107
23.5k
    switch (e) {
108
22.5k
    case Split:
109
22.5k
      return w.max + 2;
110
45
    case CamelJoin:
111
45
      return w.max + 1;
112
786
    case CamelSplit:
113
786
      return w.max + 1;
114
200
    case CamelOffByOne:
115
200
      return w.swap - 1;
116
0
    default:
117
0
      abort();
118
23.5k
    }
119
23.5k
  }
120
121
  struct SpecialTypoScore {
122
    int score;
123
    bool is_overall_score;
124
1.79M
    operator bool() const {return score < LARGE_NUM;}
125
    SpecialTypoScore()
126
977k
      : score(LARGE_NUM), is_overall_score(false) {}
127
    SpecialTypoScore(int s, bool q)
128
42.2k
      : score(s), is_overall_score(q) {}
129
  };
130
  
131
1.01M
  static inline SpecialTypoScore special_typo_score(const TypoEditDistanceInfo & w, SpecialEdit e) {
132
1.01M
    switch (e) {
133
977k
    case None:
134
977k
      return SpecialTypoScore();
135
40.6k
    case Split:
136
40.6k
      return SpecialTypoScore(w.max + 2, true);
137
1.11k
    case CamelSplit:
138
1.11k
      return SpecialTypoScore(w.max + 1, true);
139
90
    case CamelJoin:
140
90
      return SpecialTypoScore(w.max + 1, true);
141
400
    case CamelOffByOne:
142
400
      return SpecialTypoScore(w.swap - 1, false);
143
0
    default:
144
0
      abort();
145
1.01M
    }
146
1.01M
  }
147
148
  struct ScoreWordSound {
149
    Working * src;
150
    char * word;
151
    char * word_clean;
152
    //unsigned word_size;
153
    const char * soundslike;
154
    int           score;
155
    int           adj_score;
156
    int           word_score;
157
    int           soundslike_score;
158
    bool          count;
159
    SpecialEdit   special_edit;
160
    bool          repl_table;
161
    WordEntry * repl_list;
162
12.1M
    ScoreWordSound(Working * s) : src(s), adj_score(LARGE_NUM), repl_list(0) {}
163
24.2M
    ~ScoreWordSound() {delete repl_list;}
164
  };
165
166
  inline int compare (const ScoreWordSound &lhs, 
167
          const ScoreWordSound &rhs) 
168
43.8M
  {
169
43.8M
    int temp = lhs.score - rhs.score;
170
43.8M
    if (temp) return temp;
171
33.0M
    return strcmp(lhs.word,rhs.word);
172
43.8M
  }
173
174
  inline int adj_score_lt(const ScoreWordSound &lhs,
175
                          const ScoreWordSound &rhs)
176
18.5M
  {
177
18.5M
    int temp = lhs.adj_score - rhs.adj_score;
178
18.5M
    if (temp) return temp < 0;
179
14.9M
    return strcmp(lhs.word,rhs.word) < 0;
180
18.5M
  }
181
182
  inline bool operator < (const ScoreWordSound & lhs, 
183
43.8M
        const ScoreWordSound & rhs) {
184
43.8M
    return compare(lhs, rhs) < 0;
185
43.8M
  }
186
187
  inline bool operator <= (const ScoreWordSound & lhs, 
188
0
         const ScoreWordSound & rhs) {
189
0
    return compare(lhs, rhs) <= 0;
190
0
  }
191
192
  inline bool operator == (const ScoreWordSound & lhs, 
193
0
         const ScoreWordSound & rhs) {
194
0
    return compare(lhs, rhs) == 0;
195
0
  }
196
197
  typedef BasicList<ScoreWordSound> NearMisses;
198
 
199
  class Sugs;
200
  
201
  class Working {
202
    friend class Sugs;
203
204
    const Language *     lang;
205
    OriginalWord         original;
206
    const SuggestParms * parms;
207
    SpellerImpl *        sp;
208
209
    String prefix;
210
    String suffix;
211
    bool have_presuf;
212
213
    int threshold;
214
    int adj_threshold;
215
    int try_harder;
216
217
    EditDist (* edit_dist_fun)(const char *, const char *,
218
                               const EditDistanceWeights &);
219
220
    unsigned int max_word_length;
221
222
    NearMisses         scored_near_misses;
223
    NearMisses         near_misses;
224
225
    char * temp_end;
226
227
    ObjStack           buffer;
228
    ObjStack           temp_buffer;
229
230
    static const bool do_count = true;
231
    static const bool dont_count = false;
232
233
    CheckInfo check_info[8];
234
235
2.21M
    void commit_temp(const char * b) {
236
2.21M
      if (temp_end) {
237
900k
        buffer.resize_temp(temp_end - b + 1);
238
900k
        buffer.commit_temp();
239
900k
        temp_end = 0; }}
240
22.2M
    void abort_temp() {
241
22.2M
      buffer.abort_temp();
242
22.2M
      temp_end = 0;}
243
0
    const char * to_soundslike_temp(const char * w, unsigned s, unsigned * len = 0) {
244
0
      char * sl = (char *)buffer.alloc_temp(s + 1);
245
0
      temp_end = lang->LangImpl::to_soundslike(sl, w, s);
246
0
      if (len) *len = temp_end - sl;
247
0
      return sl;}
248
1.31M
    const char * to_soundslike_temp(const WordEntry & sw) {
249
1.31M
      char * sl = (char *)buffer.alloc_temp(sw.word_size + 1);
250
1.31M
      temp_end = lang->LangImpl::to_soundslike(sl, sw.word, sw.word_size, sw.word_info);
251
1.31M
      if (temp_end == 0) return sw.word;
252
1.31M
      else return sl;}
253
277k
    const char * to_soundslike(const char * w, unsigned s) {
254
277k
      char * sl = (char *)buffer.alloc_temp(s + 1);
255
277k
      temp_end = lang->LangImpl::to_soundslike(sl, w, s);
256
277k
      commit_temp(sl);
257
277k
      return sl;}
258
259
    struct ScoreInfo {
260
      const char *  soundslike;
261
      int           word_score;
262
      int           soundslike_score;
263
      bool          count;
264
      SpecialEdit   special_edit;
265
      bool          repl_table;
266
      WordEntry *   repl_list;
267
      ScoreInfo()
268
        : soundslike(), word_score(LARGE_NUM), soundslike_score(LARGE_NUM),
269
15.5M
          count(true), special_edit(None), repl_table(false), repl_list() {}
270
    };
271
272
722k
    char * fix_case(char * str) {
273
722k
      lang->LangImpl::fix_case(original.case_pattern, str, str);
274
722k
      return str;
275
722k
    }
276
0
    const char * fix_case(const char * str, String & buf) {
277
0
      return lang->LangImpl::fix_case(original.case_pattern, str, buf);
278
0
    }
279
280
    char * fix_word(ObjStack & buf, ParmStr w);
281
282
    MutableString form_word(CheckInfo & ci);
283
    void try_word_n(ParmString str, const ScoreInfo & inf);
284
    bool check_word_s(ParmString word, CheckInfo * ci);
285
    unsigned check_word(char * word, char * word_end, CheckInfo * ci,
286
                        /* it WILL modify word */
287
                        unsigned pos = 1);
288
    void try_word_c(char * word, char * word_end, const ScoreInfo & inf);
289
290
3.63M
    void try_word(char * word, char * word_end, const ScoreInfo & inf) {
291
3.63M
      if (sp->unconditional_run_together_)
292
103k
        try_word_c(word,word_end,inf);
293
3.52M
      else
294
3.52M
        try_word_n(word,inf);
295
3.63M
    }
296
3.60M
    void try_word(char * word, char * word_end, int score) {
297
3.60M
      ScoreInfo inf;
298
3.60M
      inf.word_score = score;
299
3.60M
      try_word(word, word_end, inf);
300
3.60M
    }
301
302
    void add_sound(SpellerImpl::WS::const_iterator i,
303
                   WordEntry * sw, const char * sl, int score = LARGE_NUM);
304
    void add_nearmiss(char * word, unsigned word_size, WordInfo word_info,
305
                      const ScoreInfo &);
306
    void add_nearmiss_w(SpellerImpl::WS::const_iterator, const WordEntry & w,
307
                        const ScoreInfo &);
308
    void add_nearmiss_a(SpellerImpl::WS::const_iterator, const WordAff * w,
309
                        const ScoreInfo &);
310
0
    bool have_score(int score) {return score < LARGE_NUM;}
311
30.4M
    int needed_level(int want, int soundslike_score) {
312
      // (word_weight*??? + soundlike_weight*soundslike_score)/100 <= want
313
      // word_weight*??? + soundlike_weight*soundslike_score <= want*100
314
      // word_weight*??? <= want*100 - soundlike_weight*soundslike_score
315
      // ??? <= (want*100 - soundlike_weight*soundslike_score) / word_weight
316
      // level = ceil(???/edit_distance_weights.min)
317
30.4M
      int n = 100*want - parms->soundslike_weight*soundslike_score;
318
30.4M
      if (n <= 0) return 0;
319
28.6M
      int d = parms->word_weight*parms->edit_distance_weights.min;
320
28.6M
      return (n-1)/d+1; // roundup
321
30.4M
    }
322
17.1M
    int weighted_average(int soundslike_score, int word_score) {
323
17.1M
      return (parms->word_weight*word_score 
324
17.1M
        + parms->soundslike_weight*soundslike_score)/100;
325
17.1M
    }
326
776k
    int adj_wighted_average(int soundslike_score, int word_score, int one_edit_max) {
327
776k
      int soundslike_weight = parms->soundslike_weight;
328
776k
      int word_weight = parms->word_weight;
329
776k
      if (word_score <= one_edit_max) {
330
216k
        const int factor = word_score < 100 ? 8 : 2;
331
216k
        soundslike_weight = (parms->soundslike_weight+factor-1)/factor;
332
216k
      }
333
      // NOTE: Theoretical if the soundslike is might be beneficial to
334
      // adjust the word score so it doesn't contribute as much.  If
335
      // the score is already around 100 (one edit dist) then it may
336
      // not be a good idea to lower it more, but if the word score is
337
      // 200 or more then it might make sence to reduce it some.
338
      // HOWEVER, this will likely not work well, espacally with small
339
      // words and there are just too many words with the same
340
      // soundlike.  In any case that what the special "soundslike"
341
      // and "bad-spellers" mode is for.
342
776k
      return (word_weight*word_score
343
776k
              + soundslike_weight*soundslike_score)/100;
344
776k
    }
345
24.4k
    int skip_first_couple(NearMisses::iterator & i) {
346
24.4k
      int k = 0;
347
24.4k
      InsensitiveCompare cmp(lang);
348
24.4k
      const char * prev_word = "";
349
655k
      while (preview_next(i) != scored_near_misses.end()) 
350
  // skip over the first couple of items as they should
351
  // not be counted in the threshold score.
352
650k
      {
353
650k
  if (!i->count || cmp(prev_word, i->word) == 0) {
354
589k
    ++i;
355
589k
  } else if (k == parms->skip) {
356
19.6k
    break;
357
41.6k
  } else {
358
41.6k
          prev_word = i->word;
359
41.6k
    ++k;
360
41.6k
    ++i;
361
41.6k
  }
362
650k
      }
363
24.4k
      return k;
364
24.4k
    }
365
366
    void try_camel_word(String & word, SpecialEdit edit);
367
368
    void try_split();
369
    void try_camel_edits();
370
    void try_one_edit_word();
371
    void try_scan();
372
    void try_scan_root();
373
    void try_repl();
374
    void try_ngram();
375
376
    void score_list();
377
    void fine_tune_score(int thres);
378
  public:
379
    Working(SpellerImpl * m, const Language *l,
380
      const String & w, const SuggestParms * p)
381
      : lang(l), original(), parms(p), sp(m), have_presuf(false) 
382
      , threshold(1), max_word_length(0)
383
8.21k
    {
384
8.21k
      memset(static_cast<void *>(check_info), 0, sizeof(check_info));
385
8.21k
      original.word = w;
386
8.21k
      l->to_lower(original.lower, w.str());
387
8.21k
      l->to_clean(original.clean, w.str());
388
8.21k
      l->to_soundslike(original.soundslike, w.str());
389
8.21k
      original.case_pattern = l->case_pattern(w);
390
8.21k
      camel_case = parms->camel_case;
391
8.21k
    }
392
684
    void with_presuf(ParmStr pre, ParmStr suf) {
393
684
      prefix = pre;
394
684
      suffix = suf;
395
684
      have_presuf = true;
396
684
    }
397
    bool camel_case;
398
    // `this` is expected to be allocated with new and its ownership
399
    // will be transferred to the returning Sugs object
400
    Sugs * suggestions(); 
401
  };
402
403
  struct Suggestion {
404
    const char * word;
405
    const ScoreWordSound * inf;
406
0
    double distance() const {
407
0
      return inf->adj_score/100.0;
408
0
    }
409
0
    double normalized_score() const {
410
0
      return 100.0/(inf->adj_score + 100);
411
0
    }
412
0
    Suggestion() : word(), inf() {}
413
    Suggestion(const char * word, const ScoreWordSound * inf)
414
498k
      : word(word), inf(inf) {}
415
  };
416
417
  struct SavedBufs : public Vector<ObjStack::Memory *> {
418
8.96k
    void reset() {
419
8.96k
      for (Vector<ObjStack::Memory *>::iterator i = begin(), e = end();
420
17.1k
           i != e; ++i)
421
8.21k
        ObjStack::dealloc(*i);
422
8.96k
      clear();
423
8.96k
    }
424
1.43k
    ~SavedBufs() {
425
1.43k
      reset();
426
1.43k
    }
427
  };
428
429
  class SuggestionsImpl;
430
431
  class Sugs {
432
  public:
433
    Vector<Working *> srcs;
434
    NearMisses scored_near_misses;
435
436
684
    void merge(Sugs & other) {
437
684
      srcs.insert(srcs.end(), other.srcs.begin(), other.srcs.end());
438
684
      other.srcs.clear();
439
684
      scored_near_misses.merge(other.scored_near_misses, adj_score_lt);
440
684
    }
441
442
    void transfer(SuggestionsImpl &, int limit);
443
    
444
8.21k
    Sugs(Working * s) {
445
8.21k
      srcs.push_back(s);
446
8.21k
    }
447
8.21k
    ~Sugs() {
448
16.4k
      for (Vector<Working *>::iterator i = srcs.begin(), e = srcs.end(); i != e; ++i) {
449
8.21k
        delete *i;
450
8.21k
        *i = NULL;
451
8.21k
      }
452
8.21k
    }
453
  };
454
455
  class SuggestionsImpl : public SuggestionsData, public Vector<Suggestion> {
456
  public:
457
    SavedBufs   saved_bufs_;
458
    NearMisses  saved_near_misses_;
459
    ObjStack    buf;
460
1.43k
    SuggestionsImpl() {}
461
  private:
462
    SuggestionsImpl(const SuggestionsImpl &);
463
  public:
464
7.53k
    void reset() {
465
7.53k
      clear();
466
7.53k
      buf.reset();
467
7.53k
      saved_bufs_.reset();
468
7.53k
      saved_near_misses_.clear();
469
7.53k
    }
470
0
    void get_words(Convert * conv, Vector<CharVector> & res) {
471
0
      res.clear();
472
0
      res.reserve(size());
473
0
      if (conv) {
474
0
        for (iterator i = begin(), e = end(); i != e; ++i) {
475
0
          res.push_back(CharVector());
476
          // len + 1 to also convert the null
477
0
          conv->convert(i->word, strlen(i->word) + 1, res.back());
478
0
        }
479
0
      } else {
480
0
        for (iterator i = begin(), e = end(); i != e; ++i) {
481
0
          res.push_back(CharVector());
482
0
          res.reserve(strlen(i->word) + 1);
483
0
          res.back().append(i->word);
484
0
          res.back().append('\0');
485
0
        }
486
0
      }
487
0
    }
488
0
    void get_normalized_scores(Vector<double> & res) {
489
0
      res.clear();
490
0
      res.reserve(size());
491
0
      for (iterator i = begin(), e = end(); i != e; ++i)
492
0
        res.push_back(i->normalized_score());
493
0
    }
494
0
    void get_distances(Vector<double> & res) {
495
0
      res.clear();
496
0
      res.reserve(size());
497
0
      for (iterator i = begin(), e = end(); i != e; ++i)
498
0
        res.push_back(i->distance());
499
0
    }
500
  };
501
502
8.21k
  Sugs * Working::suggestions() {
503
504
8.21k
    Sugs * sug = new Sugs(this);
505
506
8.21k
    if (original.word.size() * parms->edit_distance_weights.max >= 0x8000)
507
370
      return sug; // to prevent overflow in the editdist functions
508
509
7.84k
    try_split();
510
511
7.84k
    try_camel_edits();
512
513
7.84k
    if (parms->use_repl_table) {
514
515
#ifdef DEBUG_SUGGEST
516
      COUT.printl("TRYING REPLACEMENT TABLE");
517
#endif
518
519
740
      try_repl();
520
740
    }
521
522
7.84k
    if (parms->try_one_edit_word) {
523
524
#ifdef DEBUG_SUGGEST
525
      COUT.printl("TRYING ONE EDIT WORD");
526
#endif
527
528
7.84k
      try_one_edit_word();
529
7.84k
      score_list();
530
7.84k
      if (parms->check_after_one_edit_word) {
531
0
        if (try_harder <= 0) goto done;
532
0
      }
533
      // need to fine tune the score to account for special weights
534
      // applied to typos, otherwise some typos that produce very
535
      // different soundslike may be missed
536
7.84k
      fine_tune_score(LARGE_NUM);
537
7.84k
    }
538
539
7.84k
    if (parms->try_scan_0) {
540
      
541
#ifdef DEBUG_SUGGEST
542
      COUT.printl("TRYING SCAN 0");
543
#endif
544
372
      edit_dist_fun = limit0_edit_distance;
545
      
546
372
      if (sp->soundslike_root_only)
547
0
        try_scan_root();
548
372
      else
549
372
        try_scan();
550
551
372
      score_list();
552
      
553
372
    }
554
555
7.84k
    if (parms->try_scan_1) {
556
      
557
#ifdef DEBUG_SUGGEST
558
      COUT.printl("TRYING SCAN 1");
559
#endif
560
7.37k
      edit_dist_fun = limit1_edit_distance;
561
562
7.37k
      if (sp->soundslike_root_only)
563
740
        try_scan_root();
564
6.63k
      else
565
6.63k
        try_scan();
566
567
7.37k
      score_list();
568
      
569
7.37k
      if (try_harder <= 0) goto done;
570
571
7.37k
    }
572
573
1.82k
    if (parms->try_scan_2) {
574
575
#ifdef DEBUG_SUGGEST
576
      COUT.printl("TRYING SCAN 2");
577
#endif
578
579
1.44k
      edit_dist_fun = limit2_edit_distance;
580
581
1.44k
      if (sp->soundslike_root_only)
582
7
        try_scan_root();
583
1.44k
      else
584
1.44k
        try_scan();
585
586
1.44k
      score_list();
587
      
588
1.44k
      if (try_harder < parms->ngram_threshold) goto done;
589
590
1.44k
    }
591
592
992
    if (parms->try_ngram) {
593
594
#ifdef DEBUG_SUGGEST
595
      COUT.printl("TRYING NGRAM");
596
#endif
597
598
89
      try_ngram();
599
600
89
      score_list();
601
602
89
    }
603
604
7.84k
  done:
605
606
7.84k
    fine_tune_score(threshold);
607
7.84k
    scored_near_misses.sort(adj_score_lt);
608
7.84k
    sug->scored_near_misses.swap(scored_near_misses);
609
7.84k
    near_misses.clear();
610
7.84k
    return sug;
611
992
  }
612
613
  // Forms a word by combining CheckInfo fields.
614
  // Will grow the grow the temp in the buffer.  The final
615
  // word must be null terminated and committed.
616
  // It returns a MutableString of what was appended to the buffer.
617
  MutableString Working::form_word(CheckInfo & ci) 
618
110k
  {
619
110k
    size_t slen = ci.word.len - ci.pre_strip_len - ci.suf_strip_len;
620
110k
    size_t wlen = slen + ci.pre_add_len + ci.suf_add_len;
621
110k
    char * tmp = (char *)buffer.grow_temp(wlen);
622
110k
    if (ci.pre_add_len) 
623
1
      memcpy(tmp, ci.pre_add, ci.pre_add_len);
624
110k
    memcpy(tmp + ci.pre_add_len, ci.word.str + ci.pre_strip_len, slen);
625
110k
    if (ci.suf_add_len) 
626
714
      memcpy(tmp + ci.pre_add_len + slen, ci.suf_add, ci.suf_add_len);
627
110k
    return MutableString(tmp,wlen);
628
110k
  }
629
630
  void Working::try_word_n(ParmString str, const ScoreInfo & inf)
631
3.52M
  {
632
3.52M
    String word;
633
3.52M
    String buf;
634
3.52M
    WordEntry sw;
635
3.52M
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
636
20.4M
         i != sp->suggest_ws.end();
637
16.9M
         ++i)
638
16.9M
    {
639
16.9M
      (*i)->clean_lookup(str, sw);
640
17.1M
      for (;!sw.at_end(); sw.adv())
641
165k
        add_nearmiss_w(i, sw, inf);
642
16.9M
    }
643
3.52M
    if (sp->affix_compress) {
644
678k
      CheckInfo ci; memset(static_cast<void *>(&ci), 0, sizeof(ci));
645
678k
      bool res = lang->affix()->affix_check(LookupInfo(sp, LookupInfo::Clean), str, ci, 0);
646
678k
      if (!res) return;
647
919
      form_word(ci);
648
919
      char * end = (char *)buffer.grow_temp(1);
649
919
      char * tmp = (char *)buffer.temp_ptr();
650
919
      buffer.commit_temp();
651
919
      *end = '\0';
652
919
      add_nearmiss(tmp, end - tmp, 0, inf);
653
919
    }
654
3.52M
  }
655
656
  bool Working::check_word_s(ParmString word, CheckInfo * ci)
657
103k
  {
658
103k
    WordEntry sw;
659
103k
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
660
437k
         i != sp->suggest_ws.end();
661
334k
         ++i)
662
370k
    {
663
370k
      (*i)->clean_lookup(word, sw);
664
370k
      if (!sw.at_end()) {
665
36.6k
        ci->word = sw.word;
666
36.6k
        return true;
667
36.6k
      }
668
370k
    }
669
66.8k
    if (sp->affix_compress) {
670
0
      return lang->affix()->affix_check(LookupInfo(sp, LookupInfo::Clean), word, *ci, 0);
671
0
    }
672
66.8k
    return false;
673
66.8k
  }
674
675
  unsigned Working::check_word(char * word, char * word_end,  CheckInfo * ci,
676
                          /* it WILL modify word */
677
                          unsigned pos)
678
103k
  {
679
103k
    unsigned res = check_word_s(word, ci);
680
103k
    if (res) return pos + 1;
681
66.8k
    if (pos + 1 >= sp->run_together_limit_) return 0;
682
0
    for (char * i = word + sp->run_together_min_; 
683
0
         i <= word_end - sp->run_together_min_;
684
0
         ++i)
685
0
    {
686
0
      char t = *i;
687
0
      *i = '\0';
688
0
      res = check_word_s(word, ci);
689
0
      *i = t;
690
0
      if (!res) continue;
691
0
      res = check_word(i, word_end, ci + 1, pos + 1);
692
0
      if (res) return res;
693
0
    }
694
0
    memset(static_cast<void *>(ci), 0, sizeof(CheckInfo));
695
0
    return 0;
696
0
  }
697
698
  void Working::try_word_c(char * word, char * word_end, const ScoreInfo & inf)
699
103k
  {
700
103k
    unsigned res = check_word(word, word_end, check_info);
701
103k
    assert(res <= sp->run_together_limit_);
702
    //CERR.printf(">%s\n", word);
703
103k
    if (!res) return;
704
36.6k
    buffer.abort_temp();
705
36.6k
    MutableString tmp = form_word(check_info[0]);
706
36.6k
    CasePattern cp = lang->case_pattern(tmp, tmp.size);
707
109k
    for (unsigned i = 1; i <= res; ++i) {
708
73.2k
      char * t = form_word(check_info[i]);
709
73.2k
      if (cp == FirstUpper && lang->is_lower(t[1])) 
710
4.37k
        t[0] = lang->to_lower(t[0]);
711
73.2k
    }
712
36.6k
    char * end = (char *)buffer.grow_temp(1);
713
36.6k
    char * beg = (char *)buffer.temp_ptr(); // since the original string may of moved
714
36.6k
    *end = 0;
715
36.6k
    buffer.commit_temp();
716
36.6k
    add_nearmiss(beg, end - beg, 0, inf);
717
    //CERR.printl(tmp);
718
36.6k
    memset(static_cast<void *>(check_info), 0, sizeof(CheckInfo)*res);
719
36.6k
  }
720
721
  void Working::add_nearmiss(char * word, unsigned word_size,
722
                             WordInfo word_info,
723
                             const ScoreInfo & inf)
724
12.0M
  {
725
12.0M
    if (word_size * parms->edit_distance_weights.max >= 0x8000) 
726
204
      return; // to prevent overflow in the editdist functions
727
728
12.0M
    near_misses.push_front(ScoreWordSound(this));
729
12.0M
    ScoreWordSound & d = near_misses.front();
730
12.0M
    d.word = word;
731
12.0M
    d.soundslike = inf.soundslike;
732
733
12.0M
    d.word_score = inf.word_score;
734
12.0M
    d.soundslike_score = inf.soundslike_score;
735
736
12.0M
    if (!sp->have_soundslike) {
737
0
      if (d.word_score >= LARGE_NUM) d.word_score = d.soundslike_score;
738
0
      else if (d.soundslike_score >= LARGE_NUM) d.soundslike_score = d.word_score;
739
0
    }
740
741
12.0M
    unsigned int l = word_size;
742
12.0M
    if (l > max_word_length) max_word_length = l;
743
    
744
12.0M
    if (!(word_info & ALL_CLEAN)) {
745
6.63M
      d.word_clean = (char *)buffer.alloc(word_size + 1);
746
6.63M
      lang->LangImpl::to_clean((char *)d.word_clean, word);
747
6.63M
    } else {
748
5.45M
      d.word_clean = d.word;
749
5.45M
    }
750
751
12.0M
    if (!sp->have_soundslike && !d.soundslike)
752
0
      d.soundslike = d.word_clean;
753
    
754
12.0M
    d.special_edit = inf.special_edit;
755
12.0M
    d.repl_table = inf.repl_table;
756
12.0M
    d.count = inf.count;
757
12.0M
    d.repl_list = inf.repl_list;
758
12.0M
  }
759
760
  void Working::add_nearmiss_w(SpellerImpl::WS::const_iterator i,
761
                               const WordEntry & w, const ScoreInfo & inf0)
762
9.31M
  {
763
9.31M
    assert(w.word_size == strlen(w.word));
764
9.31M
    ScoreInfo inf = inf0;
765
9.31M
    if (w.what == WordEntry::Misspelled) {
766
0
      inf.repl_list = new WordEntry;
767
0
      const ReplacementDict * repl_dict
768
0
        = static_cast<const ReplacementDict *>(*i);
769
0
      repl_dict->repl_lookup(w, *inf.repl_list);
770
0
    }
771
9.31M
    add_nearmiss(buffer.dup(ParmString(w.word, w.word_size)), 
772
9.31M
                 w.word_size, w.word_info, inf);
773
9.31M
  }
774
  
775
  void Working::add_nearmiss_a(SpellerImpl::WS::const_iterator i,
776
                               const WordAff * w, const ScoreInfo & inf)
777
2.72M
  {
778
2.72M
    add_nearmiss(buffer.dup(w->word), w->word.size, 0, inf);
779
2.72M
  }
780
781
7.84k
  void Working::try_split() {
782
7.84k
    const String & word = original.word;
783
    
784
7.84k
    if (word.size() < 4 || parms->split_chars.empty()) return;
785
2.91k
    size_t i = 0;
786
    
787
2.91k
    String new_word_str;
788
2.91k
    String buf;
789
2.91k
    new_word_str.resize(word.size() + 1);
790
2.91k
    char * new_word = new_word_str.data();
791
2.91k
    memcpy(new_word, word.data(), word.size());
792
2.91k
    new_word[word.size() + 1] = '\0';
793
2.91k
    new_word[word.size() + 0] = new_word[word.size() - 1];
794
    
795
43.2k
    for (i = word.size() - 2; i >= 2; --i) {
796
40.2k
      new_word[i+1] = new_word[i];
797
40.2k
      new_word[i] = '\0';
798
      
799
40.2k
      if (sp->check(new_word) && sp->check(new_word + i + 1)) {
800
33.1k
        for (size_t j = 0; j != parms->split_chars.size(); ++j)
801
22.5k
        {
802
22.5k
          new_word[i] = parms->split_chars[j];
803
22.5k
          ScoreInfo inf;
804
22.5k
          inf.word_score = special_score(parms->edit_distance_weights, Split);
805
22.5k
          inf.soundslike_score = inf.word_score;
806
22.5k
          inf.soundslike = NO_SOUNDSLIKE;
807
22.5k
          inf.count = false;
808
22.5k
          inf.special_edit = Split;
809
22.5k
          add_nearmiss(buffer.dup(new_word), word.size() + 1, 0, inf);
810
22.5k
        }
811
10.5k
      }
812
40.2k
    }
813
2.91k
  }
814
815
15.9k
  void Working::try_camel_word(String & word, SpecialEdit edit) {
816
15.9k
    CheckInfo ci[8];
817
15.9k
    bool ok = sp->check(word.begin(), word.end(), false, sp->run_together_limit(), ci, ci + 8, NULL, NULL);
818
15.9k
    if (!ok) return;
819
1.03k
    ScoreInfo inf;
820
1.03k
    inf.word_score = special_score(parms->edit_distance_weights, edit);
821
1.03k
    inf.soundslike_score = inf.word_score;
822
1.03k
    inf.soundslike = NO_SOUNDSLIKE;
823
1.03k
    inf.count = false;
824
1.03k
    inf.special_edit = edit;
825
1.03k
    add_nearmiss(buffer.dup(word.c_str()), word.size() + 1, 0, inf);
826
1.03k
  }
827
828
7.84k
  void Working::try_camel_edits() {
829
7.84k
    if (!camel_case) return;
830
    
831
1.37k
    String word = original.word;
832
1.37k
    word.ensure_null_end();
833
834
12.5k
    for (size_t i = 1; i < word.size(); ++i) {
835
      // try splitting or joining a word by changing the case of a letter
836
11.1k
      SpecialEdit edit = None;
837
11.1k
      char save = word[i];
838
11.1k
      word[i] = lang->to_upper(word[i]);
839
11.1k
      if (word[i] != save) {
840
4.38k
        edit = CamelSplit;
841
6.81k
      } else {
842
6.81k
        word[i] = lang->to_lower(word[i]);
843
6.81k
        if (word[i] != save)
844
5.54k
          edit = CamelJoin;
845
6.81k
      }
846
11.1k
      try_camel_word(word, edit);
847
848
      //if the char was made lower now also try making an adjacent character uppercase
849
11.1k
      if (edit == CamelJoin) {
850
5.54k
        char save2 = word[i-1];
851
5.54k
        word[i-1] = lang->to_upper(word[i-1]);
852
5.54k
        if (word[i-1] != save2)
853
2.72k
          try_camel_word(word, CamelOffByOne);
854
5.54k
        word[i-1] = save2;
855
5.54k
        if (i+1 < word.size()) {
856
4.68k
          save2 = word[i+1];
857
4.68k
          word[i+1] = lang->to_upper(word[i+1]);
858
4.68k
          if (word[i+1] != save2)
859
1.99k
            try_camel_word(word, CamelOffByOne);
860
4.68k
          word[i+1] = save2;
861
4.68k
        }
862
5.54k
      }
863
      
864
11.1k
      word[i] = save;
865
11.1k
    }
866
1.37k
  }
867
868
  void Working::try_one_edit_word() 
869
7.84k
  {
870
7.84k
    const String & orig = original.clean;
871
7.84k
    const char * replace_list = lang->clean_chars();
872
7.84k
    char a,b;
873
7.84k
    const char * c;
874
7.84k
    VARARRAY(char, new_word, orig.size() + 2);
875
7.84k
    char * new_word_end = new_word + orig.size();
876
7.84k
    size_t i;
877
878
7.84k
    memcpy(new_word, orig.str(), orig.size() + 1);
879
880
    // Try word as is (in case of case difference etc)
881
882
7.84k
    try_word(new_word,  new_word_end, 0);
883
884
    // Change one letter
885
    
886
56.4k
    for (i = 0; i != orig.size(); ++i) {
887
1.69M
      for (c = replace_list; *c; ++c) {
888
1.64M
        if (*c == orig[i]) continue;
889
1.59M
        new_word[i] = *c;
890
1.59M
        try_word(new_word, new_word_end, parms->edit_distance_weights.sub);
891
1.59M
      }
892
48.5k
      new_word[i] = orig[i];
893
48.5k
    }
894
    
895
    // Interchange two adjacent letters.
896
    
897
50.2k
    for (i = 0; i+1 < orig.size(); ++i) {
898
42.4k
      a = new_word[i];
899
42.4k
      b = new_word[i+1];
900
42.4k
      new_word[i] = b;
901
42.4k
      new_word[i+1] = a;
902
42.4k
      try_word(new_word, new_word_end, parms->edit_distance_weights.swap);
903
42.4k
      new_word[i] = a;
904
42.4k
      new_word[i+1] = b;
905
42.4k
    }
906
907
    // Add one letter
908
909
7.84k
    *new_word_end = ' ';
910
7.84k
    new_word_end++;
911
7.84k
    *new_word_end = '\0';
912
7.84k
    i = new_word_end - new_word - 1;
913
56.4k
    while(true) {
914
1.96M
      for (c=replace_list; *c; ++c) {
915
1.90M
        new_word[i] = *c;
916
1.90M
        try_word(new_word, new_word_end, parms->edit_distance_weights.del1);
917
1.90M
      }
918
56.4k
      if (i == 0) break;
919
48.5k
      new_word[i] = new_word[i-1];
920
48.5k
      --i;
921
48.5k
    }
922
    
923
    // Delete one letter
924
925
7.84k
    if (orig.size() > 1) {
926
6.02k
      memcpy(new_word, orig.str(), orig.size() + 1);
927
6.02k
      new_word_end = new_word + orig.size() - 1;
928
6.02k
      a = *new_word_end;
929
6.02k
      *new_word_end = '\0';
930
6.02k
      i = orig.size() - 1;
931
48.4k
      while (true) {
932
48.4k
        try_word(new_word, new_word_end, parms->edit_distance_weights.del2);
933
48.4k
        if (i == 0) break;
934
42.4k
        b = a;
935
42.4k
        a = new_word[i-1];
936
42.4k
        new_word[i-1] = b;
937
42.4k
        --i;
938
42.4k
      }
939
6.02k
    }
940
7.84k
  }
941
942
  void Working::add_sound(SpellerImpl::WS::const_iterator i,
943
                          WordEntry * sw, const char * sl, int score)
944
1.93M
  {
945
1.93M
    WordEntry w;
946
1.93M
    (*i)->soundslike_lookup(*sw, w);
947
948
11.0M
    for (; !w.at_end(); w.adv()) {
949
950
9.14M
      ScoreInfo inf;
951
9.14M
      inf.soundslike = sl;
952
9.14M
      inf.soundslike_score = score;
953
9.14M
      add_nearmiss_w(i, w, inf);
954
      
955
9.14M
      if (w.aff[0]) {
956
442k
        String sl_buf;
957
442k
        temp_buffer.reset();
958
442k
        WordAff * exp_list;
959
442k
        exp_list = lang->affix()->expand(w.word, w.aff, temp_buffer);
960
3.16M
        for (WordAff * p = exp_list->next; p; p = p->next) {
961
2.72M
          add_nearmiss_a(i, p, ScoreInfo());
962
2.72M
        }
963
442k
      }
964
      
965
9.14M
    }
966
1.93M
  }
967
968
  void Working::try_scan() 
969
8.44k
  {
970
8.44k
    const char * original_soundslike = original.soundslike.str();
971
    
972
8.44k
    WordEntry * sw;
973
8.44k
    WordEntry w;
974
8.44k
    const char * sl = 0;
975
8.44k
    EditDist score;
976
8.44k
    unsigned int stopped_at = LARGE_NUM;
977
8.44k
    WordAff * exp_list;
978
8.44k
    WordAff single;
979
8.44k
    single.next = 0;
980
981
8.44k
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
982
50.6k
         i != sp->suggest_ws.end();
983
42.2k
         ++i) 
984
42.2k
    {
985
      //CERR.printf(">>%p %s\n", *i, typeid(**i).name());
986
42.2k
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
987
988
17.0M
      while ( (sw = els->next(stopped_at)) ) {
989
990
        //CERR.printf("[%s (%d) %d]\n", sw->word, sw->word_size, sw->what);
991
        //assert(strlen(sw->word) == sw->word_size);
992
          
993
17.0M
        if (sw->what != WordEntry::Word) {
994
17.0M
          sl = sw->word;
995
17.0M
          abort_temp();
996
17.0M
        } else if (!*sw->aff) {
997
0
          sl = to_soundslike_temp(*sw);
998
0
        } else {
999
0
          goto affix_case;
1000
0
        }
1001
1002
        //CERR.printf("SL = %s\n", sl);
1003
        
1004
17.0M
        score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1005
17.0M
        stopped_at = score.stopped_at - sl;
1006
17.0M
        if (score >= LARGE_NUM) continue;
1007
1.25M
        stopped_at = LARGE_NUM;
1008
1.25M
        commit_temp(sl);
1009
1.25M
        add_sound(i, sw, sl, score);
1010
1.25M
        continue;
1011
        
1012
0
      affix_case:
1013
        
1014
0
        temp_buffer.reset();
1015
        
1016
        // first expand any prefixes
1017
0
        if (sp->fast_scan) { // if fast_scan, then no prefixes
1018
0
          single.word.str = sw->word;
1019
0
          single.word.size = strlen(sw->word);
1020
0
          single.aff = (const unsigned char *)sw->aff;
1021
0
          exp_list = &single;
1022
0
        } else {
1023
0
          exp_list = lang->affix()->expand_prefix(sw->word, sw->aff, temp_buffer);
1024
0
        }
1025
        
1026
        // iterate through each semi-expanded word, any affix flags
1027
        // are now guaranteed to be suffixes
1028
0
        for (WordAff * p = exp_list; p; p = p->next)
1029
0
        {
1030
          // try the root word
1031
0
          unsigned sl_len;
1032
0
          sl = to_soundslike_temp(p->word.str, p->word.size, &sl_len);
1033
0
          score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1034
0
          stopped_at = score.stopped_at - sl;
1035
0
          stopped_at += p->word.size - sl_len;
1036
          
1037
0
          if (score < LARGE_NUM) {
1038
0
            commit_temp(sl);
1039
0
            ScoreInfo inf;
1040
0
            inf.soundslike = sl;
1041
0
            inf.soundslike_score = score;
1042
0
            add_nearmiss_a(i, p, inf);
1043
0
          }
1044
          
1045
          // expand any suffixes, using stopped_at as a hint to avoid
1046
          // unneeded expansions.  Note stopped_at is the last character
1047
          // looked at by limit_edit_dist.  Thus if the character
1048
          // at stopped_at is changed it might effect the result
1049
          // hence the "limit" is stopped_at + 1
1050
0
          if (p->word.size - lang->affix()->max_strip() > stopped_at)
1051
0
            exp_list = 0;
1052
0
          else
1053
0
            exp_list = lang->affix()->expand_suffix(p->word, p->aff, 
1054
0
                                                    temp_buffer, 
1055
0
                                                    stopped_at + 1);
1056
          
1057
          // reset stopped_at if necessary
1058
0
          if (score < LARGE_NUM) stopped_at = LARGE_NUM;
1059
          
1060
          // iterate through fully expanded words, if any
1061
0
          for (WordAff * q = exp_list; q; q = q->next) {
1062
0
            sl = to_soundslike_temp(q->word.str, q->word.size);
1063
0
            score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1064
0
            if (score >= LARGE_NUM) continue;
1065
0
            commit_temp(sl);
1066
0
            ScoreInfo inf;
1067
0
            inf.soundslike = sl;
1068
0
            inf.soundslike_score = score;
1069
0
            add_nearmiss_a(i, q, inf);
1070
0
          }
1071
0
        }
1072
0
      }
1073
42.2k
    }
1074
8.44k
  }
1075
1076
  void Working::try_scan_root() 
1077
747
  {
1078
1079
747
    WordEntry * sw;
1080
747
    WordEntry w;
1081
747
    const char * sl = 0;
1082
747
    EditDist score;
1083
747
    int stopped_at = LARGE_NUM;
1084
747
    GuessInfo gi;
1085
747
    lang->munch(original.word, &gi);
1086
747
    Vector<const char *> sls;
1087
747
    sls.push_back(original.soundslike.str());
1088
#ifdef DEBUG_SUGGEST
1089
    COUT.printf("will try soundslike: %s\n", sls.back());
1090
#endif
1091
747
    for (const aspeller::CheckInfo * ci = gi.head;
1092
856
         ci; 
1093
747
         ci = ci->next) 
1094
109
    {
1095
109
      sl = to_soundslike(ci->word.str, ci->word.len);
1096
109
      Vector<const char *>::iterator i = sls.begin();
1097
208
      while (i != sls.end() && strcmp(*i, sl) != 0) ++i;
1098
109
      if (i == sls.end()) {
1099
67
        sls.push_back(to_soundslike(ci->word.str, ci->word.len));
1100
#ifdef DEBUG_SUGGEST
1101
        COUT.printf("will try root soundslike: %s\n", sls.back());
1102
#endif
1103
67
      }
1104
109
    }
1105
747
    const char * * begin = sls.pbegin();
1106
747
    const char * * end   = sls.pend();
1107
747
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
1108
3.73k
         i != sp->suggest_ws.end();
1109
2.98k
         ++i) 
1110
2.98k
    {
1111
2.98k
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
1112
1113
2.00M
      while ( (sw = els->next(stopped_at)) ) {
1114
          
1115
2.00M
        if (sw->what != WordEntry::Word) {
1116
684k
          sl = sw->word;
1117
684k
          abort_temp();
1118
1.31M
        } else {
1119
1.31M
          sl = to_soundslike_temp(*sw);
1120
1.31M
        } 
1121
1122
2.00M
        stopped_at = LARGE_NUM;
1123
3.85M
        for (const char * * s = begin; s != end; ++s) {
1124
2.54M
          score = edit_dist_fun(sl, *s, 
1125
2.54M
                                parms->edit_distance_weights);
1126
2.54M
          if (score.stopped_at - sl < stopped_at)
1127
2.20M
            stopped_at = score.stopped_at - sl;
1128
2.54M
          if (score >= LARGE_NUM) continue;
1129
685k
          stopped_at = LARGE_NUM;
1130
685k
          commit_temp(sl);
1131
685k
          add_sound(i, sw, sl, score);
1132
          //CERR.printf("using %s: will add %s with score %d\n", *s, sl, (int)score);
1133
685k
          break;
1134
2.54M
        }
1135
2.00M
      }
1136
2.98k
    }
1137
747
  }
1138
1139
  struct ReplTry 
1140
  {
1141
    const char * begin;
1142
    const char * end;
1143
    const char * repl;
1144
    size_t repl_len;
1145
    ReplTry(const char * b, const char * e, const char * r)
1146
0
      : begin(b), end(e), repl(r), repl_len(strlen(r)) {}
1147
  };
1148
1149
  void Working::try_repl() 
1150
740
  {
1151
740
    String buf;
1152
740
    Vector<ReplTry> repl_try;
1153
740
    StackPtr<SuggestReplEnumeration> els(lang->repl());
1154
740
    const SuggestRepl * r = 0;
1155
740
    const char * word = original.clean.str();
1156
740
    const char * wend = word + original.clean.size();
1157
314k
    while (r = els->next(), r) 
1158
313k
    {
1159
313k
      const char * p = word;
1160
339k
      while ((p = strstr(p, r->substr))) {
1161
25.3k
        buf.clear();
1162
25.3k
        buf.append(word, p);
1163
25.3k
        buf.append(r->repl, strlen(r->repl));
1164
25.3k
        p += strlen(r->substr);
1165
25.3k
        buf.append(p, wend + 1);
1166
25.3k
        buf.ensure_null_end();
1167
        //COUT.printf("%s (%s) => %s (%s)\n", word, r->substr, buf.str(), r->repl);
1168
25.3k
        ScoreInfo inf;
1169
25.3k
        inf.word_score = parms->edit_distance_weights.sub*3/2;
1170
25.3k
        inf.repl_table = true;
1171
25.3k
        try_word(buf.pbegin(), buf.pend(), inf);
1172
25.3k
      }
1173
313k
    }
1174
740
  }
1175
1176
  // generate an n-gram score comparing s1 and s2
1177
  static int ngram(int n, char * s1, int l1, const char * s2, int l2)
1178
4.48M
  {
1179
4.48M
    int nscore = 0;
1180
4.48M
    int ns;
1181
4.75M
    for (int j=1;j<=n;j++) {
1182
4.75M
      ns = 0;
1183
64.8M
      for (int i=0;i<=(l1-j);i++) {
1184
60.0M
        char c = *(s1 + i + j);
1185
60.0M
        *(s1 + i + j) = '\0';
1186
60.0M
        if (strstr(s2,(s1+i))) ns++;
1187
60.0M
        *(s1 + i + j ) = c;
1188
60.0M
      }
1189
4.75M
      nscore = nscore + ns;
1190
4.75M
      if (ns < 2) break;
1191
4.75M
    }
1192
4.48M
    ns = 0;
1193
4.48M
    ns = (l2-l1)-2;
1194
4.48M
    return (nscore - ((ns > 0) ? ns : 0));
1195
4.48M
  }
1196
1197
  struct NGramScore {
1198
    SpellerImpl::WS::const_iterator i;
1199
    WordEntry info;
1200
    const char * soundslike;
1201
    int score;
1202
0
    NGramScore() {}
1203
    NGramScore(SpellerImpl::WS::const_iterator i0,
1204
               const WordEntry & info0, const char * sl, int score0)
1205
2.12k
      : i(i0), info(info0), soundslike(sl), score(score0) {}
1206
  };
1207
1208
1209
  void Working::try_ngram()
1210
89
  {
1211
89
    String original_soundslike = original.soundslike;
1212
89
    original_soundslike.ensure_null_end();
1213
89
    WordEntry * sw = 0;
1214
89
    const char * sl = 0;
1215
89
    typedef Vector<NGramScore> Candidates;
1216
89
    hash_set<const char *> already_have;
1217
89
    Candidates candidates;
1218
89
    int min_score = 0;
1219
89
    int count = 0;
1220
1221
89
    for (NearMisses::iterator i = scored_near_misses.begin();
1222
692k
         i != scored_near_misses.end(); ++i)
1223
692k
    {
1224
692k
      if (!i->soundslike)
1225
0
        i->soundslike = to_soundslike(i->word, strlen(i->word));
1226
692k
      already_have.insert(i->soundslike);
1227
692k
    }
1228
1229
89
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
1230
534
         i != sp->suggest_ws.end();
1231
445
         ++i) 
1232
445
    {
1233
445
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
1234
      
1235
4.52M
      while ( (sw = els->next(LARGE_NUM)) ) {
1236
1237
4.52M
        if (sw->what != WordEntry::Word) {
1238
4.52M
          abort_temp();
1239
4.52M
          sl = sw->word;
1240
4.52M
        } else {
1241
0
          sl = to_soundslike_temp(sw->word, sw->word_size);
1242
0
        }
1243
        
1244
4.52M
        if (already_have.have(sl)) continue;
1245
1246
4.48M
        int ng = ngram(3, original_soundslike.data(), original_soundslike.size(),
1247
4.48M
                       sl, strlen(sl));
1248
1249
4.48M
        if (ng > 0 && ng >= min_score) {
1250
2.12k
          commit_temp(sl);
1251
2.12k
          candidates.push_back(NGramScore(i, *sw, sl, ng));
1252
2.12k
          if (ng > min_score) count++;
1253
2.12k
          if (count >= parms->ngram_keep) {
1254
130
            int orig_min = min_score;
1255
130
            min_score = LARGE_NUM;
1256
130
            Candidates::iterator i = candidates.begin();
1257
130
            Candidates::iterator j = candidates.begin();
1258
2.24k
            for (; i != candidates.end(); ++i) {
1259
2.11k
              if (i->score == orig_min) continue;
1260
1.30k
              if (min_score > i->score) min_score = i->score;
1261
1.30k
              *j = *i;
1262
1.30k
              ++j;
1263
1.30k
            }
1264
130
            count = 0;
1265
130
            candidates.resize(j-candidates.begin());
1266
1.43k
            for (i = candidates.begin(); i != candidates.end(); ++i) {
1267
1.30k
              if (i->score != min_score) count++;
1268
1.30k
            }
1269
130
          }
1270
2.12k
        }
1271
4.48M
      }
1272
445
    }
1273
    
1274
89
    for (Candidates::iterator i = candidates.begin();
1275
1.39k
         i != candidates.end();
1276
1.30k
         ++i)
1277
1.30k
    {
1278
      //COUT.printf("ngram: %s %d\n", i->soundslike, i->score);
1279
1.30k
      add_sound(i->i, &i->info, i->soundslike);
1280
1.30k
    }
1281
89
  }
1282
  
1283
17.1k
  void Working::score_list() {
1284
1285
#  ifdef DEBUG_SUGGEST
1286
    COUT.printl("SCORING LIST");
1287
#  endif
1288
1289
17.1k
    try_harder = 3;
1290
17.1k
    if (near_misses.empty()) return;
1291
1292
13.2k
    NearMisses::iterator i;
1293
13.2k
    NearMisses::iterator prev;
1294
1295
13.2k
    near_misses.push_front(ScoreWordSound(this));
1296
    // the first item will NEVER be looked at.
1297
13.2k
    scored_near_misses.push_front(ScoreWordSound(this));
1298
13.2k
    scored_near_misses.front().score = -1;
1299
    // this item will only be looked at when sorting so 
1300
    // make it a small value to keep it at the front.
1301
1302
13.2k
    int try_for = (parms->word_weight*parms->edit_distance_weights.max)/100;
1303
28.1k
    while (true) {
1304
28.1k
      try_for += (parms->word_weight*parms->edit_distance_weights.max)/100;
1305
1306
      // put all pairs whose score <= initial_limit*max_weight
1307
      // into the scored list
1308
1309
28.1k
      prev = near_misses.begin();
1310
28.1k
      i = prev;
1311
28.1k
      ++i;
1312
23.8M
      while (i != near_misses.end()) {
1313
1314
        //CERR.printf("%s %s %s %d %d\n", i->word, i->word_clean, i->soundslike,
1315
        //            i->word_score, i->soundslike_score);
1316
1317
23.8M
        if (i->word_score >= LARGE_NUM) {
1318
16.1M
          int sl_score = i->soundslike_score < LARGE_NUM ? i->soundslike_score : 0;
1319
16.1M
          int level = needed_level(try_for, sl_score);
1320
          
1321
16.1M
          if (level >= int(sl_score/parms->edit_distance_weights.min)) 
1322
13.1M
            i->word_score = edit_distance(original.clean,
1323
13.1M
                                          i->word_clean,
1324
13.1M
                                          level, level,
1325
13.1M
                                          parms->edit_distance_weights);
1326
16.1M
        }
1327
        
1328
23.8M
        if (i->word_score >= LARGE_NUM) goto cont1;
1329
1330
12.5M
        if (i->soundslike_score >= LARGE_NUM) 
1331
3.33M
        {
1332
3.33M
          if (weighted_average(0, i->word_score) > try_for) goto cont1;
1333
1334
221k
          if (i->soundslike == 0) i->soundslike = to_soundslike(i->word, strlen(i->word));
1335
1336
221k
          i->soundslike_score = edit_distance(original.soundslike, i->soundslike, 
1337
221k
                                              parms->edit_distance_weights);
1338
221k
        }
1339
1340
9.44M
        i->score = weighted_average(i->soundslike_score, i->word_score);
1341
1342
9.44M
        if (i->score > try_for + parms->span) goto cont1;
1343
1344
        //CERR.printf("2>%s %s %s %d %d\n", i->word, i->word_clean, i->soundslike,
1345
        //            i->word_score, i->soundslike_score);
1346
1347
2.53M
        scored_near_misses.splice_into(near_misses,prev,i);
1348
        
1349
2.53M
        i = prev; // Yes this is right due to the slice
1350
2.53M
        ++i;
1351
1352
2.53M
        continue;
1353
        
1354
21.3M
      cont1:
1355
21.3M
        prev = i;
1356
21.3M
        ++i;
1357
21.3M
      }
1358
  
1359
28.1k
      scored_near_misses.sort();
1360
  
1361
28.1k
      i = scored_near_misses.begin();
1362
28.1k
      ++i;
1363
  
1364
28.1k
      if (i == scored_near_misses.end()) continue;
1365
  
1366
17.8k
      int k = skip_first_couple(i);
1367
  
1368
17.8k
      if ((k == parms->skip && i->score <= try_for) 
1369
17.8k
    || prev == near_misses.begin() ) // or no more left in near_misses
1370
13.2k
  break;
1371
17.8k
    }
1372
      
1373
13.2k
    threshold = i->score + parms->span;
1374
13.2k
    if (threshold < parms->edit_distance_weights.max)
1375
4.63k
      threshold = parms->edit_distance_weights.max;
1376
1377
#  ifdef DEBUG_SUGGEST
1378
    COUT << "Threshold is: " << threshold << "\n";
1379
    COUT << "try_for: " << try_for << "\n";
1380
    COUT << "Size of scored: " << scored_near_misses.size() << "\n";
1381
    COUT << "Size of ! scored: " << near_misses.size() << "\n";
1382
#  endif
1383
1384
    //if (threshold - try_for <=  parms->edit_distance_weights.max/2) return;
1385
      
1386
13.2k
    prev = near_misses.begin();
1387
13.2k
    i = prev;
1388
13.2k
    ++i;
1389
9.70M
    while (i != near_misses.end()) {
1390
  
1391
9.68M
      if (i->word_score >= LARGE_NUM) {
1392
1393
7.12M
        int sl_score = i->soundslike_score < LARGE_NUM ? i->soundslike_score : 0;
1394
7.12M
        int initial_level = needed_level(try_for, sl_score);
1395
7.12M
        int max_level = needed_level(threshold, sl_score);
1396
        
1397
7.12M
        if (initial_level < max_level)
1398
4.18M
          i->word_score = edit_distance(original.clean.c_str(),
1399
4.18M
                                        i->word_clean,
1400
4.18M
                                        initial_level+1,max_level,
1401
4.18M
                                        parms->edit_distance_weights);
1402
7.12M
      }
1403
1404
9.68M
      if (i->word_score >= LARGE_NUM) goto cont2;
1405
      
1406
4.33M
      if (i->soundslike_score >= LARGE_NUM) 
1407
2.53M
      {
1408
2.53M
        if (weighted_average(0, i->word_score) > threshold) goto cont2;
1409
1410
56.2k
        if (i->soundslike == 0) 
1411
56.2k
          i->soundslike = to_soundslike(i->word, strlen(i->word));
1412
        
1413
56.2k
        i->soundslike_score = edit_distance(original.soundslike, i->soundslike,
1414
56.2k
                                            parms->edit_distance_weights);
1415
56.2k
      }
1416
1417
1.85M
      i->score = weighted_average(i->soundslike_score, i->word_score);
1418
1419
1.85M
      if (i->score > threshold + parms->span) goto cont2;
1420
      
1421
720k
      scored_near_misses.splice_into(near_misses,prev,i);
1422
      
1423
720k
      i = prev; // Yes this is right due to the slice
1424
720k
      ++i;
1425
      
1426
720k
      continue;
1427
  
1428
8.96M
    cont2:
1429
8.96M
  prev = i;
1430
8.96M
  ++i;
1431
        
1432
8.96M
    }
1433
1434
13.2k
    near_misses.pop_front();
1435
1436
13.2k
    scored_near_misses.sort();
1437
13.2k
    scored_near_misses.pop_front();
1438
1439
13.2k
    if (near_misses.empty()) {
1440
6.70k
      try_harder = 1;
1441
6.70k
    } else {
1442
6.57k
      i = scored_near_misses.begin();
1443
6.57k
      skip_first_couple(i);
1444
6.57k
      ++i;
1445
6.57k
      try_harder = i == scored_near_misses.end() ? 2 : 0;
1446
6.57k
    }
1447
1448
#  ifdef DEBUG_SUGGEST
1449
    COUT << "Size of scored: " << scored_near_misses.size() << "\n";
1450
    COUT << "Size of ! scored: " << near_misses.size() << "\n";
1451
    COUT << "Try Harder: " << try_harder << "\n";
1452
#  endif
1453
13.2k
  }
1454
1455
15.6k
  void Working::fine_tune_score(int thres) {
1456
1457
15.6k
    NearMisses::iterator i;
1458
1459
15.6k
    if (parms->use_typo_analysis) {
1460
15.5k
      adj_threshold = 0;
1461
15.5k
      unsigned int j;
1462
      
1463
15.5k
      CharVector orig_norm, word;
1464
15.5k
      orig_norm.resize(original.word.size() + 1);
1465
158k
      for (j = 0; j != original.word.size(); ++j)
1466
142k
        orig_norm[j] = parms->ti->to_normalized(original.word[j]);
1467
15.5k
      orig_norm[j] = 0;
1468
15.5k
      ParmString orig(orig_norm.data(), j);
1469
15.5k
      word.resize(max_word_length + 1);
1470
      
1471
15.5k
      for (i = scored_near_misses.begin();
1472
1.03M
           i != scored_near_misses.end() && i->score <= thres;
1473
1.01M
           ++i)
1474
1.01M
      {
1475
1.01M
        SpecialTypoScore special = special_typo_score(*parms->ti, i->special_edit);
1476
1.01M
        if (special) {
1477
42.2k
          i->word_score = special.score;
1478
42.2k
          i->soundslike_score = i->word_score;
1479
42.2k
          i->adj_score = i->word_score;
1480
42.2k
        }
1481
1.01M
        if (i->adj_score >= LARGE_NUM) {
1482
776k
          if (!special) {
1483
2.50M
            for (j = 0; (i->word)[j] != 0; ++j)
1484
1.73M
              word[j] = parms->ti->to_normalized((i->word)[j]);
1485
776k
            word[j] = 0;
1486
776k
            int new_score = typo_edit_distance(ParmString(word.data(), j), orig, *parms->ti);
1487
            // if a repl. table was used we don't want to increase the score
1488
776k
            if (!i->repl_table || new_score < i->word_score)
1489
776k
              i->word_score = new_score;
1490
776k
          }
1491
776k
          if (!special.is_overall_score) 
1492
776k
            i->adj_score = adj_wighted_average(i->soundslike_score, i->word_score, parms->ti->max);
1493
776k
        }
1494
1.01M
        if (i->adj_score > adj_threshold)
1495
52.3k
          adj_threshold = i->adj_score;
1496
1.01M
      }
1497
15.5k
    } else {
1498
188
      for (i = scored_near_misses.begin();
1499
688k
           i != scored_near_misses.end() && i->score <= thres;
1500
688k
           ++i)
1501
688k
      {
1502
688k
        i->adj_score = i->score;
1503
688k
      }
1504
188
      adj_threshold = threshold;
1505
188
    }
1506
    
1507
1.78M
    for (; i != scored_near_misses.end(); ++i) {
1508
1.77M
      if (i->adj_score > adj_threshold)
1509
1.76M
        i->adj_score = LARGE_NUM;
1510
1.77M
    }
1511
15.6k
  }
1512
1513
  struct StrEquals {
1514
545k
    bool operator() (const char * x, const char * y) const {
1515
545k
      return strcmp(x,y) == 0;
1516
545k
    }
1517
  };
1518
  typedef hash_set<const char *,hash<const char *>,StrEquals> StrHashSet;
1519
1520
26.2k
  char * Working::fix_word(ObjStack & buf, ParmStr w) {
1521
26.2k
    size_t sz = prefix.size() + w.size() + suffix.size();
1522
26.2k
    char * word = static_cast<char *>(buf.alloc(sz + 1));
1523
26.2k
    char * i = word;
1524
26.2k
    memcpy(i, prefix.c_str(), prefix.size());
1525
26.2k
    i += prefix.size();
1526
26.2k
    memcpy(i, w.str(), w.size() + 1);
1527
26.2k
    fix_case(i);
1528
26.2k
    i += w.size();
1529
26.2k
    memcpy(i, suffix.c_str(), suffix.size() + 1);
1530
26.2k
    return word;
1531
26.2k
  }
1532
1533
7.53k
  void Sugs::transfer(SuggestionsImpl & res, int limit) {
1534
    // FIXME: double check that conv->in_code() is correct
1535
7.53k
    res.reset();
1536
//#  ifdef DEBUG_SUGGEST
1537
//    COUT << "\n" << "\n" 
1538
//   << original.word << '\t' 
1539
//   << original.soundslike << '\t'
1540
//   << "\n";
1541
//    String sl;
1542
//#  endif
1543
7.53k
    StrHashSet duplicates_check;
1544
7.53k
    pair<StrHashSet::iterator, bool> dup_pair;
1545
7.53k
    for (NearMisses::const_iterator i = scored_near_misses.begin();
1546
729k
   i != scored_near_misses.end() && res.size() < limit
1547
729k
           && ( i->adj_score < LARGE_NUM || res.size() < 3);
1548
722k
   ++i) {
1549
#    ifdef DEBUG_SUGGEST
1550
      //COUT.printf("%p %p: ",  i->word, i->soundslike);
1551
      COUT << i->word
1552
           << '\t' << i->adj_score 
1553
           << '\t' << i->score 
1554
           << '\t' << i->word_score
1555
           << '\t' << i->soundslike
1556
           << '\t' << i->soundslike_score << "\n";
1557
#    endif
1558
722k
      Working * src = i->src;
1559
722k
      if (i->repl_list != 0) {
1560
0
  do {
1561
0
          const char * word = i->src->fix_word(res.buf, i->repl_list->word);
1562
0
    dup_pair = duplicates_check.insert(word);
1563
0
    if (dup_pair.second) {
1564
0
            const char * pos = strchr(word, ' ');
1565
0
            bool in_dict;
1566
0
            if (pos == NULL)
1567
0
              in_dict = src->sp->check(word);
1568
0
            else
1569
0
              in_dict = src->sp->check(word, pos - word) && src->sp->check(pos + 1);
1570
0
            if (in_dict)
1571
0
              res.push_back(Suggestion(word,&*i));
1572
0
          }
1573
0
        } while (i->repl_list->adv());
1574
722k
      } else {
1575
722k
        char * word = src->have_presuf ? src->fix_word(res.buf, i->word) : src->fix_case(i->word);
1576
722k
  dup_pair = duplicates_check.insert(word);
1577
722k
  if (dup_pair.second)
1578
498k
          res.push_back(Suggestion(word,&*i));
1579
722k
      }
1580
722k
    }
1581
15.7k
    for (Vector<Working *>::iterator i = srcs.begin(), e = srcs.end(); i != e; ++i) {
1582
8.21k
      res.saved_bufs_.push_back((*i)->buffer.freeze());
1583
8.21k
    }
1584
7.53k
    res.saved_near_misses_.swap(scored_near_misses);
1585
7.53k
  }
1586
  
1587
  class SuggestionListImpl : public SuggestionList {
1588
    struct Parms {
1589
      typedef const char *                    Value;
1590
      typedef SuggestionsImpl::const_iterator Iterator;
1591
      Iterator end;
1592
7.53k
      Parms(Iterator e) : end(e) {}
1593
505k
      bool endf(Iterator e) const {return e == end;}
1594
7.53k
      Value end_state() const {return 0;}
1595
498k
      Value deref(Iterator i) const {return i->word;}
1596
    };
1597
  public:
1598
    SuggestionsImpl suggestions;
1599
1600
    //SuggestionList * clone() const {return new SuggestionListImpl(*this);}
1601
    //void assign(const SuggestionList * other) {
1602
    //  *this = *static_cast<const SuggestionListImpl *>(other);
1603
    //}
1604
1605
0
    bool empty() const { return suggestions.empty(); }
1606
0
    Size size() const { return suggestions.size(); }
1607
7.53k
    VirEmul * elements() const {
1608
7.53k
      return new MakeEnumeration<Parms, StringEnumeration>
1609
7.53k
  (suggestions.begin(), Parms(suggestions.end()));
1610
7.53k
    }
1611
  };
1612
1613
  class SuggestImpl : public Suggest {
1614
    SpellerImpl * speller_;
1615
    SuggestionListImpl  suggestion_list;
1616
    SuggestParms parms_;
1617
  public:
1618
1.43k
    SuggestImpl(SpellerImpl * sp) : speller_(sp) {}
1619
    PosibErr<void> setup(String mode = "");
1620
0
    PosibErr<void> set_mode(ParmString mode) {
1621
0
      return setup(mode);
1622
0
    }
1623
    SuggestionList & suggest(const char * word);
1624
    SuggestionsData & suggestions(const char * word);
1625
  };
1626
  
1627
  PosibErr<void> SuggestImpl::setup(String mode)
1628
1.43k
  {
1629
1.43k
    if (mode == "") 
1630
1.43k
      mode = speller_->config()->retrieve("sug-mode");
1631
    
1632
1.43k
    RET_ON_ERR(parms_.init(mode, speller_, speller_->config()));
1633
1634
1.42k
    return no_err;
1635
1.43k
  }
1636
1637
7.53k
  SuggestionList & SuggestImpl::suggest(const char * word) { 
1638
#   ifdef DEBUG_SUGGEST
1639
    COUT << "=========== begin suggest " << word << " ===========\n";
1640
#   endif
1641
7.53k
    Working * sug = new Working(speller_, &speller_->lang(),word, &parms_);
1642
7.53k
    Sugs * sugs = sug->suggestions();
1643
7.53k
    CheckInfo ci[8];
1644
7.53k
    SpellerImpl::CompoundInfo cpi;
1645
7.53k
    String buf = word;
1646
7.53k
    char * str = buf.mstr();
1647
7.53k
    speller_->check(str, str + buf.size(), false, speller_->run_together_limit(), ci, ci + 8, NULL, &cpi);
1648
7.53k
    if (cpi.count > 1 && cpi.incorrect_count == 1) {
1649
684
      CheckInfo * ci = cpi.first_incorrect;
1650
684
      String prefix(str, ci->word.str - str), middle(ci->word.str, ci->word.len), suffix(ci->word.str + ci->word.len);
1651
684
      sug = new Working(speller_, &speller_->lang(), middle, &parms_);
1652
684
      sug->camel_case = false;
1653
684
      sug->with_presuf(prefix, suffix);
1654
684
      Sugs * sugs2 = sug->suggestions();
1655
684
      sugs->merge(*sugs2);
1656
684
      delete sugs2;
1657
684
    }
1658
7.53k
    sugs->transfer(suggestion_list.suggestions, parms_.limit);
1659
7.53k
    delete sugs;
1660
#   ifdef DEBUG_SUGGEST
1661
    COUT << "^^^^^^^^^^^  end suggest " << word << "  ^^^^^^^^^^^\n";
1662
#   endif
1663
7.53k
    return suggestion_list;
1664
7.53k
  }
1665
1666
0
  SuggestionsData & SuggestImpl::suggestions(const char * word) {
1667
0
    suggest(word);
1668
0
    return suggestion_list.suggestions;
1669
0
  }
1670
}
1671
1672
namespace aspeller {
1673
1.43k
  PosibErr<Suggest *> new_default_suggest(SpellerImpl * m) {
1674
1.43k
    StackPtr<SuggestImpl> s(new SuggestImpl(m));
1675
1.43k
    RET_ON_ERR(s->setup());
1676
1.42k
    return s.release();
1677
1.43k
  }
1678
1679
1.43k
  PosibErr<void> SuggestParms::init(ParmString mode, SpellerImpl * sp) {
1680
1681
1.43k
    edit_distance_weights.del1 =  95;
1682
1.43k
    edit_distance_weights.del2 =  95;
1683
1.43k
    edit_distance_weights.swap =  90;
1684
1.43k
    edit_distance_weights.sub =  100;
1685
1.43k
    edit_distance_weights.similar = 10;
1686
1.43k
    edit_distance_weights.max = 100;
1687
1.43k
    edit_distance_weights.min =  90;
1688
1689
1.43k
    soundslike_weight = 50;
1690
1691
1.43k
    split_chars = " -";
1692
1.43k
    camel_case = false;
1693
1694
1.43k
    skip = 2;
1695
1.43k
    limit = 100;
1696
1.43k
    span = 50;
1697
1.43k
    ngram_keep = 10;
1698
1.43k
    use_typo_analysis = true;
1699
1.43k
    use_repl_table = sp->have_repl;
1700
1.43k
    try_one_edit_word = true; // always a good idea, even when
1701
                              // soundslike lookup is used
1702
1.43k
    check_after_one_edit_word = false;
1703
1.43k
    try_scan_0 = false;
1704
1.43k
    try_scan_1 = false;
1705
1.43k
    try_scan_2 = false;
1706
1.43k
    try_ngram = false;
1707
1.43k
    ngram_threshold = 2;
1708
1709
1.43k
    if (mode == "ultra") {
1710
4
      try_scan_0 = true;
1711
1.42k
    } else if (mode == "fast") {
1712
0
      try_scan_1 = true;
1713
1.42k
    } else if (mode == "normal") {
1714
1.40k
      try_scan_1 = true;
1715
1.40k
      try_scan_2 = true;
1716
1.40k
    } else if (mode == "slow") {
1717
12
      try_scan_2 = true;
1718
12
      try_ngram = true;
1719
12
      limit = 1000;
1720
12
      ngram_threshold = sp->have_soundslike ? 1 : 2;
1721
12
    } else if (mode == "bad-spellers") {
1722
6
      try_scan_2 = true;
1723
6
      try_ngram = true;
1724
6
      use_typo_analysis = false;
1725
6
      soundslike_weight = 55;
1726
6
      span = 125;
1727
6
      limit = 1000;
1728
6
      ngram_threshold = 1;
1729
6
    } else {
1730
1
      return make_err(bad_value, "sug-mode", mode, _("one of ultra, fast, normal, slow, or bad-spellers"));
1731
1
    }
1732
1733
1.42k
    if (!sp->have_soundslike) {
1734
      // in this case try_scan_0/1 will not get better results than
1735
      // try_one_edit_word
1736
0
      if (try_scan_0 || try_scan_1) {
1737
0
        check_after_one_edit_word = true;
1738
0
        try_scan_0 = false;
1739
0
        try_scan_1 = false;
1740
0
      }
1741
0
    }
1742
1743
1.42k
    word_weight = 100 - soundslike_weight;
1744
    
1745
1.42k
    return no_err;
1746
1.43k
  }
1747
1748
1.43k
  PosibErr<void> SuggestParms::init(ParmString mode, SpellerImpl * sp, Config * config) {
1749
1.43k
    RET_ON_ERR(init(mode,sp));
1750
1751
1.42k
    if (config->have("sug-typo-analysis"))
1752
0
      use_typo_analysis = config->retrieve_bool("sug-typo-analysis");
1753
1.42k
    if (config->have("sug-repl-table"))
1754
0
      use_repl_table = config->retrieve_bool("sug-repl-table");
1755
1756
1.42k
    camel_case = config->retrieve_bool("camel-case");
1757
1.42k
    if (camel_case)
1758
47
      split_chars.clear();
1759
1760
1.42k
    if (!camel_case || config->have("sug-split-char")) {
1761
1.38k
      StringList sl;
1762
1.38k
      config->retrieve_list("sug-split-char", &sl);
1763
1.38k
      StringListEnumeration els = sl.elements_obj();
1764
1.38k
      const char * s;
1765
1.38k
      split_chars.clear();
1766
4.76k
      while ((s = els.next()) != 0) {
1767
3.38k
        split_chars.push_back(*s);
1768
3.38k
      }
1769
1.38k
    }
1770
1771
1.42k
    if (use_typo_analysis) {
1772
1.42k
      String keyboard = config->retrieve("keyboard");
1773
1.42k
      RET_ON_ERR(aspeller::setup(ti, config, &sp->lang(), keyboard));
1774
1.42k
    }
1775
    
1776
1.42k
    return no_err;
1777
1.42k
  }
1778
  
1779
}
1780