Coverage Report

Created: 2023-12-08 06:59

/src/aspell/modules/speller/default/suggest.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2000-2005 by Kevin Atkinson under the terms of the LGPL
2
3
// suggest.cpp Suggestion code for Aspell
4
5
// The magic behind my spell checker comes from merging Lawrence
6
// Philips excellent metaphone algorithm and Ispell's near miss
7
// strategy which is inserting a space or hyphen, interchanging two
8
// adjacent letters, changing one letter, deleting a letter, or adding
9
// a letter.
10
// 
11
// The process goes something like this.
12
// 
13
// 1.     Convert the misspelled word to its soundslike equivalent (its
14
//        metaphone for English words).
15
// 
16
// 2.     Find words that have the same soundslike pattern.
17
//
18
// 3.     Find words that have similar soundslike patterns. A similar
19
//        soundlike pattern is a pattern that is obtained by
20
//        interchanging two adjacent letters, changing one letter,
21
//        deleting a letter, or adding a letter.
22
//
23
// 4.     Score the result list and return the words with the lowest
24
//        score. The score is roughly the weighted average of the edit
25
//        distance of the word to the misspelled word, the soundslike
26
//        equivalent of the two words, and the phoneme of the two words.
27
//        The edit distance is the weighted total of the number of
28
//        deletions, insertions, exchanges, or adjacent swaps needed to
29
//        make one string equivalent to the other.
30
//
31
// Please note that the soundlike equivalent is a rough approximation
32
// of how the words sounds. It is not the phoneme of the word by any
33
// means.  For more information on the metaphone algorithm please see
34
// the file metaphone.cc which includes a detailed description of it.
35
//
36
// NOTE: It is assumed that strlen(soundslike) <= strlen(word)
37
//       for any possible word
38
39
// POSSIBLE OPTIMIZATION:
40
//   store the number of letters that are the same as the previous 
41
//     soundslike so that it can possibly be skipped
42
43
#include "getdata.hpp"
44
45
#include "fstream.hpp"
46
47
#include "speller_impl.hpp"
48
#include "asuggest.hpp"
49
#include "basic_list.hpp"
50
#include "clone_ptr-t.hpp"
51
#include "config.hpp"
52
#include "data.hpp"
53
#include "editdist.hpp"
54
#include "editdist2.hpp"
55
#include "errors.hpp"
56
#include "file_data_util.hpp"
57
#include "hash-t.hpp"
58
#include "language.hpp"
59
#include "leditdist.hpp"
60
#include "speller_impl.hpp"
61
#include "stack_ptr.hpp"
62
#include "suggest.hpp"
63
#include "vararray.hpp"
64
#include "string_list.hpp"
65
66
#include "gettext.h"
67
68
//#include "iostream.hpp"
69
//#define DEBUG_SUGGEST
70
71
using namespace aspeller;
72
using namespace acommon;
73
using std::pair;
74
75
namespace {
76
77
  // Returns an iterator to the element following `i`.  The argument is
  // taken by value, so the caller's iterator is not advanced.
  template <class Iterator>
  inline Iterator preview_next (Iterator i) {
    Iterator following = i;
    ++following;
    return following;
  }
81
  
82
  //
83
  // OriginalWord stores information about the original misspelled word
84
  //   for convenience and speed.
85
  //
86
  struct OriginalWord {
87
    String   word;
88
    String   lower;
89
    String   clean;
90
    String   soundslike;
91
    CasePattern  case_pattern;
92
10.9k
    OriginalWord() {}
93
  };
94
95
  //
96
  // struct ScoreWordSound - used for storing the possible words while
97
  //   they are being processed.
98
  //
99
100
  // Empty soundslike placeholder.  try_split() and try_camel_word() store
  // it in ScoreInfo::soundslike for the candidates they create.
  static const char * NO_SOUNDSLIKE = "";
101
102
  class Working;
103
104
  // Marks how a candidate was derived when it is not a plain edit:
  //   None          - no special edit
  //   Split         - try_split() inserted a split character into the word
  //   CamelSplit    - try_camel_edits() changed a letter to upper case
  //   CamelJoin     - try_camel_edits() changed a letter to lower case
  //   CamelOffByOne - a CamelJoin plus an adjacent letter made upper case
  enum SpecialEdit {None, Split, CamelSplit, CamelJoin, CamelOffByOne};
105
106
28.4k
  static inline int special_score(const EditDistanceWeights & w, SpecialEdit e) {
107
28.4k
    switch (e) {
108
26.6k
    case Split:
109
26.6k
      return w.max + 2;
110
1.27k
    case CamelJoin:
111
1.27k
      return w.max + 1;
112
52
    case CamelSplit:
113
52
      return w.max + 1;
114
483
    case CamelOffByOne:
115
483
      return w.swap - 1;
116
0
    default:
117
0
      abort();
118
28.4k
    }
119
28.4k
  }
120
121
  struct SpecialTypoScore {
122
    int score;
123
    bool is_overall_score;
124
2.45M
    operator bool() const {return score < LARGE_NUM;}
125
    SpecialTypoScore()
126
1.32M
      : score(LARGE_NUM), is_overall_score(false) {}
127
    SpecialTypoScore(int s, bool q)
128
55.6k
      : score(s), is_overall_score(q) {}
129
  };
130
  
131
1.38M
  // Typo-aware counterpart of special_score().  None yields an empty
  // SpecialTypoScore; an unlisted value aborts the program.
  static inline SpecialTypoScore special_typo_score(const TypoEditDistanceInfo & w, SpecialEdit e) {
    if (e == None)
      return SpecialTypoScore();
    if (e == Split)
      return SpecialTypoScore(w.max + 2, true);
    if (e == CamelSplit || e == CamelJoin)
      return SpecialTypoScore(w.max + 1, true);
    if (e == CamelOffByOne)
      return SpecialTypoScore(w.swap - 1, false);
    abort();
  }
147
148
  struct ScoreWordSound {
149
    Working * src;
150
    char * word;
151
    char * word_clean;
152
    //unsigned word_size;
153
    const char * soundslike;
154
    int           score;
155
    int           adj_score;
156
    int           word_score;
157
    int           soundslike_score;
158
    bool          count;
159
    SpecialEdit   special_edit;
160
    bool          repl_table;
161
    WordEntry * repl_list;
162
12.4M
    ScoreWordSound(Working * s) : src(s), adj_score(LARGE_NUM), repl_list(0) {}
163
24.8M
    ~ScoreWordSound() {delete repl_list;}
164
  };
165
166
  inline int compare (const ScoreWordSound &lhs, 
167
          const ScoreWordSound &rhs) 
168
67.8M
  {
169
67.8M
    int temp = lhs.score - rhs.score;
170
67.8M
    if (temp) return temp;
171
45.7M
    return strcmp(lhs.word,rhs.word);
172
67.8M
  }
173
174
  inline int adj_score_lt(const ScoreWordSound &lhs,
175
                          const ScoreWordSound &rhs)
176
23.9M
  {
177
23.9M
    int temp = lhs.adj_score - rhs.adj_score;
178
23.9M
    if (temp) return temp < 0;
179
19.8M
    return strcmp(lhs.word,rhs.word) < 0;
180
23.9M
  }
181
182
  inline bool operator < (const ScoreWordSound & lhs, 
183
67.8M
        const ScoreWordSound & rhs) {
184
67.8M
    return compare(lhs, rhs) < 0;
185
67.8M
  }
186
187
  inline bool operator <= (const ScoreWordSound & lhs, 
188
0
         const ScoreWordSound & rhs) {
189
0
    return compare(lhs, rhs) <= 0;
190
0
  }
191
192
  inline bool operator == (const ScoreWordSound & lhs, 
193
0
         const ScoreWordSound & rhs) {
194
0
    return compare(lhs, rhs) == 0;
195
0
  }
196
197
  typedef BasicList<ScoreWordSound> NearMisses;
198
 
199
  class Sugs;
200
  
201
  class Working {
202
    friend class Sugs;
203
204
    const Language *     lang;
205
    OriginalWord         original;
206
    const SuggestParms * parms;
207
    SpellerImpl *        sp;
208
209
    String prefix;
210
    String suffix;
211
    bool have_presuf;
212
213
    int threshold;
214
    int adj_threshold;
215
    int try_harder;
216
217
    EditDist (* edit_dist_fun)(const char *, const char *,
218
                               const EditDistanceWeights &);
219
220
    unsigned int max_word_length;
221
222
    NearMisses         scored_near_misses;
223
    NearMisses         near_misses;
224
225
    char * temp_end;
226
227
    ObjStack           buffer;
228
    ObjStack           temp_buffer;
229
230
    static const bool do_count = true;
231
    static const bool dont_count = false;
232
233
    CheckInfo check_info[8];
234
235
2.35M
    void commit_temp(const char * b) {
236
2.35M
      if (temp_end) {
237
705k
        buffer.resize_temp(temp_end - b + 1);
238
705k
        buffer.commit_temp();
239
705k
        temp_end = 0; }}
240
47.0M
    void abort_temp() {
241
47.0M
      buffer.abort_temp();
242
47.0M
      temp_end = 0;}
243
0
    const char * to_soundslike_temp(const char * w, unsigned s, unsigned * len = 0) {
244
0
      char * sl = (char *)buffer.alloc_temp(s + 1);
245
0
      temp_end = lang->LangImpl::to_soundslike(sl, w, s);
246
0
      if (len) *len = temp_end - sl;
247
0
      return sl;}
248
526k
    const char * to_soundslike_temp(const WordEntry & sw) {
249
526k
      char * sl = (char *)buffer.alloc_temp(sw.word_size + 1);
250
526k
      temp_end = lang->LangImpl::to_soundslike(sl, sw.word, sw.word_size, sw.word_info);
251
526k
      if (temp_end == 0) return sw.word;
252
526k
      else return sl;}
253
318k
    const char * to_soundslike(const char * w, unsigned s) {
254
318k
      char * sl = (char *)buffer.alloc_temp(s + 1);
255
318k
      temp_end = lang->LangImpl::to_soundslike(sl, w, s);
256
318k
      commit_temp(sl);
257
318k
      return sl;}
258
259
    struct ScoreInfo {
260
      const char *  soundslike;
261
      int           word_score;
262
      int           soundslike_score;
263
      bool          count;
264
      SpecialEdit   special_edit;
265
      bool          repl_table;
266
      WordEntry *   repl_list;
267
      ScoreInfo()
268
        : soundslike(), word_score(LARGE_NUM), soundslike_score(LARGE_NUM),
269
17.5M
          count(true), special_edit(None), repl_table(false), repl_list() {}
270
    };
271
272
883k
    char * fix_case(char * str) {
273
883k
      lang->LangImpl::fix_case(original.case_pattern, str, str);
274
883k
      return str;
275
883k
    }
276
0
    const char * fix_case(const char * str, String & buf) {
277
0
      return lang->LangImpl::fix_case(original.case_pattern, str, buf);
278
0
    }
279
280
    char * fix_word(ObjStack & buf, ParmStr w);
281
282
    MutableString form_word(CheckInfo & ci);
283
    void try_word_n(ParmString str, const ScoreInfo & inf);
284
    bool check_word_s(ParmString word, CheckInfo * ci);
285
    unsigned check_word(char * word, char * word_end, CheckInfo * ci,
286
                        /* it WILL modify word */
287
                        unsigned pos = 1);
288
    void try_word_c(char * word, char * word_end, const ScoreInfo & inf);
289
290
5.46M
    void try_word(char * word, char * word_end, const ScoreInfo & inf) {
291
5.46M
      if (sp->unconditional_run_together_)
292
70.0k
        try_word_c(word,word_end,inf);
293
5.39M
      else
294
5.39M
        try_word_n(word,inf);
295
5.46M
    }
296
5.46M
    void try_word(char * word, char * word_end, int score) {
297
5.46M
      ScoreInfo inf;
298
5.46M
      inf.word_score = score;
299
5.46M
      try_word(word, word_end, inf);
300
5.46M
    }
301
302
    void add_sound(SpellerImpl::WS::const_iterator i,
303
                   WordEntry * sw, const char * sl, int score = LARGE_NUM);
304
    void add_nearmiss(char * word, unsigned word_size, WordInfo word_info,
305
                      const ScoreInfo &);
306
    void add_nearmiss_w(SpellerImpl::WS::const_iterator, const WordEntry & w,
307
                        const ScoreInfo &);
308
    void add_nearmiss_a(SpellerImpl::WS::const_iterator, const WordAff * w,
309
                        const ScoreInfo &);
310
0
    // A score counts as present when it is below LARGE_NUM.
    bool have_score(int score) {return LARGE_NUM > score;}
311
31.4M
    int needed_level(int want, int soundslike_score) {
312
      // (word_weight*??? + soundlike_weight*soundslike_score)/100 <= want
313
      // word_weight*??? + soundlike_weight*soundslike_score <= want*100
314
      // word_weight*??? <= want*100 - soundlike_weight*soundslike_score
315
      // ??? <= (want*100 - soundlike_weight*soundslike_score) / word_weight
316
      // level = ceil(???/edit_distance_weights.min)
317
31.4M
      int n = 100*want - parms->soundslike_weight*soundslike_score;
318
31.4M
      if (n <= 0) return 0;
319
30.4M
      int d = parms->word_weight*parms->edit_distance_weights.min;
320
30.4M
      return (n-1)/d+1; // roundup
321
31.4M
    }
322
21.2M
    int weighted_average(int soundslike_score, int word_score) {
323
21.2M
      return (parms->word_weight*word_score 
324
21.2M
        + parms->soundslike_weight*soundslike_score)/100;
325
21.2M
    }
326
1.07M
    int adj_wighted_average(int soundslike_score, int word_score, int one_edit_max) {
327
1.07M
      int soundslike_weight = parms->soundslike_weight;
328
1.07M
      int word_weight = parms->word_weight;
329
1.07M
      if (word_score <= one_edit_max) {
330
335k
        const int factor = word_score < 100 ? 8 : 2;
331
335k
        soundslike_weight = (parms->soundslike_weight+factor-1)/factor;
332
335k
      }
333
      // NOTE: Theoretical if the soundslike is might be beneficial to
334
      // adjust the word score so it doesn't contribute as much.  If
335
      // the score is already around 100 (one edit dist) then it may
336
      // not be a good idea to lower it more, but if the word score is
337
      // 200 or more then it might make sence to reduce it some.
338
      // HOWEVER, this will likely not work well, espacally with small
339
      // words and there are just too many words with the same
340
      // soundlike.  In any case that what the special "soundslike"
341
      // and "bad-spellers" mode is for.
342
1.07M
      return (word_weight*word_score
343
1.07M
              + soundslike_weight*soundslike_score)/100;
344
1.07M
    }
345
36.5k
    // Advances `i` past the first couple of scored near misses, which
    // should not be counted in the threshold score.  Entries that are not
    // flagged `count`, or that compare equal (case insensitive compare)
    // to the last counted word, are stepped over without being counted.
    // Stops once parms->skip words were counted or when `i` reaches the
    // last element.  Returns the number of counted words.
    int skip_first_couple(NearMisses::iterator & i) {
      int counted = 0;
      InsensitiveCompare same_word(lang);
      const char * last_counted = "";
      for (; preview_next(i) != scored_near_misses.end(); ++i) {
        const bool ignorable = !i->count || same_word(last_counted, i->word) == 0;
        if (ignorable) continue;
        if (counted == parms->skip) break;
        last_counted = i->word;
        ++counted;
      }
      return counted;
    }
365
366
    void try_camel_word(String & word, SpecialEdit edit);
367
368
    void try_split();
369
    void try_camel_edits();
370
    void try_one_edit_word();
371
    void try_scan();
372
    void try_scan_root();
373
    void try_repl();
374
    void try_ngram();
375
376
    void score_list();
377
    void fine_tune_score(int thres);
378
  public:
379
    // Sets up a suggestion run for the word `w`.
    //   m - speller whose dictionaries are searched
    //   l - language used for case, clean and soundslike conversions
    //   w - the (misspelled) word
    //   p - suggestion parameters; must be non-null, it is dereferenced
    //
    // All scalar members get a defined start value.  temp_end starts at
    // null because commit_temp() treats null as "nothing pending".
    // NOTE(review): adj_threshold and try_harder are presumably assigned
    // by the scoring code before being read; zero is only a deterministic
    // placeholder - confirm against score_list()/fine_tune_score().
    Working(SpellerImpl * m, const Language *l,
      const String & w, const SuggestParms * p)
      : lang(l), original(), parms(p), sp(m), have_presuf(false) 
      , threshold(1), adj_threshold(0), try_harder(0)
      , edit_dist_fun(0), max_word_length(0), temp_end(0)
      , camel_case(p->camel_case)
    {
      memset(static_cast<void *>(check_info), 0, sizeof(check_info));
      original.word = w;
      l->to_lower(original.lower, w.str());
      l->to_clean(original.clean, w.str());
      l->to_soundslike(original.soundslike, w.str());
      original.case_pattern = l->case_pattern(w);
    }
392
343
    // Records a prefix and suffix for this run and flags them as present.
    void with_presuf(ParmStr pre, ParmStr suf) {
      have_presuf = true;
      prefix = pre;
      suffix = suf;
    }
397
    bool camel_case;
398
    // `this` is expected to be allocated with new and its ownership
399
    // will be transferred to the returning Sugs object
400
    Sugs * suggestions(); 
401
  };
402
403
  struct Suggestion {
404
    const char * word;
405
    const ScoreWordSound * inf;
406
0
    double distance() const {
407
0
      return inf->adj_score/100.0;
408
0
    }
409
0
    double normalized_score() const {
410
0
      return 100.0/(inf->adj_score + 100);
411
0
    }
412
0
    Suggestion() : word(), inf() {}
413
    Suggestion(const char * word, const ScoreWordSound * inf)
414
627k
      : word(word), inf(inf) {}
415
  };
416
417
  // Collection of ObjStack memory blocks.  reset() hands every stored
  // block to ObjStack::dealloc and empties the vector; the destructor
  // does the same.
  struct SavedBufs : public Vector<ObjStack::Memory *> {
    void reset() {
      Vector<ObjStack::Memory *>::iterator cur = begin();
      const Vector<ObjStack::Memory *>::iterator stop = end();
      while (cur != stop) {
        ObjStack::dealloc(*cur);
        ++cur;
      }
      clear();
    }
    ~SavedBufs() {
      reset();
    }
  };
428
429
  class SuggestionsImpl;
430
431
  class Sugs {
432
  public:
433
    Vector<Working *> srcs;
434
    NearMisses scored_near_misses;
435
436
343
    void merge(Sugs & other) {
437
343
      srcs.insert(srcs.end(), other.srcs.begin(), other.srcs.end());
438
343
      other.srcs.clear();
439
343
      scored_near_misses.merge(other.scored_near_misses, adj_score_lt);
440
343
    }
441
442
    void transfer(SuggestionsImpl &, int limit);
443
    
444
10.9k
    Sugs(Working * s) {
445
10.9k
      srcs.push_back(s);
446
10.9k
    }
447
10.9k
    ~Sugs() {
448
21.8k
      for (Vector<Working *>::iterator i = srcs.begin(), e = srcs.end(); i != e; ++i) {
449
10.9k
        delete *i;
450
10.9k
        *i = NULL;
451
10.9k
      }
452
10.9k
    }
453
  };
454
455
  // The list of suggestions handed back to the caller.  The vector base
  // holds the Suggestion entries.
  // NOTE(review): saved_bufs_, saved_near_misses_ and buf presumably keep
  // alive the memory the entries point into - confirm in Sugs::transfer.
  class SuggestionsImpl : public SuggestionsData, public Vector<Suggestion> {
  public:
    SavedBufs   saved_bufs_;
    NearMisses  saved_near_misses_;
    ObjStack    buf;
    SuggestionsImpl() {}
  private:
    // not copyable (declared, never defined)
    SuggestionsImpl(const SuggestionsImpl &);
  public:
    // Drops all suggestions and releases the saved storage.
    void reset() {
      clear();
      buf.reset();
      saved_bufs_.reset();
      saved_near_misses_.clear();
    }
    // Fills `res` with one null terminated character vector per
    // suggestion, converted through `conv` when it is non-null.
    void get_words(Convert * conv, Vector<CharVector> & res) {
      res.clear();
      res.reserve(size());
      if (conv) {
        for (iterator i = begin(), e = end(); i != e; ++i) {
          res.push_back(CharVector());
          // len + 1 to also convert the null
          conv->convert(i->word, strlen(i->word) + 1, res.back());
        }
      } else {
        for (iterator i = begin(), e = end(); i != e; ++i) {
          res.push_back(CharVector());
          // reserve on the new element, not on the outer vector
          res.back().reserve(strlen(i->word) + 1);
          res.back().append(i->word);
          res.back().append('\0');
        }
      }
    }
    // Fills `res` with Suggestion::normalized_score() of every entry.
    void get_normalized_scores(Vector<double> & res) {
      res.clear();
      res.reserve(size());
      for (iterator i = begin(), e = end(); i != e; ++i)
        res.push_back(i->normalized_score());
    }
    // Fills `res` with Suggestion::distance() of every entry.
    void get_distances(Vector<double> & res) {
      res.clear();
      res.reserve(size());
      for (iterator i = begin(), e = end(); i != e; ++i)
        res.push_back(i->distance());
    }
  };
501
502
10.9k
  // Runs the suggestion strategies enabled in `parms`, from cheapest to
  // most expensive, and returns the scored candidates in a new Sugs that
  // owns `this`.  A stage may end the search early depending on
  // try_harder; the final fine tuning and sort always run, except for
  // over-long words, which return an empty result.
  Sugs * Working::suggestions() {

    Sugs * const result = new Sugs(this);

    // to prevent overflow in the editdist functions
    if (original.word.size() * parms->edit_distance_weights.max >= 0x8000)
      return result;

    try_split();

    try_camel_edits();

    if (parms->use_repl_table) {

#ifdef DEBUG_SUGGEST
      COUT.printl("TRYING REPLACEMENT TABLE");
#endif

      try_repl();
    }

    // One-shot loop: `break` skips the remaining stages and continues
    // with the wrap-up below.
    do {

      if (parms->try_one_edit_word) {

#ifdef DEBUG_SUGGEST
        COUT.printl("TRYING ONE EDIT WORD");
#endif

        try_one_edit_word();
        score_list();
        if (parms->check_after_one_edit_word && try_harder <= 0) break;
        // need to fine tune the score to account for special weights
        // applied to typos, otherwise some typos that produce very
        // different soundslike may be missed
        fine_tune_score(LARGE_NUM);
      }

      if (parms->try_scan_0) {

#ifdef DEBUG_SUGGEST
        COUT.printl("TRYING SCAN 0");
#endif
        edit_dist_fun = limit0_edit_distance;

        if (!sp->soundslike_root_only)
          try_scan();
        else
          try_scan_root();

        score_list();
      }

      if (parms->try_scan_1) {

#ifdef DEBUG_SUGGEST
        COUT.printl("TRYING SCAN 1");
#endif
        edit_dist_fun = limit1_edit_distance;

        if (!sp->soundslike_root_only)
          try_scan();
        else
          try_scan_root();

        score_list();

        if (try_harder <= 0) break;
      }

      if (parms->try_scan_2) {

#ifdef DEBUG_SUGGEST
        COUT.printl("TRYING SCAN 2");
#endif

        edit_dist_fun = limit2_edit_distance;

        if (!sp->soundslike_root_only)
          try_scan();
        else
          try_scan_root();

        score_list();

        if (try_harder < parms->ngram_threshold) break;
      }

      if (parms->try_ngram) {

#ifdef DEBUG_SUGGEST
        COUT.printl("TRYING NGRAM");
#endif

        try_ngram();

        score_list();
      }

    } while (false);

    fine_tune_score(threshold);
    scored_near_misses.sort(adj_score_lt);
    result->scored_near_misses.swap(scored_near_misses);
    near_misses.clear();
    return result;
  }
612
613
  // Forms a word by combining CheckInfo fields.
614
  // Will grow the temp in the buffer.  The final
615
  // word must be null terminated and committed.
616
  // It returns a MutableString of what was appended to the buffer.
617
  MutableString Working::form_word(CheckInfo & ci) 
  {
    // length of the root once the stripped affix parts are removed
    const size_t stem_len = ci.word.len - ci.pre_strip_len - ci.suf_strip_len;
    const size_t total_len = stem_len + ci.pre_add_len + ci.suf_add_len;
    char * const out = (char *)buffer.grow_temp(total_len);
    // pieces are laid out as: added prefix, stem, added suffix
    char * const stem_pos = out + ci.pre_add_len;
    char * const suffix_pos = stem_pos + stem_len;
    if (ci.pre_add_len) 
      memcpy(out, ci.pre_add, ci.pre_add_len);
    memcpy(stem_pos, ci.word.str + ci.pre_strip_len, stem_len);
    if (ci.suf_add_len) 
      memcpy(suffix_pos, ci.suf_add, ci.suf_add_len);
    return MutableString(out,total_len);
  }
629
630
  // Looks `str` up (clean lookup) in every suggestion dictionary and
  // adds each hit as a near miss scored with `inf`.  When the speller
  // uses affix compression the word is also run through the affix
  // checker, and a hit is rebuilt with form_word() and added as well.
  void Working::try_word_n(ParmString str, const ScoreInfo & inf)
  {
    WordEntry sw;
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
         i != sp->suggest_ws.end();
         ++i)
    {
      (*i)->clean_lookup(str, sw);
      for (;!sw.at_end(); sw.adv())
        add_nearmiss_w(i, sw, inf);
    }
    if (sp->affix_compress) {
      CheckInfo ci; memset(static_cast<void *>(&ci), 0, sizeof(ci));
      bool res = lang->affix()->affix_check(LookupInfo(sp, LookupInfo::Clean), str, ci, 0);
      if (!res) return;
      // form_word() appends to the buffer's temp; one more byte is grown
      // for the terminator and the start is re-read afterwards
      form_word(ci);
      char * end = (char *)buffer.grow_temp(1);
      char * tmp = (char *)buffer.temp_ptr();
      buffer.commit_temp();
      *end = '\0';
      add_nearmiss(tmp, end - tmp, 0, inf);
    }
  }
655
656
  // Checks a single word.  On a dictionary hit the found spelling is
  // stored in ci->word.  Without a hit, and with affix compression
  // enabled, the decision is left to the affix checker, which fills *ci.
  bool Working::check_word_s(ParmString word, CheckInfo * ci)
  {
    WordEntry found;
    SpellerImpl::WS::const_iterator dict = sp->suggest_ws.begin();
    while (dict != sp->suggest_ws.end()) {
      (*dict)->clean_lookup(word, found);
      if (!found.at_end()) {
        ci->word = found.word;
        return true;
      }
      ++dict;
    }
    if (!sp->affix_compress)
      return false;
    return lang->affix()->affix_check(LookupInfo(sp, LookupInfo::Clean), word, *ci, 0);
  }
674
675
  unsigned Working::check_word(char * word, char * word_end,  CheckInfo * ci,
676
                          /* it WILL modify word */
677
                          unsigned pos)
678
70.0k
  {
679
70.0k
    unsigned res = check_word_s(word, ci);
680
70.0k
    if (res) return pos + 1;
681
32.8k
    if (pos + 1 >= sp->run_together_limit_) return 0;
682
0
    for (char * i = word + sp->run_together_min_; 
683
0
         i <= word_end - sp->run_together_min_;
684
0
         ++i)
685
0
    {
686
0
      char t = *i;
687
0
      *i = '\0';
688
0
      res = check_word_s(word, ci);
689
0
      *i = t;
690
0
      if (!res) continue;
691
0
      res = check_word(i, word_end, ci + 1, pos + 1);
692
0
      if (res) return res;
693
0
    }
694
0
    memset(static_cast<void *>(ci), 0, sizeof(CheckInfo));
695
0
    return 0;
696
0
  }
697
698
  void Working::try_word_c(char * word, char * word_end, const ScoreInfo & inf)
699
70.0k
  {
700
70.0k
    unsigned res = check_word(word, word_end, check_info);
701
70.0k
    assert(res <= sp->run_together_limit_);
702
    //CERR.printf(">%s\n", word);
703
70.0k
    if (!res) return;
704
37.1k
    buffer.abort_temp();
705
37.1k
    MutableString tmp = form_word(check_info[0]);
706
37.1k
    CasePattern cp = lang->case_pattern(tmp, tmp.size);
707
111k
    for (unsigned i = 1; i <= res; ++i) {
708
74.3k
      char * t = form_word(check_info[i]);
709
74.3k
      if (cp == FirstUpper && lang->is_lower(t[1])) 
710
3.18k
        t[0] = lang->to_lower(t[0]);
711
74.3k
    }
712
37.1k
    char * end = (char *)buffer.grow_temp(1);
713
37.1k
    char * beg = (char *)buffer.temp_ptr(); // since the original string may of moved
714
37.1k
    *end = 0;
715
37.1k
    buffer.commit_temp();
716
37.1k
    add_nearmiss(beg, end - beg, 0, inf);
717
    //CERR.printl(tmp);
718
37.1k
    memset(static_cast<void *>(check_info), 0, sizeof(CheckInfo)*res);
719
37.1k
  }
720
721
  void Working::add_nearmiss(char * word, unsigned word_size,
722
                             WordInfo word_info,
723
                             const ScoreInfo & inf)
724
12.3M
  {
725
12.3M
    if (word_size * parms->edit_distance_weights.max >= 0x8000) 
726
0
      return; // to prevent overflow in the editdist functions
727
728
12.3M
    near_misses.push_front(ScoreWordSound(this));
729
12.3M
    ScoreWordSound & d = near_misses.front();
730
12.3M
    d.word = word;
731
12.3M
    d.soundslike = inf.soundslike;
732
733
12.3M
    d.word_score = inf.word_score;
734
12.3M
    d.soundslike_score = inf.soundslike_score;
735
736
12.3M
    if (!sp->have_soundslike) {
737
0
      if (d.word_score >= LARGE_NUM) d.word_score = d.soundslike_score;
738
0
      else if (d.soundslike_score >= LARGE_NUM) d.soundslike_score = d.word_score;
739
0
    }
740
741
12.3M
    unsigned int l = word_size;
742
12.3M
    if (l > max_word_length) max_word_length = l;
743
    
744
12.3M
    if (!(word_info & ALL_CLEAN)) {
745
5.90M
      d.word_clean = (char *)buffer.alloc(word_size + 1);
746
5.90M
      lang->LangImpl::to_clean((char *)d.word_clean, word);
747
6.47M
    } else {
748
6.47M
      d.word_clean = d.word;
749
6.47M
    }
750
751
12.3M
    if (!sp->have_soundslike && !d.soundslike)
752
0
      d.soundslike = d.word_clean;
753
    
754
12.3M
    d.special_edit = inf.special_edit;
755
12.3M
    d.repl_table = inf.repl_table;
756
12.3M
    d.count = inf.count;
757
12.3M
    d.repl_list = inf.repl_list;
758
12.3M
  }
759
760
  // Adds the dictionary entry `w`, found in dictionary `i`, as a near
  // miss.  The word is duplicated into `buffer`.  For entries marked
  // Misspelled the replacement list is looked up in the dictionary
  // (treated as a ReplacementDict) and attached to the near miss.
  void Working::add_nearmiss_w(SpellerImpl::WS::const_iterator i,
                               const WordEntry & w, const ScoreInfo & inf0)
  {
    assert(w.word_size == strlen(w.word));
    // add_nearmiss() silently drops over-long words.  Apply the same
    // test up front: otherwise the WordEntry allocated below would never
    // be handed to a ScoreWordSound and would leak.
    const unsigned word_size = w.word_size;
    if (word_size * parms->edit_distance_weights.max >= 0x8000)
      return;
    ScoreInfo inf = inf0;
    if (w.what == WordEntry::Misspelled) {
      inf.repl_list = new WordEntry;
      const ReplacementDict * repl_dict
        = static_cast<const ReplacementDict *>(*i);
      repl_dict->repl_lookup(w, *inf.repl_list);
    }
    add_nearmiss(buffer.dup(ParmString(w.word, w.word_size)), 
                 w.word_size, w.word_info, inf);
  }
774
  
775
  // Adds the word of `w` as a near miss.  The word is duplicated into
  // `buffer`; the dictionary iterator is not used.
  void Working::add_nearmiss_a(SpellerImpl::WS::const_iterator i,
                               const WordAff * w, const ScoreInfo & inf)
  {
    char * const copy = buffer.dup(w->word);
    add_nearmiss(copy, w->word.size, 0, inf);
  }
780
781
10.6k
  void Working::try_split() {
782
10.6k
    const String & word = original.word;
783
    
784
10.6k
    if (word.size() < 4 || parms->split_chars.empty()) return;
785
5.06k
    size_t i = 0;
786
    
787
5.06k
    String new_word_str;
788
5.06k
    String buf;
789
5.06k
    new_word_str.resize(word.size() + 1);
790
5.06k
    char * new_word = new_word_str.data();
791
5.06k
    memcpy(new_word, word.data(), word.size());
792
5.06k
    new_word[word.size() + 1] = '\0';
793
5.06k
    new_word[word.size() + 0] = new_word[word.size() - 1];
794
    
795
67.4k
    for (i = word.size() - 2; i >= 2; --i) {
796
62.3k
      new_word[i+1] = new_word[i];
797
62.3k
      new_word[i] = '\0';
798
      
799
62.3k
      if (sp->check(new_word) && sp->check(new_word + i + 1)) {
800
39.9k
        for (size_t j = 0; j != parms->split_chars.size(); ++j)
801
26.6k
        {
802
26.6k
          new_word[i] = parms->split_chars[j];
803
26.6k
          ScoreInfo inf;
804
26.6k
          inf.word_score = special_score(parms->edit_distance_weights, Split);
805
26.6k
          inf.soundslike_score = inf.word_score;
806
26.6k
          inf.soundslike = NO_SOUNDSLIKE;
807
26.6k
          inf.count = false;
808
26.6k
          inf.special_edit = Split;
809
26.6k
          add_nearmiss(buffer.dup(new_word), word.size() + 1, 0, inf);
810
26.6k
        }
811
13.3k
      }
812
62.3k
    }
813
5.06k
  }
814
815
32.9k
  // Adds `word` as a candidate produced by the camel case edit `edit`,
  // provided the speller accepts it.  `edit` must not be None, because
  // special_score() aborts for None.
  void Working::try_camel_word(String & word, SpecialEdit edit) {
    CheckInfo ci[8];
    bool ok = sp->check(word.begin(), word.end(), false, sp->run_together_limit(), ci, ci + 8, NULL, NULL);
    if (!ok) return;
    ScoreInfo inf;
    inf.word_score = special_score(parms->edit_distance_weights, edit);
    inf.soundslike_score = inf.word_score;
    inf.soundslike = NO_SOUNDSLIKE;
    inf.count = false;
    inf.special_edit = edit;
    // a camel edit only changes letter case, so the candidate is exactly
    // word.size() characters long (unlike try_split(), which inserts one)
    add_nearmiss(buffer.dup(word.c_str()), word.size(), 0, inf);
  }
827
828
10.6k
  void Working::try_camel_edits() {
829
10.6k
    if (!camel_case) return;
830
    
831
1.61k
    String word = original.word;
832
1.61k
    word.ensure_null_end();
833
834
23.4k
    for (size_t i = 1; i < word.size(); ++i) {
835
      // try splitting or joining a word by changing the case of a letter
836
21.8k
      SpecialEdit edit = None;
837
21.8k
      char save = word[i];
838
21.8k
      word[i] = lang->to_upper(word[i]);
839
21.8k
      if (word[i] != save) {
840
9.64k
        edit = CamelSplit;
841
12.1k
      } else {
842
12.1k
        word[i] = lang->to_lower(word[i]);
843
12.1k
        if (word[i] != save)
844
10.7k
          edit = CamelJoin;
845
12.1k
      }
846
21.8k
      try_camel_word(word, edit);
847
848
      //if the char was made lower now also try making an adjacent character uppercase
849
21.8k
      if (edit == CamelJoin) {
850
10.7k
        char save2 = word[i-1];
851
10.7k
        word[i-1] = lang->to_upper(word[i-1]);
852
10.7k
        if (word[i-1] != save2)
853
6.00k
          try_camel_word(word, CamelOffByOne);
854
10.7k
        word[i-1] = save2;
855
10.7k
        if (i+1 < word.size()) {
856
9.98k
          save2 = word[i+1];
857
9.98k
          word[i+1] = lang->to_upper(word[i+1]);
858
9.98k
          if (word[i+1] != save2)
859
5.12k
            try_camel_word(word, CamelOffByOne);
860
9.98k
          word[i+1] = save2;
861
9.98k
        }
862
10.7k
      }
863
      
864
21.8k
      word[i] = save;
865
21.8k
    }
866
1.61k
  }
867
868
  void Working::try_one_edit_word() 
869
10.6k
  {
870
10.6k
    const String & orig = original.clean;
871
10.6k
    const char * replace_list = lang->clean_chars();
872
10.6k
    char a,b;
873
10.6k
    const char * c;
874
10.6k
    VARARRAY(char, new_word, orig.size() + 2);
875
10.6k
    char * new_word_end = new_word + orig.size();
876
10.6k
    size_t i;
877
878
10.6k
    memcpy(new_word, orig.str(), orig.size() + 1);
879
880
    // Try word as is (in case of case difference etc)
881
882
10.6k
    try_word(new_word,  new_word_end, 0);
883
884
    // Change one letter
885
    
886
90.4k
    for (i = 0; i != orig.size(); ++i) {
887
2.59M
      for (c = replace_list; *c; ++c) {
888
2.51M
        if (*c == orig[i]) continue;
889
2.43M
        new_word[i] = *c;
890
2.43M
        try_word(new_word, new_word_end, parms->edit_distance_weights.sub);
891
2.43M
      }
892
79.7k
      new_word[i] = orig[i];
893
79.7k
    }
894
    
895
    // Interchange two adjacent letters.
896
    
897
81.5k
    for (i = 0; i+1 < orig.size(); ++i) {
898
70.9k
      a = new_word[i];
899
70.9k
      b = new_word[i+1];
900
70.9k
      new_word[i] = b;
901
70.9k
      new_word[i+1] = a;
902
70.9k
      try_word(new_word, new_word_end, parms->edit_distance_weights.swap);
903
70.9k
      new_word[i] = a;
904
70.9k
      new_word[i+1] = b;
905
70.9k
    }
906
907
    // Add one letter
908
909
10.6k
    *new_word_end = ' ';
910
10.6k
    new_word_end++;
911
10.6k
    *new_word_end = '\0';
912
10.6k
    i = new_word_end - new_word - 1;
913
90.4k
    while(true) {
914
2.95M
      for (c=replace_list; *c; ++c) {
915
2.86M
        new_word[i] = *c;
916
2.86M
        try_word(new_word, new_word_end, parms->edit_distance_weights.del1);
917
2.86M
      }
918
90.4k
      if (i == 0) break;
919
79.7k
      new_word[i] = new_word[i-1];
920
79.7k
      --i;
921
79.7k
    }
922
    
923
    // Delete one letter
924
925
10.6k
    if (orig.size() > 1) {
926
8.77k
      memcpy(new_word, orig.str(), orig.size() + 1);
927
8.77k
      new_word_end = new_word + orig.size() - 1;
928
8.77k
      a = *new_word_end;
929
8.77k
      *new_word_end = '\0';
930
8.77k
      i = orig.size() - 1;
931
79.6k
      while (true) {
932
79.6k
        try_word(new_word, new_word_end, parms->edit_distance_weights.del2);
933
79.6k
        if (i == 0) break;
934
70.9k
        b = a;
935
70.9k
        a = new_word[i-1];
936
70.9k
        new_word[i-1] = b;
937
70.9k
        --i;
938
70.9k
      }
939
8.77k
    }
940
10.6k
  }
941
942
  void Working::add_sound(SpellerImpl::WS::const_iterator i,
943
                          WordEntry * sw, const char * sl, int score)
944
1.95M
  {
945
1.95M
    WordEntry w;
946
1.95M
    (*i)->soundslike_lookup(*sw, w);
947
948
12.4M
    for (; !w.at_end(); w.adv()) {
949
950
10.4M
      ScoreInfo inf;
951
10.4M
      inf.soundslike = sl;
952
10.4M
      inf.soundslike_score = score;
953
10.4M
      add_nearmiss_w(i, w, inf);
954
      
955
10.4M
      if (w.aff[0]) {
956
263k
        String sl_buf;
957
263k
        temp_buffer.reset();
958
263k
        WordAff * exp_list;
959
263k
        exp_list = lang->affix()->expand(w.word, w.aff, temp_buffer);
960
1.88M
        for (WordAff * p = exp_list->next; p; p = p->next) {
961
1.62M
          add_nearmiss_a(i, p, ScoreInfo());
962
1.62M
        }
963
263k
      }
964
      
965
10.4M
    }
966
1.95M
  }
967
968
  void Working::try_scan() 
969
11.6k
  {
970
11.6k
    const char * original_soundslike = original.soundslike.str();
971
    
972
11.6k
    WordEntry * sw;
973
11.6k
    WordEntry w;
974
11.6k
    const char * sl = 0;
975
11.6k
    EditDist score;
976
11.6k
    unsigned int stopped_at = LARGE_NUM;
977
11.6k
    WordAff * exp_list;
978
11.6k
    WordAff single;
979
11.6k
    single.next = 0;
980
981
11.6k
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
982
69.8k
         i != sp->suggest_ws.end();
983
58.1k
         ++i) 
984
58.1k
    {
985
      //CERR.printf(">>%p %s\n", *i, typeid(**i).name());
986
58.1k
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
987
988
30.1M
      while ( (sw = els->next(stopped_at)) ) {
989
990
        //CERR.printf("[%s (%d) %d]\n", sw->word, sw->word_size, sw->what);
991
        //assert(strlen(sw->word) == sw->word_size);
992
          
993
30.0M
        if (sw->what != WordEntry::Word) {
994
30.0M
          sl = sw->word;
995
30.0M
          abort_temp();
996
30.0M
        } else if (!*sw->aff) {
997
0
          sl = to_soundslike_temp(*sw);
998
0
        } else {
999
0
          goto affix_case;
1000
0
        }
1001
1002
        //CERR.printf("SL = %s\n", sl);
1003
        
1004
30.0M
        score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1005
30.0M
        stopped_at = score.stopped_at - sl;
1006
30.0M
        if (score >= LARGE_NUM) continue;
1007
1.50M
        stopped_at = LARGE_NUM;
1008
1.50M
        commit_temp(sl);
1009
1.50M
        add_sound(i, sw, sl, score);
1010
1.50M
        continue;
1011
        
1012
0
      affix_case:
1013
        
1014
0
        temp_buffer.reset();
1015
        
1016
        // first expand any prefixes
1017
0
        if (sp->fast_scan) { // if fast_scan, then no prefixes
1018
0
          single.word.str = sw->word;
1019
0
          single.word.size = strlen(sw->word);
1020
0
          single.aff = (const unsigned char *)sw->aff;
1021
0
          exp_list = &single;
1022
0
        } else {
1023
0
          exp_list = lang->affix()->expand_prefix(sw->word, sw->aff, temp_buffer);
1024
0
        }
1025
        
1026
        // iterate through each semi-expanded word, any affix flags
1027
        // are now guaranteed to be suffixes
1028
0
        for (WordAff * p = exp_list; p; p = p->next)
1029
0
        {
1030
          // try the root word
1031
0
          unsigned sl_len;
1032
0
          sl = to_soundslike_temp(p->word.str, p->word.size, &sl_len);
1033
0
          score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1034
0
          stopped_at = score.stopped_at - sl;
1035
0
          stopped_at += p->word.size - sl_len;
1036
          
1037
0
          if (score < LARGE_NUM) {
1038
0
            commit_temp(sl);
1039
0
            ScoreInfo inf;
1040
0
            inf.soundslike = sl;
1041
0
            inf.soundslike_score = score;
1042
0
            add_nearmiss_a(i, p, inf);
1043
0
          }
1044
          
1045
          // expand any suffixes, using stopped_at as a hint to avoid
1046
          // unneeded expansions.  Note stopped_at is the last character
1047
          // looked at by limit_edit_dist.  Thus if the character
1048
          // at stopped_at is changed it might effect the result
1049
          // hence the "limit" is stopped_at + 1
1050
0
          if (p->word.size - lang->affix()->max_strip() > stopped_at)
1051
0
            exp_list = 0;
1052
0
          else
1053
0
            exp_list = lang->affix()->expand_suffix(p->word, p->aff, 
1054
0
                                                    temp_buffer, 
1055
0
                                                    stopped_at + 1);
1056
          
1057
          // reset stopped_at if necessary
1058
0
          if (score < LARGE_NUM) stopped_at = LARGE_NUM;
1059
          
1060
          // iterate through fully expanded words, if any
1061
0
          for (WordAff * q = exp_list; q; q = q->next) {
1062
0
            sl = to_soundslike_temp(q->word.str, q->word.size);
1063
0
            score = edit_dist_fun(sl, original_soundslike, parms->edit_distance_weights);
1064
0
            if (score >= LARGE_NUM) continue;
1065
0
            commit_temp(sl);
1066
0
            ScoreInfo inf;
1067
0
            inf.soundslike = sl;
1068
0
            inf.soundslike_score = score;
1069
0
            add_nearmiss_a(i, q, inf);
1070
0
          }
1071
0
        }
1072
0
      }
1073
58.1k
    }
1074
11.6k
  }
1075
1076
  void Working::try_scan_root() 
1077
868
  {
1078
1079
868
    WordEntry * sw;
1080
868
    WordEntry w;
1081
868
    const char * sl = 0;
1082
868
    EditDist score;
1083
868
    int stopped_at = LARGE_NUM;
1084
868
    GuessInfo gi;
1085
868
    lang->munch(original.word, &gi);
1086
868
    Vector<const char *> sls;
1087
868
    sls.push_back(original.soundslike.str());
1088
#ifdef DEBUG_SUGGEST
1089
    COUT.printf("will try soundslike: %s\n", sls.back());
1090
#endif
1091
868
    for (const aspeller::CheckInfo * ci = gi.head;
1092
1.32k
         ci; 
1093
868
         ci = ci->next) 
1094
454
    {
1095
454
      sl = to_soundslike(ci->word.str, ci->word.len);
1096
454
      Vector<const char *>::iterator i = sls.begin();
1097
933
      while (i != sls.end() && strcmp(*i, sl) != 0) ++i;
1098
454
      if (i == sls.end()) {
1099
148
        sls.push_back(to_soundslike(ci->word.str, ci->word.len));
1100
#ifdef DEBUG_SUGGEST
1101
        COUT.printf("will try root soundslike: %s\n", sls.back());
1102
#endif
1103
148
      }
1104
454
    }
1105
868
    const char * * begin = sls.pbegin();
1106
868
    const char * * end   = sls.pend();
1107
868
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
1108
4.34k
         i != sp->suggest_ws.end();
1109
3.47k
         ++i) 
1110
3.47k
    {
1111
3.47k
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
1112
1113
985k
      while ( (sw = els->next(stopped_at)) ) {
1114
          
1115
981k
        if (sw->what != WordEntry::Word) {
1116
455k
          sl = sw->word;
1117
455k
          abort_temp();
1118
526k
        } else {
1119
526k
          sl = to_soundslike_temp(*sw);
1120
526k
        } 
1121
1122
981k
        stopped_at = LARGE_NUM;
1123
1.65M
        for (const char * * s = begin; s != end; ++s) {
1124
1.08M
          score = edit_dist_fun(sl, *s, 
1125
1.08M
                                parms->edit_distance_weights);
1126
1.08M
          if (score.stopped_at - sl < stopped_at)
1127
982k
            stopped_at = score.stopped_at - sl;
1128
1.08M
          if (score >= LARGE_NUM) continue;
1129
415k
          stopped_at = LARGE_NUM;
1130
415k
          commit_temp(sl);
1131
415k
          add_sound(i, sw, sl, score);
1132
          //CERR.printf("using %s: will add %s with score %d\n", *s, sl, (int)score);
1133
415k
          break;
1134
1.08M
        }
1135
981k
      }
1136
3.47k
    }
1137
868
  }
1138
1139
  // A pending replacement: the range [begin, end) together with the
  // null-terminated replacement text and its length.
  struct ReplTry 
  {
    const char * begin;
    const char * end;
    const char * repl;
    size_t repl_len;   // strlen(repl), computed once at construction

    ReplTry(const char * range_begin,
            const char * range_end,
            const char * replacement)
      : begin(range_begin)
      , end(range_end)
      , repl(replacement)
      , repl_len(strlen(replacement))
    {}
  };
1148
1149
  void Working::try_repl() 
1150
865
  {
1151
865
    String buf;
1152
865
    Vector<ReplTry> repl_try;
1153
865
    StackPtr<SuggestReplEnumeration> els(lang->repl());
1154
865
    const SuggestRepl * r = 0;
1155
865
    const char * word = original.clean.str();
1156
865
    const char * wend = word + original.clean.size();
1157
367k
    while (r = els->next(), r) 
1158
366k
    {
1159
366k
      const char * p = word;
1160
370k
      while ((p = strstr(p, r->substr))) {
1161
3.67k
        buf.clear();
1162
3.67k
        buf.append(word, p);
1163
3.67k
        buf.append(r->repl, strlen(r->repl));
1164
3.67k
        p += strlen(r->substr);
1165
3.67k
        buf.append(p, wend + 1);
1166
3.67k
        buf.ensure_null_end();
1167
        //COUT.printf("%s (%s) => %s (%s)\n", word, r->substr, buf.str(), r->repl);
1168
3.67k
        ScoreInfo inf;
1169
3.67k
        inf.word_score = parms->edit_distance_weights.sub*3/2;
1170
3.67k
        inf.repl_table = true;
1171
3.67k
        try_word(buf.pbegin(), buf.pend(), inf);
1172
3.67k
      }
1173
366k
    }
1174
865
  }
1175
1176
  // Generate an n-gram score comparing s1 and s2.
  //
  // For each gram length 1..n, counts the substrings of s1 of that
  // length that occur somewhere in s2 and adds the count to the score.
  // The loop stops after the first length with fewer than two hits.
  // Finally, if s2 is more than two characters longer than s1 the excess
  // is subtracted from the score.
  //
  //   n      - largest gram length to try
  //   s1,l1  - first string and its length; s1 is modified temporarily
  //            (a null is written after each gram) and restored before
  //            returning, so it must be writable, including s1[l1]
  //   s2,l2  - null-terminated second string and its length
  static int ngram(int n, char * s1, int l1, const char * s2, int l2)
  {
    int total = 0;
    for (int len = 1; len <= n; ++len) {
      int hits = 0;
      const int last_start = l1 - len;
      for (int start = 0; start <= last_start; ++start) {
        // terminate the gram in place so strstr can search for it
        char * const cut = s1 + start + len;
        const char saved = *cut;
        *cut = '\0';
        if (strstr(s2, s1 + start) != 0) ++hits;
        *cut = saved;
      }
      total += hits;
      if (hits < 2) break;
    }
    const int excess = (l2 - l1) - 2;
    return excess > 0 ? total - excess : total;
  }
1196
1197
  struct NGramScore {
1198
    SpellerImpl::WS::const_iterator i;
1199
    WordEntry info;
1200
    const char * soundslike;
1201
    int score;
1202
0
    NGramScore() {}
1203
    NGramScore(SpellerImpl::WS::const_iterator i0,
1204
               const WordEntry & info0, const char * sl, int score0)
1205
112k
      : i(i0), info(info0), soundslike(sl), score(score0) {}
1206
  };
1207
1208
1209
  void Working::try_ngram()
1210
325
  {
1211
325
    String original_soundslike = original.soundslike;
1212
325
    original_soundslike.ensure_null_end();
1213
325
    WordEntry * sw = 0;
1214
325
    const char * sl = 0;
1215
325
    typedef Vector<NGramScore> Candidates;
1216
325
    hash_set<const char *> already_have;
1217
325
    Candidates candidates;
1218
325
    int min_score = 0;
1219
325
    int count = 0;
1220
1221
325
    for (NearMisses::iterator i = scored_near_misses.begin();
1222
394k
         i != scored_near_misses.end(); ++i)
1223
394k
    {
1224
394k
      if (!i->soundslike)
1225
0
        i->soundslike = to_soundslike(i->word, strlen(i->word));
1226
394k
      already_have.insert(i->soundslike);
1227
394k
    }
1228
1229
325
    for (SpellerImpl::WS::const_iterator i = sp->suggest_ws.begin();
1230
1.94k
         i != sp->suggest_ws.end();
1231
1.62k
         ++i) 
1232
1.62k
    {
1233
1.62k
      StackPtr<SoundslikeEnumeration> els((*i)->soundslike_elements());
1234
      
1235
16.5M
      while ( (sw = els->next(LARGE_NUM)) ) {
1236
1237
16.5M
        if (sw->what != WordEntry::Word) {
1238
16.5M
          abort_temp();
1239
16.5M
          sl = sw->word;
1240
16.5M
        } else {
1241
0
          sl = to_soundslike_temp(sw->word, sw->word_size);
1242
0
        }
1243
        
1244
16.5M
        if (already_have.have(sl)) continue;
1245
1246
16.4M
        int ng = ngram(3, original_soundslike.data(), original_soundslike.size(),
1247
16.4M
                       sl, strlen(sl));
1248
1249
16.4M
        if (ng > 0 && ng >= min_score) {
1250
112k
          commit_temp(sl);
1251
112k
          candidates.push_back(NGramScore(i, *sw, sl, ng));
1252
112k
          if (ng > min_score) count++;
1253
112k
          if (count >= parms->ngram_keep) {
1254
2.22k
            int orig_min = min_score;
1255
2.22k
            min_score = LARGE_NUM;
1256
2.22k
            Candidates::iterator i = candidates.begin();
1257
2.22k
            Candidates::iterator j = candidates.begin();
1258
105k
            for (; i != candidates.end(); ++i) {
1259
103k
              if (i->score == orig_min) continue;
1260
22.2k
              if (min_score > i->score) min_score = i->score;
1261
22.2k
              *j = *i;
1262
22.2k
              ++j;
1263
22.2k
            }
1264
2.22k
            count = 0;
1265
2.22k
            candidates.resize(j-candidates.begin());
1266
24.5k
            for (i = candidates.begin(); i != candidates.end(); ++i) {
1267
22.2k
              if (i->score != min_score) count++;
1268
22.2k
            }
1269
2.22k
          }
1270
112k
        }
1271
16.4M
      }
1272
1.62k
    }
1273
    
1274
325
    for (Candidates::iterator i = candidates.begin();
1275
31.3k
         i != candidates.end();
1276
31.0k
         ++i)
1277
31.0k
    {
1278
      //COUT.printf("ngram: %s %d\n", i->soundslike, i->score);
1279
31.0k
      add_sound(i->i, &i->info, i->soundslike);
1280
31.0k
    }
1281
325
  }
1282
  
1283
23.4k
  void Working::score_list() {
1284
1285
#  ifdef DEBUG_SUGGEST
1286
    COUT.printl("SCORING LIST");
1287
#  endif
1288
1289
23.4k
    try_harder = 3;
1290
23.4k
    if (near_misses.empty()) return;
1291
1292
18.1k
    NearMisses::iterator i;
1293
18.1k
    NearMisses::iterator prev;
1294
1295
18.1k
    near_misses.push_front(ScoreWordSound(this));
1296
    // the first item will NEVER be looked at.
1297
18.1k
    scored_near_misses.push_front(ScoreWordSound(this));
1298
18.1k
    scored_near_misses.front().score = -1;
1299
    // this item will only be looked at when sorting so 
1300
    // make it a small value to keep it at the front.
1301
1302
18.1k
    int try_for = (parms->word_weight*parms->edit_distance_weights.max)/100;
1303
40.8k
    while (true) {
1304
40.8k
      try_for += (parms->word_weight*parms->edit_distance_weights.max)/100;
1305
1306
      // put all pairs whose score <= initial_limit*max_weight
1307
      // into the scored list
1308
1309
40.8k
      prev = near_misses.begin();
1310
40.8k
      i = prev;
1311
40.8k
      ++i;
1312
29.0M
      while (i != near_misses.end()) {
1313
1314
        //CERR.printf("%s %s %s %d %d\n", i->word, i->word_clean, i->soundslike,
1315
        //            i->word_score, i->soundslike_score);
1316
1317
28.9M
        if (i->word_score >= LARGE_NUM) {
1318
17.5M
          int sl_score = i->soundslike_score < LARGE_NUM ? i->soundslike_score : 0;
1319
17.5M
          int level = needed_level(try_for, sl_score);
1320
          
1321
17.5M
          if (level >= int(sl_score/parms->edit_distance_weights.min)) 
1322
15.4M
            i->word_score = edit_distance(original.clean,
1323
15.4M
                                          i->word_clean,
1324
15.4M
                                          level, level,
1325
15.4M
                                          parms->edit_distance_weights);
1326
17.5M
        }
1327
        
1328
28.9M
        if (i->word_score >= LARGE_NUM) goto cont1;
1329
1330
16.5M
        if (i->soundslike_score >= LARGE_NUM) 
1331
2.29M
        {
1332
2.29M
          if (weighted_average(0, i->word_score) > try_for) goto cont1;
1333
1334
282k
          if (i->soundslike == 0) i->soundslike = to_soundslike(i->word, strlen(i->word));
1335
1336
282k
          i->soundslike_score = edit_distance(original.soundslike, i->soundslike, 
1337
282k
                                              parms->edit_distance_weights);
1338
282k
        }
1339
1340
14.5M
        i->score = weighted_average(i->soundslike_score, i->word_score);
1341
1342
14.5M
        if (i->score > try_for + parms->span) goto cont1;
1343
1344
        //CERR.printf("2>%s %s %s %d %d\n", i->word, i->word_clean, i->soundslike,
1345
        //            i->word_score, i->soundslike_score);
1346
1347
2.80M
        scored_near_misses.splice_into(near_misses,prev,i);
1348
        
1349
2.80M
        i = prev; // Yes this is right due to the slice
1350
2.80M
        ++i;
1351
1352
2.80M
        continue;
1353
        
1354
26.1M
      cont1:
1355
26.1M
        prev = i;
1356
26.1M
        ++i;
1357
26.1M
      }
1358
  
1359
40.8k
      scored_near_misses.sort();
1360
  
1361
40.8k
      i = scored_near_misses.begin();
1362
40.8k
      ++i;
1363
  
1364
40.8k
      if (i == scored_near_misses.end()) continue;
1365
  
1366
26.4k
      int k = skip_first_couple(i);
1367
  
1368
26.4k
      if ((k == parms->skip && i->score <= try_for) 
1369
26.4k
    || prev == near_misses.begin() ) // or no more left in near_misses
1370
18.1k
  break;
1371
26.4k
    }
1372
      
1373
18.1k
    threshold = i->score + parms->span;
1374
18.1k
    if (threshold < parms->edit_distance_weights.max)
1375
5.33k
      threshold = parms->edit_distance_weights.max;
1376
1377
#  ifdef DEBUG_SUGGEST
1378
    COUT << "Threshold is: " << threshold << "\n";
1379
    COUT << "try_for: " << try_for << "\n";
1380
    COUT << "Size of scored: " << scored_near_misses.size() << "\n";
1381
    COUT << "Size of ! scored: " << near_misses.size() << "\n";
1382
#  endif
1383
1384
    //if (threshold - try_for <=  parms->edit_distance_weights.max/2) return;
1385
      
1386
18.1k
    prev = near_misses.begin();
1387
18.1k
    i = prev;
1388
18.1k
    ++i;
1389
9.59M
    while (i != near_misses.end()) {
1390
  
1391
9.57M
      if (i->word_score >= LARGE_NUM) {
1392
1393
6.97M
        int sl_score = i->soundslike_score < LARGE_NUM ? i->soundslike_score : 0;
1394
6.97M
        int initial_level = needed_level(try_for, sl_score);
1395
6.97M
        int max_level = needed_level(threshold, sl_score);
1396
        
1397
6.97M
        if (initial_level < max_level)
1398
2.88M
          i->word_score = edit_distance(original.clean.c_str(),
1399
2.88M
                                        i->word_clean,
1400
2.88M
                                        initial_level+1,max_level,
1401
2.88M
                                        parms->edit_distance_weights);
1402
6.97M
      }
1403
1404
9.57M
      if (i->word_score >= LARGE_NUM) goto cont2;
1405
      
1406
4.28M
      if (i->soundslike_score >= LARGE_NUM) 
1407
1.60M
      {
1408
1.60M
        if (weighted_average(0, i->word_score) > threshold) goto cont2;
1409
1410
79.2k
        if (i->soundslike == 0) 
1411
62.5k
          i->soundslike = to_soundslike(i->word, strlen(i->word));
1412
        
1413
79.2k
        i->soundslike_score = edit_distance(original.soundslike, i->soundslike,
1414
79.2k
                                            parms->edit_distance_weights);
1415
79.2k
      }
1416
1417
2.75M
      i->score = weighted_average(i->soundslike_score, i->word_score);
1418
1419
2.75M
      if (i->score > threshold + parms->span) goto cont2;
1420
      
1421
1.21M
      scored_near_misses.splice_into(near_misses,prev,i);
1422
      
1423
1.21M
      i = prev; // Yes this is right due to the slice
1424
1.21M
      ++i;
1425
      
1426
1.21M
      continue;
1427
  
1428
8.36M
    cont2:
1429
8.36M
  prev = i;
1430
8.36M
  ++i;
1431
        
1432
8.36M
    }
1433
1434
18.1k
    near_misses.pop_front();
1435
1436
18.1k
    scored_near_misses.sort();
1437
18.1k
    scored_near_misses.pop_front();
1438
1439
18.1k
    if (near_misses.empty()) {
1440
8.00k
      try_harder = 1;
1441
10.1k
    } else {
1442
10.1k
      i = scored_near_misses.begin();
1443
10.1k
      skip_first_couple(i);
1444
10.1k
      ++i;
1445
10.1k
      try_harder = i == scored_near_misses.end() ? 2 : 0;
1446
10.1k
    }
1447
1448
#  ifdef DEBUG_SUGGEST
1449
    COUT << "Size of scored: " << scored_near_misses.size() << "\n";
1450
    COUT << "Size of ! scored: " << near_misses.size() << "\n";
1451
    COUT << "Try Harder: " << try_harder << "\n";
1452
#  endif
1453
18.1k
  }
1454
1455
21.3k
  void Working::fine_tune_score(int thres) {
1456
1457
21.3k
    NearMisses::iterator i;
1458
1459
21.3k
    if (parms->use_typo_analysis) {
1460
20.9k
      adj_threshold = 0;
1461
20.9k
      unsigned int j;
1462
      
1463
20.9k
      CharVector orig_norm, word;
1464
20.9k
      orig_norm.resize(original.word.size() + 1);
1465
233k
      for (j = 0; j != original.word.size(); ++j)
1466
212k
        orig_norm[j] = parms->ti->to_normalized(original.word[j]);
1467
20.9k
      orig_norm[j] = 0;
1468
20.9k
      ParmString orig(orig_norm.data(), j);
1469
20.9k
      word.resize(max_word_length + 1);
1470
      
1471
20.9k
      for (i = scored_near_misses.begin();
1472
1.40M
           i != scored_near_misses.end() && i->score <= thres;
1473
1.38M
           ++i)
1474
1.38M
      {
1475
1.38M
        SpecialTypoScore special = special_typo_score(*parms->ti, i->special_edit);
1476
1.38M
        if (special) {
1477
55.6k
          i->word_score = special.score;
1478
55.6k
          i->soundslike_score = i->word_score;
1479
55.6k
          i->adj_score = i->word_score;
1480
55.6k
        }
1481
1.38M
        if (i->adj_score >= LARGE_NUM) {
1482
1.07M
          if (!special) {
1483
3.61M
            for (j = 0; (i->word)[j] != 0; ++j)
1484
2.53M
              word[j] = parms->ti->to_normalized((i->word)[j]);
1485
1.07M
            word[j] = 0;
1486
1.07M
            int new_score = typo_edit_distance(ParmString(word.data(), j), orig, *parms->ti);
1487
            // if a repl. table was used we don't want to increase the score
1488
1.07M
            if (!i->repl_table || new_score < i->word_score)
1489
1.07M
              i->word_score = new_score;
1490
1.07M
          }
1491
1.07M
          if (!special.is_overall_score) 
1492
1.07M
            i->adj_score = adj_wighted_average(i->soundslike_score, i->word_score, parms->ti->max);
1493
1.07M
        }
1494
1.38M
        if (i->adj_score > adj_threshold)
1495
68.4k
          adj_threshold = i->adj_score;
1496
1.38M
      }
1497
20.9k
    } else {
1498
396
      for (i = scored_near_misses.begin();
1499
135k
           i != scored_near_misses.end() && i->score <= thres;
1500
135k
           ++i)
1501
135k
      {
1502
135k
        i->adj_score = i->score;
1503
135k
      }
1504
396
      adj_threshold = threshold;
1505
396
    }
1506
    
1507
2.80M
    for (; i != scored_near_misses.end(); ++i) {
1508
2.78M
      if (i->adj_score > adj_threshold)
1509
2.77M
        i->adj_score = LARGE_NUM;
1510
2.78M
    }
1511
21.3k
  }
1512
1513
  // Equality predicate for C strings: compares the characters, not the
  // pointers.
  struct StrEquals {
    bool operator() (const char * lhs, const char * rhs) const {
      return !strcmp(lhs, rhs);
    }
  };
1518
  // Set of C strings whose elements are compared with StrEquals, i.e.
  // by content rather than by pointer.
  typedef hash_set<const char *,hash<const char *>,StrEquals> StrHashSet;
1519
1520
23.0k
  // Build prefix + w + suffix as a null-terminated string allocated
  // from buf and return it.  fix_case() is applied to the w part only.
  char * Working::fix_word(ObjStack & buf, ParmStr w) {
    const size_t prefix_len = prefix.size();
    const size_t word_len   = w.size();
    const size_t suffix_len = suffix.size();

    char * const result =
      static_cast<char *>(buf.alloc(prefix_len + word_len + suffix_len + 1));

    memcpy(result, prefix.c_str(), prefix_len);

    // copy w together with its terminating null so that fix_case sees
    // a proper string; the null is overwritten by the suffix afterwards
    char * const middle = result + prefix_len;
    memcpy(middle, w.str(), word_len + 1);
    fix_case(middle);

    memcpy(middle + word_len, suffix.c_str(), suffix_len + 1);
    return result;
  }
1532
1533
10.5k
  // Fill res with the final suggestions taken, in order, from
  // scored_near_misses.
  //
  // Entries are consumed until the list ends, res holds limit
  // suggestions, or an entry with adj_score >= LARGE_NUM is reached
  // while res already holds at least three.  Duplicate words are
  // dropped.  Words coming from a replacement list are only kept if
  // they pass the speller check (both halves, if the word contains a
  // space).  Afterwards the frozen buffers of all source Working
  // objects and the near-miss list itself are handed over to res.
  void Sugs::transfer(SuggestionsImpl & res, int limit) {
    // FIXME: double check that conv->in_code() is correct
    res.reset();
//#  ifdef DEBUG_SUGGEST
//    COUT << "\n" << "\n" 
//	 << original.word << '\t' 
//	 << original.soundslike << '\t'
//	 << "\n";
//    String sl;
//#  endif
    StrHashSet seen;
    NearMisses::const_iterator i = scored_near_misses.begin();
    while (i != scored_near_misses.end() && res.size() < limit
           && ( i->adj_score < LARGE_NUM || res.size() < 3))
    {
#    ifdef DEBUG_SUGGEST
      //COUT.printf("%p %p: ",  i->word, i->soundslike);
      COUT << i->word
           << '\t' << i->adj_score 
           << '\t' << i->score 
           << '\t' << i->word_score
           << '\t' << i->soundslike
           << '\t' << i->soundslike_score << "\n";
#    endif
      Working * src = i->src;
      if (i->repl_list == 0) {
        char * word;
        if (src->have_presuf)
          word = src->fix_word(res.buf, i->word);
        else
          word = src->fix_case(i->word);
        if (seen.insert(word).second)
          res.push_back(Suggestion(word,&*i));
      } else {
        do {
          const char * word = i->src->fix_word(res.buf, i->repl_list->word);
          if (!seen.insert(word).second) continue; // duplicate
          // a replacement may consist of two words separated by a space
          const char * space = strchr(word, ' ');
          bool in_dict;
          if (space == NULL)
            in_dict = src->sp->check(word);
          else
            in_dict = src->sp->check(word, space - word) && src->sp->check(space + 1);
          if (in_dict)
            res.push_back(Suggestion(word,&*i));
        } while (i->repl_list->adv());
      }
      ++i;
    }
    // res keeps pointers into these buffers and into the near-miss list
    for (Vector<Working *>::iterator w = srcs.begin(), e = srcs.end(); w != e; ++w) {
      res.saved_bufs_.push_back((*w)->buffer.freeze());
    }
    res.saved_near_misses_.swap(scored_near_misses);
  }
1586
  
1587
  class SuggestionListImpl : public SuggestionList {
1588
    struct Parms {
1589
      typedef const char *                    Value;
1590
      typedef SuggestionsImpl::const_iterator Iterator;
1591
      Iterator end;
1592
10.5k
      Parms(Iterator e) : end(e) {}
1593
638k
      bool endf(Iterator e) const {return e == end;}
1594
10.5k
      Value end_state() const {return 0;}
1595
627k
      Value deref(Iterator i) const {return i->word;}
1596
    };
1597
  public:
1598
    SuggestionsImpl suggestions;
1599
1600
    //SuggestionList * clone() const {return new SuggestionListImpl(*this);}
1601
    //void assign(const SuggestionList * other) {
1602
    //  *this = *static_cast<const SuggestionListImpl *>(other);
1603
    //}
1604
1605
0
    bool empty() const { return suggestions.empty(); }
1606
0
    Size size() const { return suggestions.size(); }
1607
10.5k
    VirEmul * elements() const {
1608
10.5k
      return new MakeEnumeration<Parms, StringEnumeration>
1609
10.5k
  (suggestions.begin(), Parms(suggestions.end()));
1610
10.5k
    }
1611
  };
1612
1613
  class SuggestImpl : public Suggest {
1614
    SpellerImpl * speller_;
1615
    SuggestionListImpl  suggestion_list;
1616
    SuggestParms parms_;
1617
  public:
1618
1.40k
    SuggestImpl(SpellerImpl * sp) : speller_(sp) {}
1619
    PosibErr<void> setup(String mode = "");
1620
0
    PosibErr<void> set_mode(ParmString mode) {
1621
0
      return setup(mode);
1622
0
    }
1623
    SuggestionList & suggest(const char * word);
1624
    SuggestionsData & suggestions(const char * word);
1625
  };
1626
  
1627
  // Initialize parms_ for the given suggestion mode.  An empty mode
  // means: use the "sug-mode" value from the speller's configuration.
  // Returns the error from SuggestParms::init, if any.
  PosibErr<void> SuggestImpl::setup(String mode)
  {
    if (mode == "") {
      mode = speller_->config()->retrieve("sug-mode");
    }

    RET_ON_ERR(parms_.init(mode, speller_, speller_->config()));

    return no_err;
  }
1636
1637
10.5k
  // Compute the suggestions for word and return them in the list owned
  // by this object (the list is overwritten by the next call).
  //
  // If the speller check reports that word consists of several parts
  // of which exactly one is incorrect, suggestions for that part alone,
  // with the surrounding text re-attached as prefix and suffix, are
  // merged into the result.
  SuggestionList & SuggestImpl::suggest(const char * word) { 
#   ifdef DEBUG_SUGGEST
    COUT << "=========== begin suggest " << word << " ===========\n";
#   endif
    Working * sug = new Working(speller_, &speller_->lang(),word, &parms_);
    Sugs * sugs = sug->suggestions();

    // check a writable copy of the word to find out how it splits up
    CheckInfo check_infos[8];
    SpellerImpl::CompoundInfo cpi;
    String buf = word;
    char * str = buf.mstr();
    speller_->check(str, str + buf.size(), false, speller_->run_together_limit(),
                    check_infos, check_infos + 8, NULL, &cpi);

    if (cpi.count > 1 && cpi.incorrect_count == 1) {
      CheckInfo * bad = cpi.first_incorrect;
      String prefix(str, bad->word.str - str);
      String middle(bad->word.str, bad->word.len);
      String suffix(bad->word.str + bad->word.len);
      sug = new Working(speller_, &speller_->lang(), middle, &parms_);
      sug->camel_case = false;
      sug->with_presuf(prefix, suffix);
      Sugs * part_sugs = sug->suggestions();
      sugs->merge(*part_sugs);
      delete part_sugs;
    }

    sugs->transfer(suggestion_list.suggestions, parms_.limit);
    delete sugs;
#   ifdef DEBUG_SUGGEST
    COUT << "^^^^^^^^^^^  end suggest " << word << "  ^^^^^^^^^^^\n";
#   endif
    return suggestion_list;
  }
1665
1666
0
  // Same as suggest(), but returns the underlying suggestion data
  // instead of the list wrapper.
  SuggestionsData & SuggestImpl::suggestions(const char * word) {
    SuggestionList & list = suggest(word);
    (void)list; // only the side effect on suggestion_list is needed
    return suggestion_list.suggestions;
  }
1670
}
1671
1672
namespace aspeller {
1673
1.40k
  // Creates a SuggestImpl for the speller M and runs its setup().
  //
  // If setup() reports an error that error is returned instead.
  // On success the object is released from the StackPtr and returned.
  PosibErr<Suggest *> new_default_suggest(SpellerImpl * m)
  {
    StackPtr<SuggestImpl> impl(new SuggestImpl(m));

    RET_ON_ERR(impl->setup());

    return impl.release();
  }
1678
1679
1.40k
  // Resets every parameter to its built-in default and then applies the
  // adjustments for the suggestion mode MODE ("ultra", "fast", "normal",
  // "slow" or "bad-spellers").
  //
  // SP supplies have_repl and have_soundslike.  Any other MODE yields a
  // bad_value error for "sug-mode"; in that case the defaults have
  // already been assigned but word_weight has not been recomputed.
  PosibErr<void> SuggestParms::init(ParmString mode, SpellerImpl * sp)
  {
    // Edit distance weights.
    edit_distance_weights.del1    =  95;
    edit_distance_weights.del2    =  95;
    edit_distance_weights.swap    =  90;
    edit_distance_weights.sub     = 100;
    edit_distance_weights.similar =  10;
    edit_distance_weights.max     = 100;
    edit_distance_weights.min     =  90;

    soundslike_weight = 50;

    split_chars = " -";
    camel_case = false;

    // Result shaping defaults.
    skip = 2;
    limit = 100;
    span = 50;
    ngram_keep = 10;

    // Strategy defaults; the mode selection below switches the scans on.
    use_typo_analysis = true;
    use_repl_table = sp->have_repl;
    // always a good idea, even when soundslike lookup is used
    try_one_edit_word = true;
    check_after_one_edit_word = false;
    try_scan_0 = false;
    try_scan_1 = false;
    try_scan_2 = false;
    try_ngram = false;
    ngram_threshold = 2;

    if (mode == "ultra")
    {
      try_scan_0 = true;
    }
    else if (mode == "fast")
    {
      try_scan_1 = true;
    }
    else if (mode == "normal")
    {
      try_scan_1 = true;
      try_scan_2 = true;
    }
    else if (mode == "slow")
    {
      try_scan_2 = true;
      try_ngram = true;
      limit = 1000;
      if (sp->have_soundslike)
        ngram_threshold = 1;
      else
        ngram_threshold = 2;
    }
    else if (mode == "bad-spellers")
    {
      try_scan_2 = true;
      try_ngram = true;
      use_typo_analysis = false;
      soundslike_weight = 55;
      span = 125;
      limit = 1000;
      ngram_threshold = 1;
    }
    else
    {
      return make_err(bad_value, "sug-mode", mode, _("one of ultra, fast, normal, slow, or bad-spellers"));
    }

    // Without soundslike data try_scan_0/1 will not get better results
    // than try_one_edit_word, so drop those scans and set
    // check_after_one_edit_word instead.
    if (!sp->have_soundslike && (try_scan_0 || try_scan_1)) {
      check_after_one_edit_word = true;
      try_scan_0 = false;
      try_scan_1 = false;
    }

    // The two weights always add up to 100.
    word_weight = 100 - soundslike_weight;

    return no_err;
  }
1747
1748
1.40k
  // Initializes the parameters for MODE via the two argument init() and
  // then applies overrides taken from CONFIG:
  //
  //   sug-typo-analysis  overrides use_typo_analysis, if present
  //   sug-repl-table     overrides use_repl_table, if present
  //   camel-case         sets camel_case; when true split_chars is emptied
  //   sug-split-char     replaces split_chars; consulted when camel_case
  //                      is false or when the option is present
  //
  // When typo analysis ends up enabled, ti is set up from the "keyboard"
  // option.  Errors from init() or aspeller::setup() are returned.
  PosibErr<void> SuggestParms::init(ParmString mode, SpellerImpl * sp, Config * config)
  {
    RET_ON_ERR(init(mode, sp));

    if (config->have("sug-typo-analysis")) {
      use_typo_analysis = config->retrieve_bool("sug-typo-analysis");
    }
    if (config->have("sug-repl-table")) {
      use_repl_table = config->retrieve_bool("sug-repl-table");
    }

    camel_case = config->retrieve_bool("camel-case");
    if (camel_case) {
      split_chars.clear();
    }

    if (!camel_case || config->have("sug-split-char")) {
      StringList sl;
      config->retrieve_list("sug-split-char", &sl);
      StringListEnumeration els = sl.elements_obj();
      split_chars.clear();
      // Only the first character of each list entry is used.
      for (const char * item = els.next(); item != 0; item = els.next()) {
        split_chars.push_back(*item);
      }
    }

    if (use_typo_analysis) {
      String keyboard = config->retrieve("keyboard");
      RET_ON_ERR(aspeller::setup(ti, config, &sp->lang(), keyboard));
    }

    return no_err;
  }
1778
  
1779
}
1780