Coverage Report

Created: 2025-05-08 07:17

/src/hunspell/src/hunspell/hunspell.cxx
Line
Count
Source (jump to first uncovered line)
1
/* ***** BEGIN LICENSE BLOCK *****
2
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3
 *
4
 * Copyright (C) 2002-2022 Németh László
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version
7
 * 1.1 (the "License"); you may not use this file except in compliance with
8
 * the License. You may obtain a copy of the License at
9
 * http://www.mozilla.org/MPL/
10
 *
11
 * Software distributed under the License is distributed on an "AS IS" basis,
12
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
 * for the specific language governing rights and limitations under the
14
 * License.
15
 *
16
 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17
 *
18
 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19
 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20
 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21
 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22
 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23
 *
24
 * Alternatively, the contents of this file may be used under the terms of
25
 * either the GNU General Public License Version 2 or later (the "GPL"), or
26
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
 * in which case the provisions of the GPL or the LGPL are applicable instead
28
 * of those above. If you wish to allow use of your version of this file only
29
 * under the terms of either the GPL or the LGPL, and not to allow others to
30
 * use your version of this file under the terms of the MPL, indicate your
31
 * decision by deleting the provisions above and replace them with the notice
32
 * and other provisions required by the GPL or the LGPL. If you do not delete
33
 * the provisions above, a recipient may use your version of this file under
34
 * the terms of any one of the MPL, the GPL or the LGPL.
35
 *
36
 * ***** END LICENSE BLOCK ***** */
37
/*
38
 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39
 * And Contributors.  All rights reserved.
40
 *
41
 * Redistribution and use in source and binary forms, with or without
42
 * modification, are permitted provided that the following conditions
43
 * are met:
44
 *
45
 * 1. Redistributions of source code must retain the above copyright
46
 *    notice, this list of conditions and the following disclaimer.
47
 *
48
 * 2. Redistributions in binary form must reproduce the above copyright
49
 *    notice, this list of conditions and the following disclaimer in the
50
 *    documentation and/or other materials provided with the distribution.
51
 *
52
 * 3. All modifications to the source code must be clearly marked as
53
 *    such.  Binary redistributions based on modified source code
54
 *    must be clearly marked as modified versions in the documentation
55
 *    and/or other materials provided with the distribution.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68
 * SUCH DAMAGE.
69
 */
70
71
#include <cstdlib>
72
#include <cstring>
73
#include <cstdio>
74
#include <ctime>
75
76
#include "affixmgr.hxx"
77
#include "hunspell.hxx"
78
#include "suggestmgr.hxx"
79
#include "hunspell.h"
80
#include "csutil.hxx"
81
82
#include <limits>
83
#include <string>
84
85
266k
// Byte-length limit applied to UTF-8 encoded input words (three times
// MAXWORDLEN); spell_internal() rejects UTF-8 words at or above this size.
#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
86
87
class HunspellImpl
88
{
89
public:
90
  HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
91
  HunspellImpl(const HunspellImpl&) = delete;
92
  HunspellImpl& operator=(const HunspellImpl&) = delete;
93
  ~HunspellImpl();
94
  int add_dic(const char* dpath, const char* key = NULL);
95
  std::vector<std::string> suffix_suggest(const std::string& root_word);
96
  std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
97
  std::vector<std::string> generate(const std::string& word, const std::string& pattern);
98
  std::vector<std::string> stem(const std::string& word);
99
  std::vector<std::string> stem(const std::vector<std::string>& morph);
100
  std::vector<std::string> analyze(const std::string& word);
101
  int get_langnum() const;
102
  bool input_conv(const std::string& word, std::string& dest);
103
  bool spell(const std::string& word, std::vector<std::string>& candidate_stack,
104
             int* info = NULL, std::string* root = NULL);
105
  std::vector<std::string> suggest(const std::string& word);
106
  std::vector<std::string> suggest(const std::string& word, std::vector<std::string>& suggest_candidate_stack);
107
  const std::string& get_wordchars_cpp() const;
108
  const std::vector<w_char>& get_wordchars_utf16() const;
109
  const std::string& get_dict_encoding() const;
110
  int add(const std::string& word);
111
  int add_with_flags(const std::string& word, const std::string& flags, const std::string& desc = NULL);
112
  int add_with_affix(const std::string& word, const std::string& example);
113
  int remove(const std::string& word);
114
  const std::string& get_version_cpp() const;
115
  struct cs_info* get_csconv();
116
117
  int spell(const char* word, int* info = NULL, char** root = NULL);
118
  int suggest(char*** slst, const char* word);
119
  int suffix_suggest(char*** slst, const char* root_word);
120
  void free_list(char*** slst, int n);
121
  char* get_dic_encoding();
122
  int analyze(char*** slst, const char* word);
123
  int stem(char*** slst, const char* word);
124
  int stem(char*** slst, char** morph, int n);
125
  int generate(char*** slst, const char* word, const char* word2);
126
  int generate(char*** slst, const char* word, char** desc, int n);
127
  const char* get_wordchars() const;
128
  const char* get_version() const;
129
  int input_conv(const char* word, char* dest, size_t destsize);
130
131
private:
132
  AffixMgr* pAMgr;
133
  std::vector<HashMgr*> m_HMgrs;
134
  SuggestMgr* pSMgr;
135
  std::string affixpath;
136
  std::string encoding;
137
  struct cs_info* csconv;
138
  int langnum;
139
  int utf8;
140
  int complexprefixes;
141
  std::vector<std::string> wordbreak;
142
143
private:
144
  std::vector<std::string> analyze_internal(const std::string& word);
145
  bool spell_internal(const std::string& word, std::vector<std::string>& candidate_stack,
146
                      int* info = NULL, std::string* root = NULL);
147
  std::vector<std::string> suggest_internal(const std::string& word,
148
                                            std::vector<std::string>& spell_candidate_stack,
149
                                            std::vector<std::string>& suggest_candidate_stack,
150
                                            bool& capitalized, size_t& abbreviated, int& captype);
151
  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
152
  size_t cleanword2(std::string& dest,
153
                    std::vector<w_char>& dest_u,
154
                    const std::string& src,
155
                    int* pcaptype,
156
                    size_t* pabbrev);
157
  void clean_ignore(std::string& dest, const std::string& src);
158
  void mkinitcap(std::string& u8);
159
  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
160
  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
161
  void mkallcap(std::string& u8);
162
  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
163
  struct hentry* checkword(const std::string& source, int* info, std::string* root);
164
  std::string sharps_u8_l1(const std::string& source);
165
  hentry*
166
  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
167
  int is_keepcase(const hentry* rv);
168
  void insert_sug(std::vector<std::string>& slst, const std::string& word);
169
  void cat_result(std::string& result, const std::string& st);
170
  std::vector<std::string> spellml(const std::string& word);
171
  std::string get_xml_par(const std::string& par, std::string::size_type pos);
172
  std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
173
  std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
174
  int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
175
};
176
177
// Builds the three managers in dependency order: the hash manager for the
// dictionary, the affix manager (which looks words up through the hash
// managers), and finally the suggestion manager (which uses the affix
// manager). Settings reported by the affix manager are cached in members.
HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key)
  : affixpath(affpath),
    csconv(nullptr),
    utf8(0),
    complexprefixes(0) {
  // dictionary first: the affix manager needs the hash manager list
  HashMgr* const dictionary = new HashMgr(dpath, affpath, key);
  m_HMgrs.push_back(dictionary);

  pAMgr = new AffixMgr(affpath, m_HMgrs, key);

  // cache the settings the affix manager read from the affix file
  const std::string try_chars(pAMgr->get_try_string());
  encoding = pAMgr->get_encoding();
  langnum = pAMgr->get_langnum();
  utf8 = pAMgr->get_utf8();
  // a character table is only looked up for non-UTF-8 dictionaries
  if (utf8 == 0) {
    csconv = get_current_cs(encoding);
  }
  complexprefixes = pAMgr->get_complexprefixes();
  wordbreak = pAMgr->get_breaktable();

  pSMgr = new SuggestMgr(try_chars, MAXSUGGESTION, pAMgr);
}
204
205
18.3k
// Releases the suggestion manager, the affix manager and every hash manager,
// then resets the raw pointers. The character table is only freed in
// MOZILLA_CLIENT builds.
HunspellImpl::~HunspellImpl() {
  delete pSMgr;
  delete pAMgr;
  for (size_t idx = 0; idx < m_HMgrs.size(); ++idx) {
    delete m_HMgrs[idx];
  }
  pSMgr = nullptr;
  pAMgr = nullptr;
#ifdef MOZILLA_CLIENT
  delete[] csconv;
#endif
  csconv = nullptr;
}
217
218
// load extra dictionaries
219
0
int HunspellImpl::add_dic(const char* dpath, const char* key) {
220
0
  m_HMgrs.push_back(new HashMgr(dpath, affixpath.c_str(), key));
221
0
  return 0;
222
0
}
223
224
225
// make a copy of src at dest while removing all characters
226
// specified in IGNORE rule
227
void HunspellImpl::clean_ignore(std::string& dest,
228
7.88M
                                const std::string& src) {
229
7.88M
  dest.clear();
230
7.88M
  dest.assign(src);
231
7.88M
  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
232
7.88M
  if (ignoredchars != NULL) {
233
53.9k
    if (utf8) {
234
27.7k
      const std::vector<w_char>& ignoredchars_utf16 =
235
27.7k
          pAMgr->get_ignore_utf16();
236
27.7k
      remove_ignored_chars_utf(dest, ignoredchars_utf16);
237
27.7k
    } else {
238
26.2k
      remove_ignored_chars(dest, ignoredchars);
239
26.2k
    }
240
53.9k
  }
241
7.88M
}
242
243
244
// make a copy of src at destination while removing all leading
245
// blanks and removing any trailing periods after recording
246
// their presence with the abbreviation flag
247
// also since already going through character by character,
248
// set the capitalization type
249
// return the length of the "cleaned" (and UTF-8 encoded) word
250
251
size_t HunspellImpl::cleanword2(std::string& dest,
252
                         std::vector<w_char>& dest_utf,
253
                         const std::string& src,
254
                         int* pcaptype,
255
3.09M
                         size_t* pabbrev) {
256
3.09M
  dest.clear();
257
3.09M
  dest_utf.clear();
258
259
  // remove IGNORE characters from the string
260
3.09M
  std::string w2;
261
3.09M
  clean_ignore(w2, src);
262
263
3.09M
  const char* q = w2.c_str();
264
3.09M
  int nl = (int)w2.size();
265
266
  // first skip over any leading blanks
267
3.09M
  while (*q == ' ') {
268
6.79k
    ++q;
269
6.79k
    nl--;
270
6.79k
  }
271
  
272
  // now strip off any trailing periods (recording their presence)
273
3.09M
  *pabbrev = 0;
274
  
275
3.20M
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
276
116k
    nl--;
277
116k
    (*pabbrev)++;
278
116k
  }
279
280
  // if no characters are left it can't be capitalized
281
3.09M
  if (nl <= 0) {
282
57.9k
    *pcaptype = NOCAP;
283
57.9k
    return 0;
284
57.9k
  }
285
286
3.03M
  dest.append(q, nl);
287
3.03M
  nl = dest.size();
288
3.03M
  if (utf8) {
289
214k
    u8_u16(dest_utf, dest);
290
214k
    *pcaptype = get_captype_utf8(dest_utf, langnum);
291
2.82M
  } else {
292
2.82M
    *pcaptype = get_captype(dest, csconv);
293
2.82M
  }
294
3.03M
  return nl;
295
3.09M
}
296
297
void HunspellImpl::cleanword(std::string& dest,
298
                        const std::string& src,
299
                        int* pcaptype,
300
0
                        int* pabbrev) {
301
0
  dest.clear();
302
0
  const unsigned char* q = (const unsigned char*)src.c_str();
303
0
  int firstcap = 0, nl = (int)src.size();
304
305
  // first skip over any leading blanks
306
0
  while (*q == ' ') {
307
0
    ++q;
308
0
    nl--;
309
0
  }
310
  
311
  // now strip off any trailing periods (recording their presence)
312
0
  *pabbrev = 0;
313
  
314
0
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
315
0
    nl--;
316
0
    (*pabbrev)++;
317
0
  }
318
319
  // if no characters are left it can't be capitalized
320
0
  if (nl <= 0) {
321
0
    *pcaptype = NOCAP;
322
0
    return;
323
0
  }
324
325
  // now determine the capitalization type of the first nl letters
326
0
  int ncap = 0;
327
0
  int nneutral = 0;
328
0
  int nc = 0;
329
330
0
  if (!utf8) {
331
0
    while (nl > 0) {
332
0
      nc++;
333
0
      if (csconv[(*q)].ccase)
334
0
        ncap++;
335
0
      if (csconv[(*q)].cupper == csconv[(*q)].clower)
336
0
        nneutral++;
337
0
      dest.push_back(*q++);
338
0
      nl--;
339
0
    }
340
    // remember to terminate the destination string
341
0
    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
342
0
  } else {
343
0
    std::vector<w_char> t;
344
0
    u8_u16(t, src);
345
0
    for (auto& wc : t) {
346
0
      const auto idx = (unsigned short)wc;
347
0
      const auto low = unicodetolower(idx, langnum);
348
0
      if (idx != low)
349
0
        ncap++;
350
0
      if (unicodetoupper(idx, langnum) == low)
351
0
        nneutral++;
352
0
    }
353
0
    u16_u8(dest, t);
354
0
    if (ncap) {
355
0
      const auto idx = (unsigned short)t[0];
356
0
      firstcap = (idx != unicodetolower(idx, langnum));
357
0
    }
358
0
  }
359
360
  // now finally set the captype
361
0
  if (ncap == 0) {
362
0
    *pcaptype = NOCAP;
363
0
  } else if ((ncap == 1) && firstcap) {
364
0
    *pcaptype = INITCAP;
365
0
  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
366
0
    *pcaptype = ALLCAP;
367
0
  } else if ((ncap > 1) && firstcap) {
368
0
    *pcaptype = HUHINITCAP;
369
0
  } else {
370
0
    *pcaptype = HUHCAP;
371
0
  }
372
0
}
373
374
25.8k
void HunspellImpl::mkallcap(std::string& u8) {
375
25.8k
  if (utf8) {
376
14.0k
    std::vector<w_char> u16;
377
14.0k
    u8_u16(u16, u8);
378
14.0k
    ::mkallcap_utf(u16, langnum);
379
14.0k
    u16_u8(u8, u16);
380
14.0k
  } else {
381
11.7k
    ::mkallcap(u8, csconv);
382
11.7k
  }
383
25.8k
}
384
385
657k
int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
386
657k
  if (utf8) {
387
109k
    ::mkallsmall_utf(u16, langnum);
388
109k
    u16_u8(u8, u16);
389
547k
  } else {
390
547k
    ::mkallsmall(u8, csconv);
391
547k
  }
392
657k
  return u8.size();
393
657k
}
394
395
// convert UTF-8 sharp S codes to latin 1
396
232k
std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
397
232k
  std::string dest(source);
398
232k
  mystrrep(dest, "\xC3\x9F", "\xDF");
399
232k
  return dest;
400
232k
}
401
402
// recursive search for right ss - sharp s permutations
403
hentry* HunspellImpl::spellsharps(std::string& base,
404
                              size_t n_pos,
405
                              int n,
406
                              int repnum,
407
                              int* info,
408
561k
                              std::string* root) {
409
561k
  size_t pos = base.find("ss", n_pos);
410
561k
  if (pos != std::string::npos && (n < MAXSHARPS)) {
411
239k
    base[pos] = '\xC3';
412
239k
    base[pos + 1] = '\x9F';
413
239k
    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
414
239k
    if (h)
415
2.88k
      return h;
416
236k
    base[pos] = 's';
417
236k
    base[pos + 1] = 's';
418
236k
    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
419
236k
    if (h)
420
1.60k
      return h;
421
321k
  } else if (repnum > 0) {
422
239k
    if (utf8)
423
7.06k
      return checkword(base, info, root);
424
232k
    std::string tmp(sharps_u8_l1(base));
425
232k
    return checkword(tmp, info, root);
426
239k
  }
427
317k
  return NULL;
428
561k
}
429
430
17.7k
int HunspellImpl::is_keepcase(const hentry* rv) {
431
17.7k
  return pAMgr && rv->astr && pAMgr->get_keepcase() &&
432
17.7k
         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
433
17.7k
}
434
435
/* insert a word to the beginning of the suggestion array */
436
1.11M
void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
437
1.11M
  slst.insert(slst.begin(), word);
438
1.11M
}
439
440
bool HunspellImpl::spell(const std::string& word, std::vector<std::string>& candidate_stack,
441
2.99M
                         int* info, std::string* root) {
442
  // something very broken if spell ends up calling itself with the same word
443
2.99M
  if (std::find(candidate_stack.begin(), candidate_stack.end(), word) != candidate_stack.end())
444
75
    return false;
445
446
2.99M
  candidate_stack.push_back(word);
447
2.99M
  bool r = spell_internal(word, candidate_stack, info, root);
448
2.99M
  candidate_stack.pop_back();
449
450
2.99M
  if (r && root) {
451
    // output conversion
452
0
    RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
453
0
    if (rl) {
454
0
      std::string wspace;
455
0
      if (rl->conv(*root, wspace)) {
456
0
        *root = wspace;
457
0
      }
458
0
    }
459
0
  }
460
2.99M
  return r;
461
2.99M
}
462
463
bool HunspellImpl::spell_internal(const std::string& word, std::vector<std::string>& candidate_stack,
464
2.99M
                                  int* info, std::string* root) {
465
2.99M
  struct hentry* rv = NULL;
466
467
2.99M
  int info2 = 0;
468
2.99M
  if (!info)
469
2.98M
    info = &info2;
470
5.58k
  else
471
5.58k
    *info = 0;
472
473
  // Hunspell supports XML input of the simplified API (see manual)
474
2.99M
  if (word == SPELL_XML)
475
656
    return true;
476
2.99M
  if (utf8) {
477
207k
    if (word.size() >= MAXWORDUTF8LEN)
478
6.21k
      return false;
479
2.78M
  } else {
480
2.78M
    if (word.size() >= MAXWORDLEN)
481
3.01k
      return false;
482
2.78M
  }
483
2.98M
  int captype = NOCAP;
484
2.98M
  size_t abbv = 0;
485
2.98M
  size_t wl = 0;
486
487
2.98M
  std::string scw;
488
2.98M
  std::vector<w_char> sunicw;
489
490
  // input conversion
491
2.98M
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
492
2.98M
  {
493
2.98M
    std::string wspace;
494
495
2.98M
    bool convstatus = rl ? rl->conv(word, wspace) : false;
496
2.98M
    if (convstatus)
497
128k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
498
2.85M
    else
499
2.85M
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
500
2.98M
  }
501
502
2.98M
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
503
2.98M
    if (wl > 32768)
504
86
      return false;
505
2.98M
#endif
506
507
#ifdef MOZILLA_CLIENT
508
  // accept the abbreviated words without dots
509
  // workaround for the incomplete tokenization of Mozilla
510
  abbv = 1;
511
#endif
512
513
2.98M
  if (wl == 0 || m_HMgrs.empty())
514
57.9k
    return true;
515
2.92M
  if (root)
516
0
    root->clear();
517
518
  // allow numbers with dots, dashes and commas (but forbid double separators:
519
  // "..", "--" etc.)
520
2.92M
  enum { NBEGIN, NNUM, NSEP };
521
2.92M
  int nstate = NBEGIN;
522
2.92M
  size_t i;
523
524
2.99M
  for (i = 0; (i < wl); i++) {
525
2.98M
    if ((scw[i] <= '9') && (scw[i] >= '0')) {
526
57.9k
      nstate = NNUM;
527
2.92M
    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
528
395k
      if ((nstate == NSEP) || (i == 0))
529
386k
        break;
530
9.28k
      nstate = NSEP;
531
9.28k
    } else
532
2.52M
      break;
533
2.98M
  }
534
2.92M
  if ((i == wl) && (nstate == NNUM))
535
10.9k
    return true;
536
537
2.91M
  switch (captype) {
538
215k
    case HUHCAP:
539
    /* FALLTHROUGH */
540
241k
    case HUHINITCAP:
541
241k
      *info |= SPELL_ORIGCAP;
542
    /* FALLTHROUGH */
543
2.58M
    case NOCAP:
544
2.58M
      rv = checkword(scw, info, root);
545
2.58M
      if ((abbv) && !(rv)) {
546
24.3k
        std::string u8buffer(scw);
547
24.3k
        u8buffer.push_back('.');
548
24.3k
        rv = checkword(u8buffer, info, root);
549
24.3k
      }
550
2.58M
      break;
551
166k
    case ALLCAP: {
552
166k
      *info |= SPELL_ORIGCAP;
553
166k
      rv = checkword(scw, info, root);
554
166k
      if (rv)
555
4.41k
        break;
556
161k
      if (abbv) {
557
14.4k
        std::string u8buffer(scw);
558
14.4k
        u8buffer.push_back('.');
559
14.4k
        rv = checkword(u8buffer, info, root);
560
14.4k
        if (rv)
561
492
          break;
562
14.4k
      }
563
      // Spec. prefix handling for Catalan, French, Italian:
564
      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
565
161k
      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
566
161k
      if (apos != std::string::npos) {
567
13.8k
        mkallsmall2(scw, sunicw);
568
        //conversion may result in string with different len to pre-mkallsmall2
569
        //so re-scan
570
13.8k
        if (apos != std::string::npos && apos < scw.size() - 1) {
571
13.1k
          std::string part1 = scw.substr(0, apos + 1), part2 = scw.substr(apos + 1);
572
13.1k
          if (utf8) {
573
5.61k
            std::vector<w_char> part1u, part2u;
574
5.61k
            u8_u16(part1u, part1);
575
5.61k
            u8_u16(part2u, part2);
576
5.61k
            mkinitcap2(part2, part2u);
577
5.61k
            scw = part1 + part2;
578
5.61k
            sunicw = part1u;
579
5.61k
            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
580
5.61k
            rv = checkword(scw, info, root);
581
5.61k
            if (rv)
582
1.56k
              break;
583
7.56k
          } else {
584
7.56k
            mkinitcap2(part2, sunicw);
585
7.56k
            scw = part1 + part2;
586
7.56k
            rv = checkword(scw, info, root);
587
7.56k
            if (rv)
588
392
              break;
589
7.56k
          }
590
11.2k
          mkinitcap2(scw, sunicw);
591
11.2k
          rv = checkword(scw, info, root);
592
11.2k
          if (rv)
593
707
            break;
594
11.2k
        }
595
13.8k
      }
596
158k
      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
597
598
42.1k
        mkallsmall2(scw, sunicw);
599
42.1k
        std::string u8buffer(scw);
600
42.1k
        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
601
42.1k
        if (!rv) {
602
40.7k
          mkinitcap2(scw, sunicw);
603
40.7k
          rv = spellsharps(scw, 0, 0, 0, info, root);
604
40.7k
        }
605
42.1k
        if ((abbv) && !(rv)) {
606
884
          u8buffer.push_back('.');
607
884
          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
608
884
          if (!rv) {
609
872
            u8buffer = std::string(scw);
610
872
            u8buffer.push_back('.');
611
872
            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
612
872
          }
613
884
        }
614
42.1k
        if (rv)
615
2.26k
          break;
616
42.1k
      }
617
158k
    }
618
      /* FALLTHROUGH */
619
319k
    case INITCAP: {
620
      // handle special capitalization of dotted I
621
319k
      bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
622
319k
      *info |= SPELL_ORIGCAP;
623
319k
      if (captype == ALLCAP) {
624
156k
          mkallsmall2(scw, sunicw);
625
156k
          mkinitcap2(scw, sunicw);
626
156k
          if (Idot)
627
1.07k
             scw.replace(0, 1, "\xc4\xb0");
628
156k
      }
629
319k
      if (captype == INITCAP)
630
162k
        *info |= SPELL_INITCAP;
631
319k
      rv = checkword(scw, info, root);
632
319k
      if (captype == INITCAP)
633
162k
        *info &= ~SPELL_INITCAP;
634
      // forbid bad capitalization
635
      // (for example, ijs -> Ijs instead of IJs in Dutch)
636
      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
637
319k
      if (*info & SPELL_FORBIDDEN) {
638
757
        rv = NULL;
639
757
        break;
640
757
      }
641
318k
      if (rv && is_keepcase(rv) && (captype == ALLCAP))
642
902
        rv = NULL;
643
318k
      if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
644
9.12k
        break;
645
646
309k
      mkallsmall2(scw, sunicw);
647
309k
      std::string u8buffer(scw);
648
309k
      mkinitcap2(scw, sunicw);
649
650
309k
      rv = checkword(u8buffer, info, root);
651
309k
      if (abbv && !rv) {
652
14.9k
        u8buffer.push_back('.');
653
14.9k
        rv = checkword(u8buffer, info, root);
654
14.9k
        if (!rv) {
655
14.5k
          u8buffer = scw;
656
14.5k
          u8buffer.push_back('.');
657
14.5k
          if (captype == INITCAP)
658
1.05k
            *info |= SPELL_INITCAP;
659
14.5k
          rv = checkword(u8buffer, info, root);
660
14.5k
          if (captype == INITCAP)
661
1.05k
            *info &= ~SPELL_INITCAP;
662
14.5k
          if (rv && is_keepcase(rv) && (captype == ALLCAP))
663
16
            rv = NULL;
664
14.5k
          break;
665
14.5k
        }
666
14.9k
      }
667
294k
      if (rv && is_keepcase(rv) &&
668
294k
          ((captype == ALLCAP) ||
669
           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
670
           // in INITCAP form, too.
671
2.62k
           !(pAMgr->get_checksharps() &&
672
1.10k
             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
673
349
              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
674
2.62k
        rv = NULL;
675
294k
      break;
676
309k
    }
677
2.91M
  }
678
679
2.91M
  if (rv) {
680
267k
    if (pAMgr && pAMgr->get_warn() && rv->astr &&
681
267k
        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
682
10.5k
      *info |= SPELL_WARN;
683
10.5k
      if (pAMgr->get_forbidwarn())
684
0
        return false;
685
10.5k
      return true;
686
10.5k
    }
687
257k
    return true;
688
267k
  }
689
690
  // recursive breaking at break points
691
2.64M
  if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
692
693
2.44M
    int nbr = 0;
694
2.44M
    wl = scw.size();
695
696
    // calculate break points for recursion limit
697
7.31M
    for (auto& j : wordbreak) {
698
7.31M
      size_t pos = 0;
699
9.71M
      while ((pos = scw.find(j, pos)) != std::string::npos) {
700
2.40M
        ++nbr;
701
2.40M
        pos += j.size();
702
2.40M
      }
703
7.31M
    }
704
2.44M
    if (nbr >= 10)
705
494
      return false;
706
707
    // check boundary patterns (^begin and end$)
708
7.30M
    for (auto& j : wordbreak) {
709
7.30M
      size_t plen = j.size();
710
7.30M
      if (plen == 1 || plen > wl)
711
3.33M
        continue;
712
713
3.96M
      if (j[0] == '^' &&
714
3.96M
          scw.compare(0, plen - 1, j, 1, plen -1) == 0 && spell(scw.substr(plen - 1), candidate_stack))
715
8.69k
      {
716
8.69k
        if (info)
717
8.69k
          *info |= SPELL_COMPOUND;
718
8.69k
        return true;
719
8.69k
      }
720
721
3.95M
      if (j[plen - 1] == '$' &&
722
3.95M
          scw.compare(wl - plen + 1, plen - 1, j, 0, plen - 1) == 0) {
723
780k
        std::string suffix(scw.substr(wl - plen + 1));
724
780k
        scw.resize(wl - plen + 1);
725
780k
        if (spell(scw, candidate_stack))
726
62.1k
        {
727
62.1k
          if (info)
728
62.1k
            *info |= SPELL_COMPOUND;
729
62.1k
          return true;
730
62.1k
        }
731
717k
        scw.append(suffix);
732
717k
      }
733
3.95M
    }
734
735
    // other patterns
736
7.07M
    for (auto& j : wordbreak) {
737
7.07M
      size_t plen = j.size();
738
7.07M
      size_t found = scw.find(j);
739
7.07M
      if ((found > 0) && (found < wl - plen)) {
740
718k
        size_t found2 = scw.find(j, found + 1);
741
        // try to break at the second occurance
742
        // to recognize dictionary words with wordbreak
743
718k
        if (found2 > 0 && (found2 < wl - plen))
744
196k
            found = found2;
745
718k
        std::string substring(scw.substr(found + plen));
746
718k
        if (!spell(substring, candidate_stack))
747
582k
          continue;
748
136k
        std::string suffix(scw.substr(found));
749
136k
        scw.resize(found);
750
        // examine 2 sides of the break point
751
136k
        if (spell(scw, candidate_stack))
752
10.2k
        {
753
10.2k
          if (info)
754
10.2k
            *info |= SPELL_COMPOUND;
755
10.2k
          return true;
756
10.2k
        }
757
126k
        scw.append(suffix);
758
759
        // LANG_hu: spec. dash rule
760
126k
        if (langnum == LANG_hu && j == "-") {
761
73.8k
          suffix = scw.substr(found + 1);
762
73.8k
          scw.resize(found + 1);
763
73.8k
          if (spell(scw, candidate_stack))
764
981
          {
765
981
            if (info)
766
981
              *info |= SPELL_COMPOUND;
767
981
            return true;  // check the first part with dash
768
981
          }
769
72.8k
          scw.append(suffix);
770
72.8k
        }
771
        // end of LANG specific region
772
126k
      }
773
7.07M
    }
774
775
    // other patterns (break at first break point)
776
7.06M
    for (auto& j : wordbreak) {
777
7.06M
      size_t plen = j.size(), found = scw.find(j);
778
7.06M
      if ((found > 0) && (found < wl - plen)) {
779
705k
        if (!spell(scw.substr(found + plen), candidate_stack))
780
578k
          continue;
781
127k
        std::string suffix(scw.substr(found));
782
127k
        scw.resize(found);
783
        // examine 2 sides of the break point
784
127k
        if (spell(scw, candidate_stack))
785
636
        {
786
636
          if (info)
787
636
            *info |= SPELL_COMPOUND;
788
636
          return true;
789
636
        }
790
126k
        scw.append(suffix);
791
792
        // LANG_hu: spec. dash rule
793
126k
        if (langnum == LANG_hu && j == "-") {
794
75.2k
          suffix = scw.substr(found + 1);
795
75.2k
          scw.resize(found + 1);
796
75.2k
          if (spell(scw, candidate_stack))
797
518
          {
798
518
            if (info)
799
518
              *info |= SPELL_COMPOUND;
800
518
            return true;  // check the first part with dash
801
518
          }
802
74.7k
          scw.append(suffix);
803
74.7k
        }
804
        // end of LANG specific region
805
126k
      }
806
7.06M
    }
807
2.36M
  }
808
809
2.56M
  return false;
810
2.64M
}
811
812
4.79M
struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
813
4.79M
  std::string word;
814
815
  // remove IGNORE characters from the string
816
4.79M
  clean_ignore(word, w);
817
818
4.79M
  if (word.empty())
819
287
    return NULL;
820
821
  // word reversing wrapper for complex prefixes
822
4.79M
  if (complexprefixes) {
823
3.42M
    if (utf8)
824
241k
      reverseword_utf(word);
825
3.17M
    else
826
3.17M
      reverseword(word);
827
3.42M
  }
828
829
4.79M
  int len = word.size();
830
831
  // look word in hash table
832
4.79M
  struct hentry* he = NULL;
833
9.54M
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
834
4.79M
    he = m_HMgrs[i]->lookup(word.c_str(), word.size());
835
836
    // check forbidden and onlyincompound words
837
4.79M
    if ((he) && (he->astr) && (pAMgr) &&
838
4.79M
        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
839
45.6k
      if (info)
840
45.6k
        *info |= SPELL_FORBIDDEN;
841
      // LANG_hu section: set dash information for suggestions
842
45.6k
      if (langnum == LANG_hu) {
843
45.4k
        if (pAMgr->get_compoundflag() &&
844
45.4k
            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
845
37.7k
          if (info)
846
37.7k
            *info |= SPELL_COMPOUND;
847
37.7k
        }
848
45.4k
      }
849
45.6k
      return NULL;
850
45.6k
    }
851
852
    // he = next not needaffix, onlyincompound homonym or onlyupcase word
853
4.75M
    while (he && (he->astr) && pAMgr &&
854
4.75M
           ((pAMgr->get_needaffix() &&
855
12.2k
             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
856
12.2k
            (pAMgr->get_onlyincompound() &&
857
10.4k
             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
858
12.2k
            (info && (*info & SPELL_INITCAP) &&
859
9.99k
             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
860
2.46k
      he = he->next_homonym;
861
4.74M
  }
862
863
  // check with affixes
864
4.74M
  if (!he && pAMgr) {
865
    // try stripping off affixes
866
4.56M
    he = pAMgr->affix_check(word, 0, len, 0);
867
868
    // check compound restriction and onlyupcase
869
4.56M
    if (he && he->astr &&
870
4.56M
        ((pAMgr->get_onlyincompound() &&
871
79.7k
          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
872
79.7k
         (info && (*info & SPELL_INITCAP) &&
873
79.4k
          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
874
1.25k
      he = NULL;
875
1.25k
    }
876
877
4.56M
    if (he) {
878
79.2k
      if ((he->astr) && (pAMgr) &&
879
79.2k
          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
880
1.88k
        if (info)
881
1.88k
          *info |= SPELL_FORBIDDEN;
882
1.88k
        return NULL;
883
1.88k
      }
884
77.4k
      if (root) {
885
0
        root->assign(he->word);
886
0
        if (complexprefixes) {
887
0
          if (utf8)
888
0
            reverseword_utf(*root);
889
0
          else
890
0
            reverseword(*root);
891
0
        }
892
0
      }
893
      // try check compound word
894
4.48M
    } else if (pAMgr->get_compound()) {
895
537k
      struct hentry* rwords[100] = {};  // buffer for COMPOUND pattern checking
896
897
      // first allow only 2 words in the compound
898
537k
      int setinfo = SPELL_COMPOUND_2;
899
537k
      if (info)
900
537k
        setinfo |= *info;
901
537k
      he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, &setinfo);
902
537k
      if (info)
903
537k
        *info = setinfo & ~SPELL_COMPOUND_2;
904
      // if not 2-word compoud word, try with 3 or more words
905
      // (only if original info didn't forbid it)
906
537k
      if (!he && info && !(*info & SPELL_COMPOUND_2)) {
907
535k
        *info &= ~SPELL_COMPOUND_2;
908
535k
        he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
909
        // accept the compound with 3 or more words only if it is
910
        // - not a dictionary word with a typo and
911
        // - not two words written separately,
912
        // - or if it's an arbitrary number accepted by compound rules (e.g. 999%)
913
535k
        if (he && !isdigit(word[0]))
914
22.5k
        {
915
22.5k
          std::vector<std::string> slst;
916
22.5k
          if (pSMgr->suggest(slst, word, NULL, /*test_simplesug=*/true))
917
20.9k
            he = NULL;
918
22.5k
        }
919
535k
      }
920
921
      // LANG_hu section: `moving rule' with last dash
922
537k
      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
923
69.8k
        std::string dup(word, 0, len - 1);
924
69.8k
        he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
925
69.8k
      }
926
      // end of LANG specific region
927
537k
      if (he) {
928
6.95k
        if (root) {
929
0
          root->assign(he->word);
930
0
          if (complexprefixes) {
931
0
            if (utf8)
932
0
              reverseword_utf(*root);
933
0
            else
934
0
              reverseword(*root);
935
0
          }
936
0
        }
937
6.95k
        if (info)
938
6.95k
          *info |= SPELL_COMPOUND;
939
6.95k
      }
940
537k
    }
941
4.56M
  }
942
943
4.74M
  return he;
944
4.74M
}
945
946
// Maximum number of entries allowed on the suggest candidate stack before
// suggest() gives up; a smaller bound is used for fuzzing builds.
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define MAX_CANDIDATE_STACK_DEPTH 512
#else
#define MAX_CANDIDATE_STACK_DEPTH 2048
#endif
951
952
108k
std::vector<std::string> HunspellImpl::suggest(const std::string& word, std::vector<std::string>& suggest_candidate_stack) {
953
954
108k
  if (suggest_candidate_stack.size() > MAX_CANDIDATE_STACK_DEPTH || // apply a fairly arbitrary depth limit
955
      // something very broken if suggest ends up calling itself with the same word
956
108k
      std::find(suggest_candidate_stack.begin(), suggest_candidate_stack.end(), word) != suggest_candidate_stack.end()) {
957
85
    return { };
958
85
  }
959
960
108k
  bool capwords;
961
108k
  size_t abbv;
962
108k
  int captype;
963
108k
  std::vector<std::string> spell_candidate_stack;
964
108k
  suggest_candidate_stack.push_back(word);
965
108k
  std::vector<std::string> slst = suggest_internal(word, spell_candidate_stack, suggest_candidate_stack,
966
108k
                                       capwords, abbv, captype);
967
108k
  suggest_candidate_stack.pop_back();
968
  // word reversing wrapper for complex prefixes
969
108k
  if (complexprefixes) {
970
237k
    for (auto& j : slst) {
971
237k
      if (utf8)
972
187k
        reverseword_utf(j);
973
49.7k
      else
974
49.7k
        reverseword(j);
975
237k
    }
976
25.5k
  }
977
978
  // capitalize
979
108k
  if (capwords)
980
38.0k
    for (auto& j : slst) {
981
38.0k
      mkinitcap(j);
982
38.0k
    }
983
984
  // expand suggestions with dot(s)
985
108k
  if (abbv && pAMgr && pAMgr->get_sugswithdots() && word.size() >= abbv) {
986
117k
    for (auto& j : slst) {
987
117k
      j.append(word.substr(word.size() - abbv));
988
117k
    }
989
6.01k
  }
990
991
  // remove bad capitalized and forbidden forms
992
108k
  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
993
107k
    switch (captype) {
994
7.26k
      case INITCAP:
995
32.5k
      case ALLCAP: {
996
32.5k
        size_t l = 0;
997
188k
        for (size_t j = 0; j < slst.size(); ++j) {
998
155k
          if (slst[j].find(' ') == std::string::npos && !spell(slst[j], spell_candidate_stack)) {
999
19.8k
            std::string s;
1000
19.8k
            std::vector<w_char> w;
1001
19.8k
            if (utf8) {
1002
11.8k
              u8_u16(w, slst[j]);
1003
11.8k
            } else {
1004
7.94k
              s = slst[j];
1005
7.94k
            }
1006
19.8k
            mkallsmall2(s, w);
1007
19.8k
            if (spell(s, spell_candidate_stack)) {
1008
1.06k
              slst[l] = s;
1009
1.06k
              ++l;
1010
18.7k
            } else {
1011
18.7k
              mkinitcap2(s, w);
1012
18.7k
              if (spell(s, spell_candidate_stack)) {
1013
411
                slst[l] = s;
1014
411
                ++l;
1015
411
              }
1016
18.7k
            }
1017
135k
          } else {
1018
135k
            slst[l] = slst[j];
1019
135k
            ++l;
1020
135k
          }
1021
155k
        }
1022
32.5k
        slst.resize(l);
1023
32.5k
      }
1024
107k
    }
1025
107k
  }
1026
1027
  // remove duplications
1028
108k
  size_t l = 0;
1029
1.29M
  for (size_t j = 0; j < slst.size(); ++j) {
1030
1.18M
    slst[l] = slst[j];
1031
92.3M
    for (size_t k = 0; k < l; ++k) {
1032
91.1M
      if (slst[k] == slst[j]) {
1033
13.5k
        --l;
1034
13.5k
        break;
1035
13.5k
      }
1036
91.1M
    }
1037
1.18M
    ++l;
1038
1.18M
  }
1039
108k
  slst.resize(l);
1040
1041
  // output conversion
1042
108k
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1043
108k
  if (rl) {
1044
8.03k
    for (size_t i = 0; rl && i < slst.size(); ++i) {
1045
5.88k
      std::string wspace;
1046
5.88k
      if (rl->conv(slst[i], wspace)) {
1047
1.60k
        slst[i] = wspace;
1048
1.60k
      }
1049
5.88k
    }
1050
2.15k
  }
1051
108k
  return slst;
1052
108k
}
1053
1054
16.0k
std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
1055
16.0k
  std::vector<std::string> suggest_candidate_stack;
1056
16.0k
  return suggest(word, suggest_candidate_stack);
1057
16.0k
}
1058
1059
std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
1060
        std::vector<std::string>& spell_candidate_stack,
1061
        std::vector<std::string>& suggest_candidate_stack,
1062
108k
        bool& capwords, size_t& abbv, int& captype) {
1063
108k
  captype = NOCAP;
1064
108k
  abbv = 0;
1065
108k
  capwords = false;
1066
1067
108k
  std::vector<std::string> slst;
1068
1069
108k
  int onlycmpdsug = 0;
1070
108k
  if (!pSMgr || m_HMgrs.empty())
1071
0
    return slst;
1072
1073
  // process XML input of the simplified API (see manual)
1074
108k
  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
1075
413
    return spellml(word);
1076
413
  }
1077
108k
  if (utf8) {
1078
58.6k
    if (word.size() >= MAXWORDUTF8LEN)
1079
421
      return slst;
1080
58.6k
  } else {
1081
49.7k
    if (word.size() >= MAXWORDLEN)
1082
404
      return slst;
1083
49.7k
  }
1084
107k
  size_t wl = 0;
1085
1086
107k
  std::string scw;
1087
107k
  std::vector<w_char> sunicw;
1088
1089
  // input conversion
1090
107k
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1091
107k
  {
1092
107k
    std::string wspace;
1093
1094
107k
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1095
107k
    if (convstatus)
1096
89.5k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1097
18.0k
    else
1098
18.0k
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1099
1100
107k
    if (wl == 0)
1101
0
      return slst;
1102
1103
107k
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
1104
107k
    if (wl > 32768)
1105
22
      return slst;
1106
107k
#endif
1107
107k
  }
1108
1109
107k
  bool good = false;
1110
1111
107k
  clock_t timelimit;
1112
  // initialize in every suggestion call
1113
107k
  timelimit = clock();
1114
1115
  // check capitalized form for FORCEUCASE
1116
107k
  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
1117
438
    int info = SPELL_ORIGCAP;
1118
438
    if (checkword(scw, &info, NULL)) {
1119
2
      std::string form(scw);
1120
2
      mkinitcap(form);
1121
2
      slst.push_back(form);
1122
2
      return slst;
1123
2
    }
1124
438
  }
1125
1126
107k
  switch (captype) {
1127
48.4k
    case NOCAP: {
1128
48.4k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1129
48.4k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1130
9
          return slst;
1131
48.4k
      if (abbv) {
1132
10.1k
        std::string wspace(scw);
1133
10.1k
        wspace.push_back('.');
1134
10.1k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1135
10.1k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1136
4
            return slst;
1137
10.1k
      }
1138
48.4k
      break;
1139
48.4k
    }
1140
1141
48.4k
    case INITCAP: {
1142
7.26k
      capwords = true;
1143
7.26k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1144
7.26k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1145
1
          return slst;
1146
7.26k
      std::string wspace(scw);
1147
7.26k
      mkallsmall2(wspace, sunicw);
1148
7.26k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1149
7.26k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1150
12
          return slst;
1151
7.25k
      break;
1152
7.26k
    }
1153
7.25k
    case HUHINITCAP:
1154
5.00k
      capwords = true;
1155
      /* FALLTHROUGH */
1156
26.2k
    case HUHCAP: {
1157
26.2k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1158
26.2k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1159
52
          return slst;
1160
      // something.The -> something. The
1161
26.1k
      size_t dot_pos = scw.find('.');
1162
26.1k
      if (dot_pos != std::string::npos) {
1163
11.0k
        std::string postdot = scw.substr(dot_pos + 1);
1164
11.0k
        int captype_;
1165
11.0k
        if (utf8) {
1166
2.59k
          std::vector<w_char> postdotu;
1167
2.59k
          u8_u16(postdotu, postdot);
1168
2.59k
          captype_ = get_captype_utf8(postdotu, langnum);
1169
8.44k
        } else {
1170
8.44k
          captype_ = get_captype(postdot, csconv);
1171
8.44k
        }
1172
11.0k
        if (captype_ == INITCAP) {
1173
39
          std::string str(scw);
1174
39
          str.insert(dot_pos + 1, 1, ' ');
1175
39
          insert_sug(slst, str);
1176
39
        }
1177
11.0k
      }
1178
1179
26.1k
      std::string wspace;
1180
1181
26.1k
      if (captype == HUHINITCAP) {
1182
        // TheOpenOffice.org -> The OpenOffice.org
1183
4.98k
        wspace = scw;
1184
4.98k
        mkinitsmall2(wspace, sunicw);
1185
4.98k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1186
4.98k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1187
31
            return slst;
1188
4.98k
      }
1189
26.1k
      wspace = scw;
1190
26.1k
      mkallsmall2(wspace, sunicw);
1191
26.1k
      if (spell(wspace, spell_candidate_stack))
1192
344
        insert_sug(slst, wspace);
1193
26.1k
      size_t prevns = slst.size();
1194
26.1k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1195
26.1k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1196
108
          return slst;
1197
26.0k
      if (captype == HUHINITCAP) {
1198
4.93k
        mkinitcap2(wspace, sunicw);
1199
4.93k
        if (spell(wspace, spell_candidate_stack))
1200
255
          insert_sug(slst, wspace);
1201
4.93k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1202
4.93k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1203
8
            return slst;
1204
4.93k
      }
1205
      // aNew -> "a New" (instead of "a new")
1206
38.5k
      for (size_t j = prevns; j < slst.size(); ++j) {
1207
12.5k
        const char* space = strchr(slst[j].c_str(), ' ');
1208
12.5k
        if (space) {
1209
3.18k
          size_t slen = strlen(space + 1);
1210
          // different case after space (need capitalisation)
1211
3.18k
          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
1212
2.77k
            std::string first(slst[j].c_str(), space + 1);
1213
2.77k
            std::string second(space + 1);
1214
2.77k
            std::vector<w_char> w;
1215
2.77k
            if (utf8)
1216
952
              u8_u16(w, second);
1217
2.77k
            mkinitcap2(second, w);
1218
            // set as first suggestion
1219
2.77k
            slst.erase(slst.begin() + j);
1220
2.77k
            slst.insert(slst.begin(), first + second);
1221
2.77k
          }
1222
3.18k
        }
1223
12.5k
      }
1224
26.0k
      break;
1225
26.0k
    }
1226
1227
25.5k
    case ALLCAP: {
1228
25.5k
      std::string wspace(scw);
1229
25.5k
      mkallsmall2(wspace, sunicw);
1230
25.5k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1231
25.5k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1232
13
          return slst;
1233
25.5k
      if (pAMgr && pAMgr->get_keepcase() && spell(wspace, spell_candidate_stack))
1234
370
        insert_sug(slst, wspace);
1235
25.5k
      mkinitcap2(wspace, sunicw);
1236
25.5k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1237
25.5k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1238
38
          return slst;
1239
25.5k
      for (auto& j : slst) {
1240
14.0k
        mkallcap(j);
1241
14.0k
        if (pAMgr && pAMgr->get_checksharps()) {
1242
1.07k
          if (utf8) {
1243
284
            mystrrep(j, "\xC3\x9F", "SS");
1244
793
          } else {
1245
793
            mystrrep(j, "\xDF", "SS");
1246
793
          }
1247
1.07k
        }
1248
14.0k
      }
1249
25.5k
      break;
1250
25.5k
    }
1251
107k
  }
1252
1253
  // LANG_hu section: replace '-' with ' ' in Hungarian
1254
107k
  if (langnum == LANG_hu) {
1255
11.9k
    for (auto& j : slst) {
1256
11.9k
      size_t pos = j.find('-');
1257
11.9k
      if (pos != std::string::npos) {
1258
5.58k
        int info;
1259
5.58k
        std::string w(j.substr(0, pos));
1260
5.58k
        w.append(j.substr(pos + 1));
1261
5.58k
        (void)spell(w, spell_candidate_stack, &info, NULL);
1262
5.58k
        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1263
17
          j[pos] = ' ';
1264
17
        } else
1265
5.56k
          j[pos] = '-';
1266
5.58k
      }
1267
11.9k
    }
1268
9.65k
  }
1269
  // END OF LANG_hu section
1270
  // try ngram approach since found nothing good suggestion
1271
107k
  if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1272
105k
    switch (captype) {
1273
48.1k
      case NOCAP: {
1274
48.1k
        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
1275
48.1k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1276
1
            return slst;
1277
48.1k
        break;
1278
48.1k
      }
1279
      /* FALLTHROUGH */
1280
48.1k
      case HUHINITCAP:
1281
4.57k
        capwords = true;
1282
      /* FALLTHROUGH */
1283
24.5k
      case HUHCAP: {
1284
24.5k
        std::string wspace(scw);
1285
24.5k
        mkallsmall2(wspace, sunicw);
1286
24.5k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
1287
24.5k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1288
4
            return slst;
1289
24.5k
        break;
1290
24.5k
      }
1291
24.5k
      case INITCAP: {
1292
7.23k
        capwords = true;
1293
7.23k
        std::string wspace(scw);
1294
7.23k
        mkallsmall2(wspace, sunicw);
1295
7.23k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
1296
7.23k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1297
0
            return slst;
1298
7.23k
        break;
1299
7.23k
      }
1300
25.2k
      case ALLCAP: {
1301
25.2k
        std::string wspace(scw);
1302
25.2k
        mkallsmall2(wspace, sunicw);
1303
25.2k
        size_t oldns = slst.size();
1304
25.2k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
1305
25.2k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1306
1
            return slst;
1307
37.0k
        for (size_t j = oldns; j < slst.size(); ++j) {
1308
11.7k
          mkallcap(slst[j]);
1309
11.7k
        }
1310
25.2k
        break;
1311
25.2k
      }
1312
105k
    }
1313
105k
  }
1314
1315
  // try dash suggestion (Afo-American -> Afro-American)
1316
  // Note: LibreOffice was modified to treat dashes as word
1317
  // characters to check "scot-free" etc. word forms, but
1318
  // we need to handle suggestions for "Afo-American", etc.,
1319
  // while "Afro-American" is missing from the dictionary.
1320
  // TODO avoid possible overgeneration
1321
107k
  size_t dash_pos = scw.find('-');
1322
107k
  if (dash_pos != std::string::npos) {
1323
95.1k
    int nodashsug = 1;
1324
165k
    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1325
70.3k
      if (slst[j].find('-') != std::string::npos)
1326
1.85k
        nodashsug = 0;
1327
70.3k
    }
1328
1329
95.1k
    size_t prev_pos = 0;
1330
95.1k
    bool last = false;
1331
1332
222k
    while (!good && nodashsug && !last) {
1333
144k
      if (dash_pos == scw.size())
1334
34.0k
        last = 1;
1335
144k
      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1336
144k
      if (chunk != word && !spell(chunk, spell_candidate_stack)) {
1337
92.8k
        std::vector<std::string> nlst = suggest(chunk, suggest_candidate_stack);
1338
92.8k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1339
17.0k
            return slst;
1340
1.18M
        for (auto j = nlst.rbegin(); j != nlst.rend(); ++j) {
1341
1.11M
          std::string wspace = scw.substr(0, prev_pos);
1342
1.11M
          wspace.append(*j);
1343
1.11M
          if (!last) {
1344
900k
            wspace.append("-");
1345
900k
            wspace.append(scw.substr(dash_pos + 1));
1346
900k
          }
1347
1.11M
          int info = 0;
1348
1.11M
          if (pAMgr && pAMgr->get_forbiddenword())
1349
1.08M
            checkword(wspace, &info, NULL);
1350
1.11M
          if (!(info & SPELL_FORBIDDEN))
1351
1.11M
            insert_sug(slst, wspace);
1352
1.11M
        }
1353
75.7k
        nodashsug = 0;
1354
75.7k
      }
1355
127k
      if (!last) {
1356
101k
        prev_pos = dash_pos + 1;
1357
101k
        dash_pos = scw.find('-', prev_pos);
1358
101k
      }
1359
127k
      if (dash_pos == std::string::npos)
1360
65.6k
        dash_pos = scw.size();
1361
127k
    }
1362
95.1k
  }
1363
90.1k
  return slst;
1364
107k
}
1365
1366
0
const std::string& HunspellImpl::get_dict_encoding() const {
1367
0
  return encoding;
1368
0
}
1369
1370
0
std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1371
0
  std::vector<std::string> slst;
1372
1373
0
  std::string result2;
1374
0
  if (desc.empty())
1375
0
    return slst;
1376
0
  for (const auto& i : desc) {
1377
0
    std::string result;
1378
1379
    // add compound word parts (except the last one)
1380
0
    const char* s = i.c_str();
1381
0
    const char* part = strstr(s, MORPH_PART);
1382
0
    if (part) {
1383
0
      const char* nextpart = strstr(part + 1, MORPH_PART);
1384
0
      while (nextpart) {
1385
0
        std::string field;
1386
0
        copy_field(field, part, MORPH_PART);
1387
0
        result.append(field);
1388
0
        part = nextpart;
1389
0
        nextpart = strstr(part + 1, MORPH_PART);
1390
0
      }
1391
0
      s = part;
1392
0
    }
1393
1394
0
    std::string tok(s);
1395
0
    size_t alt = 0;
1396
0
    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1397
0
      tok[alt + 1] = MSEP_ALT;
1398
0
    }
1399
0
    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1400
0
    for (auto& k : pl) {
1401
      // add derivational suffixes
1402
0
      if (k.find(MORPH_DERI_SFX) != std::string::npos) {
1403
        // remove inflectional suffixes
1404
0
        const size_t is = k.find(MORPH_INFL_SFX);
1405
0
        if (is != std::string::npos)
1406
0
          k.resize(is);
1407
0
        std::vector<std::string> singlepl;
1408
0
        singlepl.push_back(k);
1409
0
        std::string sg = pSMgr->suggest_gen(singlepl, k);
1410
0
        if (!sg.empty()) {
1411
0
          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1412
0
          for (auto& j : gen) {
1413
0
            result2.push_back(MSEP_REC);
1414
0
            result2.append(result);
1415
0
            result2.append(j);
1416
0
          }
1417
0
        }
1418
0
      } else {
1419
0
        result2.push_back(MSEP_REC);
1420
0
        result2.append(result);
1421
0
        if (k.find(MORPH_SURF_PFX) != std::string::npos) {
1422
0
          std::string field;
1423
0
          copy_field(field, k, MORPH_SURF_PFX);
1424
0
          result2.append(field);
1425
0
        }
1426
0
        std::string field;
1427
0
        copy_field(field, k, MORPH_STEM);
1428
0
        result2.append(field);
1429
0
      }
1430
0
    }
1431
0
  }
1432
0
  slst = line_tok(result2, MSEP_REC);
1433
0
  uniqlist(slst);
1434
0
  return slst;
1435
0
}
1436
1437
0
std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1438
0
  return stem(analyze(word));
1439
0
}
1440
1441
0
const std::string& HunspellImpl::get_wordchars_cpp() const {
1442
0
  return pAMgr->get_wordchars();
1443
0
}
1444
1445
0
const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1446
0
  return pAMgr->get_wordchars_utf16();
1447
0
}
1448
1449
38.0k
void HunspellImpl::mkinitcap(std::string& u8) {
1450
38.0k
  if (utf8) {
1451
9.75k
    std::vector<w_char> u16;
1452
9.75k
    u8_u16(u16, u8);
1453
9.75k
    ::mkinitcap_utf(u16, langnum);
1454
9.75k
    u16_u8(u8, u16);
1455
28.2k
  } else {
1456
28.2k
    ::mkinitcap(u8, csconv);
1457
28.2k
  }
1458
38.0k
}
1459
1460
582k
int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1461
582k
  if (utf8) {
1462
89.0k
    ::mkinitcap_utf(u16, langnum);
1463
89.0k
    u16_u8(u8, u16);
1464
493k
  } else {
1465
493k
    ::mkinitcap(u8, csconv);
1466
493k
  }
1467
582k
  return u8.size();
1468
582k
}
1469
1470
4.98k
int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1471
4.98k
  if (utf8) {
1472
2.57k
    ::mkinitsmall_utf(u16, langnum);
1473
2.57k
    u16_u8(u8, u16);
1474
2.57k
  } else {
1475
2.41k
    ::mkinitsmall(u8, csconv);
1476
2.41k
  }
1477
4.98k
  return u8.size();
1478
4.98k
}
1479
1480
0
int HunspellImpl::add(const std::string& word) {
1481
0
  if (!m_HMgrs.empty())
1482
0
    return m_HMgrs[0]->add(word);
1483
0
  return 0;
1484
0
}
1485
1486
0
int HunspellImpl::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
1487
0
  if (!m_HMgrs.empty())
1488
0
    return m_HMgrs[0]->add_with_flags(word, flags, desc);
1489
0
  return 0;
1490
0
}
1491
1492
0
int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1493
0
  if (!m_HMgrs.empty())
1494
0
    return m_HMgrs[0]->add_with_affix(word, example);
1495
0
  return 0;
1496
0
}
1497
1498
0
int HunspellImpl::remove(const std::string& word) {
1499
0
  if (!m_HMgrs.empty())
1500
0
    return m_HMgrs[0]->remove(word);
1501
0
  return 0;
1502
0
}
1503
1504
0
const std::string& HunspellImpl::get_version_cpp() const {
1505
0
  return pAMgr->get_version();
1506
0
}
1507
1508
0
struct cs_info* HunspellImpl::get_csconv() {
1509
0
  return csconv;
1510
0
}
1511
1512
0
// Append `st` to `result`, inserting a "\n" separator first when `result`
// already has content. An empty `st` leaves `result` unchanged.
void HunspellImpl::cat_result(std::string& result, const std::string& st) {
  if (st.empty())
    return;

  if (!result.empty())
    result.append("\n");
  result.append(st);
}
1519
1520
0
std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1521
0
  std::vector<std::string> slst = analyze_internal(word);
1522
  // output conversion
1523
0
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1524
0
  if (rl) {
1525
0
    for (size_t i = 0; rl && i < slst.size(); ++i) {
1526
0
      std::string wspace;
1527
0
      if (rl->conv(slst[i], wspace)) {
1528
0
        slst[i] = wspace;
1529
0
      }
1530
0
    }
1531
0
  }
1532
0
  return slst;
1533
0
}
1534
1535
0
std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
1536
0
  std::vector<std::string> candidate_stack, slst;
1537
0
  if (!pSMgr || m_HMgrs.empty())
1538
0
    return slst;
1539
0
  if (utf8) {
1540
0
    if (word.size() >= MAXWORDUTF8LEN)
1541
0
      return slst;
1542
0
  } else {
1543
0
    if (word.size() >= MAXWORDLEN)
1544
0
      return slst;
1545
0
  }
1546
0
  int captype = NOCAP;
1547
0
  size_t abbv = 0;
1548
0
  size_t wl = 0;
1549
1550
0
  std::string scw;
1551
0
  std::vector<w_char> sunicw;
1552
1553
  // input conversion
1554
0
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1555
0
  {
1556
0
    std::string wspace;
1557
1558
0
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1559
0
    if (convstatus)
1560
0
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1561
0
    else
1562
0
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1563
0
  }
1564
1565
0
  if (wl == 0) {
1566
0
    if (abbv) {
1567
0
      scw.clear();
1568
0
      for (wl = 0; wl < abbv; wl++)
1569
0
        scw.push_back('.');
1570
0
      abbv = 0;
1571
0
    } else
1572
0
      return slst;
1573
0
  }
1574
1575
0
  std::string result;
1576
1577
0
  size_t n = 0;
1578
  // test numbers
1579
  // LANG_hu section: set dash information for suggestions
1580
0
  if (langnum == LANG_hu) {
1581
0
    size_t n2 = 0;
1582
0
    size_t n3 = 0;
1583
1584
0
    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1585
0
                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1586
0
      n++;
1587
0
      if ((scw[n] == '.') || (scw[n] == ',')) {
1588
0
        if (((n2 == 0) && (n > 3)) ||
1589
0
            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1590
0
          break;
1591
0
        n2++;
1592
0
        n3 = n;
1593
0
      }
1594
0
    }
1595
1596
0
    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1597
0
      return slst;
1598
0
    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1599
0
                      checkword(scw.substr(n), NULL, NULL))) {
1600
0
      result.append(scw);
1601
0
      result.resize(n - 1);
1602
0
      if (n == wl)
1603
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1604
0
      else {
1605
0
        std::string chunk = scw.substr(n - 1, 1);
1606
0
        cat_result(result, pSMgr->suggest_morph(chunk));
1607
0
        result.push_back('+');  // XXX SPEC. MORPHCODE
1608
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1609
0
      }
1610
0
      return line_tok(result, MSEP_REC);
1611
0
    }
1612
0
  }
1613
  // END OF LANG_hu section
1614
1615
0
  switch (captype) {
1616
0
    case HUHCAP:
1617
0
    case HUHINITCAP:
1618
0
    case NOCAP: {
1619
0
      cat_result(result, pSMgr->suggest_morph(scw));
1620
0
      if (abbv) {
1621
0
        std::string u8buffer(scw);
1622
0
        u8buffer.push_back('.');
1623
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1624
0
      }
1625
0
      break;
1626
0
    }
1627
0
    case INITCAP: {
1628
0
      mkallsmall2(scw, sunicw);
1629
0
      std::string u8buffer(scw);
1630
0
      mkinitcap2(scw, sunicw);
1631
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1632
0
      cat_result(result, pSMgr->suggest_morph(scw));
1633
0
      if (abbv) {
1634
0
        u8buffer.push_back('.');
1635
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1636
1637
0
        u8buffer = scw;
1638
0
        u8buffer.push_back('.');
1639
1640
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1641
0
      }
1642
0
      break;
1643
0
    }
1644
0
    case ALLCAP: {
1645
0
      cat_result(result, pSMgr->suggest_morph(scw));
1646
0
      if (abbv) {
1647
0
        std::string u8buffer(scw);
1648
0
        u8buffer.push_back('.');
1649
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1650
0
      }
1651
0
      mkallsmall2(scw, sunicw);
1652
0
      std::string u8buffer(scw);
1653
0
      mkinitcap2(scw, sunicw);
1654
1655
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1656
0
      cat_result(result, pSMgr->suggest_morph(scw));
1657
0
      if (abbv) {
1658
0
        u8buffer.push_back('.');
1659
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1660
1661
0
        u8buffer = scw;
1662
0
        u8buffer.push_back('.');
1663
1664
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1665
0
      }
1666
0
      break;
1667
0
    }
1668
0
  }
1669
1670
0
  if (!result.empty()) {
1671
    // word reversing wrapper for complex prefixes
1672
0
    if (complexprefixes) {
1673
0
      if (utf8)
1674
0
        reverseword_utf(result);
1675
0
      else
1676
0
        reverseword(result);
1677
0
    }
1678
0
    return line_tok(result, MSEP_REC);
1679
0
  }
1680
1681
  // compound word with dash (HU) I18n
1682
  // LANG_hu section: set dash information for suggestions
1683
1684
0
  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1685
0
  if (dash_pos != std::string::npos) {
1686
0
    int nresult = 0;
1687
1688
0
    std::string part1 = scw.substr(0, dash_pos), part2 = scw.substr(dash_pos + 1);
1689
1690
    // examine 2 sides of the dash
1691
0
    if (part2.empty()) {  // base word ending with dash
1692
0
      if (spell(part1, candidate_stack)) {
1693
0
        std::string p = pSMgr->suggest_morph(part1);
1694
0
        if (!p.empty()) {
1695
0
          slst = line_tok(p, MSEP_REC);
1696
0
          return slst;
1697
0
        }
1698
0
      }
1699
0
    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1700
0
      if (spell(part1, candidate_stack) && (spell("-e", candidate_stack))) {
1701
0
        std::string st = pSMgr->suggest_morph(part1);
1702
0
        if (!st.empty()) {
1703
0
          result.append(st);
1704
0
        }
1705
0
        result.push_back('+');  // XXX spec. separator in MORPHCODE
1706
0
        st = pSMgr->suggest_morph("-e");
1707
0
        if (!st.empty()) {
1708
0
          result.append(st);
1709
0
        }
1710
0
        return line_tok(result, MSEP_REC);
1711
0
      }
1712
0
    } else {
1713
      // first word ending with dash: word- XXX ???
1714
0
      part1.push_back(' ');
1715
0
      nresult = spell(part1, candidate_stack);
1716
0
      part1.erase(part1.size() - 1);
1717
0
      if (nresult && spell(part2, candidate_stack) &&
1718
0
          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1719
0
        std::string st = pSMgr->suggest_morph(part1);
1720
0
        if (!st.empty()) {
1721
0
          result.append(st);
1722
0
          result.push_back('+');  // XXX spec. separator in MORPHCODE
1723
0
        }
1724
0
        st = pSMgr->suggest_morph(part2);
1725
0
        if (!st.empty()) {
1726
0
          result.append(st);
1727
0
        }
1728
0
        return line_tok(result, MSEP_REC);
1729
0
      }
1730
0
    }
1731
    // affixed number in correct word
1732
0
    if (nresult && (dash_pos > 0) &&
1733
0
        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1734
0
         (scw[dash_pos - 1] == '.'))) {
1735
0
      n = 1;
1736
0
      if (scw[dash_pos - n] == '.')
1737
0
        n++;
1738
      // search first not a number character to left from dash
1739
0
      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1740
0
             (n < 6)) {
1741
0
        n++;
1742
0
      }
1743
0
      if (dash_pos < n)
1744
0
        n--;
1745
      // numbers: valami1000000-hoz
1746
      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1747
      // 56-hoz, 6-hoz
1748
0
      for (; n >= 1; n--) {
1749
0
        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1750
0
            continue;
1751
0
        }
1752
0
        std::string chunk = scw.substr(dash_pos - n);
1753
0
        if (checkword(chunk, NULL, NULL)) {
1754
0
          result.append(chunk);
1755
0
          std::string st = pSMgr->suggest_morph(chunk);
1756
0
          if (!st.empty()) {
1757
0
            result.append(st);
1758
0
          }
1759
0
          return line_tok(result, MSEP_REC);
1760
0
        }
1761
0
      }
1762
0
    }
1763
0
  }
1764
0
  return slst;
1765
0
}
1766
1767
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1768
0
  std::vector<std::string> slst;
1769
0
  if (!pSMgr || pl.empty())
1770
0
    return slst;
1771
0
  std::vector<std::string> pl2 = analyze(word);
1772
0
  int captype = NOCAP, abbv = 0;
1773
0
  std::string cw;
1774
0
  cleanword(cw, word, &captype, &abbv);
1775
0
  std::string result;
1776
1777
0
  for (const auto& i : pl) {
1778
0
    cat_result(result, pSMgr->suggest_gen(pl2, i));
1779
0
  }
1780
1781
0
  if (!result.empty()) {
1782
    // allcap
1783
0
    if (captype == ALLCAP)
1784
0
      mkallcap(result);
1785
1786
    // line split
1787
0
    slst = line_tok(result, MSEP_REC);
1788
1789
    // capitalize
1790
0
    if (captype == INITCAP || captype == HUHINITCAP) {
1791
0
      for (auto& str : slst) {
1792
0
        mkinitcap(str);
1793
0
      }
1794
0
    }
1795
1796
    // temporary filtering of prefix related errors (eg.
1797
    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1798
0
    auto it = slst.begin();
1799
0
    while (it != slst.end()) {
1800
0
      std::vector<std::string> candidate_stack;
1801
0
      if (!spell(*it, candidate_stack)) {
1802
0
        it = slst.erase(it);
1803
0
      } else  {
1804
0
        ++it;
1805
0
      }
1806
0
    }
1807
0
  }
1808
0
  return slst;
1809
0
}
1810
1811
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1812
0
  std::vector<std::string> pl = analyze(pattern);
1813
0
  std::vector<std::string> slst = generate(word, pl);
1814
0
  uniqlist(slst);
1815
0
  return slst;
1816
0
}
1817
1818
// minimal XML parser functions
1819
120
std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
1820
120
  std::string dest;
1821
120
  if (pos == std::string::npos)
1822
40
    return dest;
1823
80
  const char* par = in_par.c_str() + pos;
1824
80
  char end = *par;
1825
80
  if (end == '>')
1826
48
    end = '<';
1827
32
  else if (end != '\'' && end != '"')
1828
12
    return dest;  // bad XML
1829
1.18k
  for (par++; *par != '\0' && *par != end; ++par) {
1830
1.11k
    dest.push_back(*par);
1831
1.11k
  }
1832
68
  mystrrep(dest, "&lt;", "<");
1833
68
  mystrrep(dest, "&amp;", "&");
1834
68
  return dest;
1835
80
}
1836
1837
0
int HunspellImpl::get_langnum() const {
1838
0
  return langnum;
1839
0
}
1840
1841
0
bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1842
0
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1843
0
  if (rl) {
1844
0
    return rl->conv(word, dest);
1845
0
  }
1846
0
  dest.assign(word);
1847
0
  return false;
1848
0
}
1849
1850
// return the beginning of the element (attr == NULL) or the attribute
1851
120
std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
1852
120
  if (pos == std::string::npos)
1853
0
    return std::string::npos;
1854
1855
120
  std::string::size_type endpos = s.find('>', pos);
1856
120
  if (attr == NULL)
1857
0
    return endpos;
1858
152
  while (true) {
1859
152
    pos = s.find(attr, pos);
1860
152
    if (pos == std::string::npos || pos >= endpos)
1861
40
      return std::string::npos;
1862
112
    if (s[pos - 1] == ' ' || s[pos - 1] == '\n')
1863
80
      break;
1864
32
    pos += strlen(attr);
1865
32
  }
1866
80
  return pos + strlen(attr);
1867
120
}
1868
1869
int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
1870
                                const char* attr,
1871
120
                                const char* value) {
1872
120
  const std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
1873
120
  return cw == value ? 1 : 0;
1874
120
}
1875
1876
0
std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
1877
0
  std::vector<std::string> slst;
1878
0
  if (pos == std::string::npos)
1879
0
    return slst;
1880
0
  while (true) {
1881
0
    pos = list.find(tag, pos);
1882
0
    if (pos == std::string::npos)
1883
0
        break;
1884
0
    std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
1885
0
    if (cw.empty()) {
1886
0
      break;
1887
0
    }
1888
0
    slst.push_back(cw);
1889
0
    ++pos;
1890
0
  }
1891
0
  return slst;
1892
0
}
1893
1894
413
std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1895
413
  std::vector<std::string> slst;
1896
1897
413
  std::string::size_type qpos = in_word.find("<query");
1898
413
  if (qpos == std::string::npos)
1899
333
    return slst;  // bad XML input
1900
1901
80
  std::string::size_type q2pos = in_word.find('>', qpos);
1902
80
  if (q2pos == std::string::npos)
1903
38
    return slst;  // bad XML input
1904
1905
42
  q2pos = in_word.find("<word", q2pos);
1906
42
  if (q2pos == std::string::npos)
1907
12
    return slst;  // bad XML input
1908
1909
30
  if (check_xml_par(in_word, qpos, "type=", "analyze")) {
1910
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1911
0
    if (!cw.empty())
1912
0
      slst = analyze(cw);
1913
0
    if (slst.empty())
1914
0
      return slst;
1915
    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1916
0
    std::string r;
1917
0
    r.append("<code>");
1918
0
    for (auto entry : slst) {
1919
0
      r.append("<a>");
1920
1921
0
      mystrrep(entry, "\t", " ");
1922
0
      mystrrep(entry, "&", "&amp;");
1923
0
      mystrrep(entry, "<", "&lt;");
1924
0
      r.append(entry);
1925
1926
0
      r.append("</a>");
1927
0
    }
1928
0
    r.append("</code>");
1929
0
    slst.clear();
1930
0
    slst.push_back(r);
1931
0
    return slst;
1932
30
  } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
1933
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1934
0
    if (!cw.empty())
1935
0
      return stem(cw);
1936
30
  } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
1937
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1938
0
    if (cw.empty())
1939
0
      return slst;
1940
0
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1941
0
    if (q3pos != std::string::npos) {
1942
0
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1943
0
      if (!cw2.empty()) {
1944
0
        return generate(cw, cw2);
1945
0
      }
1946
0
    } else {
1947
0
      q2pos = in_word.find("<code", q2pos + 1);
1948
0
      if (q2pos != std::string::npos) {
1949
0
        std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
1950
0
        if (!slst2.empty()) {
1951
0
          slst = generate(cw, slst2);
1952
0
          uniqlist(slst);
1953
0
          return slst;
1954
0
        }
1955
0
      }
1956
0
    }
1957
30
  } else if (check_xml_par(in_word, qpos, "type=", "add")) {
1958
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1959
0
    if (cw.empty())
1960
0
      return slst;
1961
0
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1962
0
    if (q3pos != std::string::npos) {
1963
0
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1964
0
      if (!cw2.empty()) {
1965
0
        add_with_affix(cw, cw2);
1966
0
      } else {
1967
0
        add(cw);
1968
0
      }
1969
0
    } else {
1970
0
        add(cw);
1971
0
    }
1972
0
  }
1973
30
  return slst;
1974
30
}
1975
1976
0
std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1977
0
  std::vector<std::string> slst;
1978
0
  struct hentry* he = NULL;
1979
0
  int len;
1980
0
  std::string w2;
1981
0
  const char* word;
1982
0
  const char* ignoredchars = pAMgr->get_ignore();
1983
0
  if (ignoredchars != NULL) {
1984
0
    w2.assign(root_word);
1985
0
    if (utf8) {
1986
0
      const std::vector<w_char>& ignoredchars_utf16 =
1987
0
          pAMgr->get_ignore_utf16();
1988
0
      remove_ignored_chars_utf(w2, ignoredchars_utf16);
1989
0
    } else {
1990
0
      remove_ignored_chars(w2, ignoredchars);
1991
0
    }
1992
0
    word = w2.c_str();
1993
0
    len = (int)w2.size();
1994
0
  } else {
1995
0
    word = root_word.c_str();
1996
0
    len = (int)root_word.size();
1997
0
  }
1998
1999
0
  if (!len)
2000
0
    return slst;
2001
2002
0
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2003
0
    he = m_HMgrs[i]->lookup(word, len);
2004
0
  }
2005
0
  if (he) {
2006
0
    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word);
2007
0
  }
2008
0
  return slst;
2009
0
}
2010
2011
namespace {
  // using malloc because this is for the c-api where the callers
  // expect to be able to use free
  //
  // Returns a NUL-terminated copy of `s`, or NULL if malloc fails.
  char* stringdup(const std::string& s) {
    const size_t bytes = s.size() + 1;
    char* copy = static_cast<char*>(malloc(bytes));
    if (copy != NULL)
      memcpy(copy, s.c_str(), bytes);
    return copy;
  }

  // Convert `items` into a new[]-allocated array of stringdup() copies stored
  // in *slst and return the element count. An empty input sets *slst to NULL
  // and returns 0.
  int munge_vector(char*** slst, const std::vector<std::string>& items) {
    if (items.empty()) {
      *slst = NULL;
      return 0;
    }

    char** table = new char*[items.size()];
    *slst = table;
    size_t idx = 0;
    for (const auto& item : items)
      table[idx++] = stringdup(item);
    return items.size();
  }
}
2034
2035
0
int HunspellImpl::spell(const char* word, int* info, char** root) {
2036
0
  std::string sroot;
2037
0
  std::vector<std::string> candidate_stack;
2038
0
  bool ret = spell(word, candidate_stack, info, root ? &sroot : NULL);
2039
0
  if (root) {
2040
0
    if (sroot.empty()) {
2041
0
      *root = NULL;
2042
0
    } else {
2043
0
      *root = stringdup(sroot);
2044
0
    }
2045
0
  }
2046
0
  return ret;
2047
0
}
2048
2049
0
int HunspellImpl::suggest(char*** slst, const char* word) {
2050
0
  std::vector<std::string> suggests = suggest(word);
2051
0
  return munge_vector(slst, suggests);
2052
0
}
2053
2054
0
int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
2055
0
  std::vector<std::string> stems = suffix_suggest(root_word);
2056
0
  return munge_vector(slst, stems);
2057
0
}
2058
2059
0
// Release a list produced by munge_vector(): free() each of the first `n`
// strings, delete[] the array and reset *slst to NULL. Does nothing when
// `slst` or `*slst` is NULL.
void HunspellImpl::free_list(char*** slst, int n) {
  if (!slst || !*slst)
    return;

  char** entries = *slst;
  for (int i = 0; i < n; ++i)
    free(entries[i]);
  delete[] entries;
  *slst = NULL;
}
2067
2068
0
char* HunspellImpl::get_dic_encoding() {
2069
0
  return &encoding[0];
2070
0
}
2071
2072
0
int HunspellImpl::analyze(char*** slst, const char* word) {
2073
0
  std::vector<std::string> stems = analyze(word);
2074
0
  return munge_vector(slst, stems);
2075
0
}
2076
2077
0
int HunspellImpl::stem(char*** slst, const char* word) {
2078
0
  std::vector<std::string> stems = stem(word);
2079
0
  return munge_vector(slst, stems);
2080
0
}
2081
2082
0
int HunspellImpl::stem(char*** slst, char** desc, int n) {
2083
0
  std::vector<std::string> morph;
2084
0
  morph.reserve(n);
2085
0
  for (int i = 0; i < n; ++i) morph.emplace_back(desc[i]);
2086
2087
0
  std::vector<std::string> stems = stem(morph);
2088
0
  return munge_vector(slst, stems);
2089
0
}
2090
2091
0
int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
2092
0
  std::vector<std::string> stems = generate(word, pattern);
2093
0
  return munge_vector(slst, stems);
2094
0
}
2095
2096
0
int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
2097
0
  std::vector<std::string> morph;
2098
0
  morph.reserve(pln);
2099
0
  for (int i = 0; i < pln; ++i) morph.emplace_back(pl[i]);
2100
2101
0
  std::vector<std::string> stems = generate(word, morph);
2102
0
  return munge_vector(slst, stems);
2103
0
}
2104
2105
0
const char* HunspellImpl::get_wordchars() const {
2106
0
  return get_wordchars_cpp().c_str();
2107
0
}
2108
2109
0
const char* HunspellImpl::get_version() const {
2110
0
  return get_version_cpp().c_str();
2111
0
}
2112
2113
0
int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
2114
0
  std::string d;
2115
0
  bool ret = input_conv(word, d);
2116
0
  if (ret && d.size() < destsize) {
2117
0
    strncpy(dest, d.c_str(), destsize);
2118
0
    return 1;
2119
0
  }
2120
0
  return 0;
2121
0
}
2122
2123
// Creates the implementation object with new, passing the three arguments
// through unchanged, and stores it in m_Impl.
Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
2124
18.3k
  : m_Impl(new HunspellImpl(affpath, dpath, key)) {
2125
18.3k
}
2126
2127
18.3k
// Destroys the implementation object held in m_Impl.
Hunspell::~Hunspell() {
2128
18.3k
  delete m_Impl;
2129
18.3k
}
2130
2131
// load extra dictionaries
2132
0
int Hunspell::add_dic(const char* dpath, const char* key) {
2133
0
  return m_Impl->add_dic(dpath, key);
2134
0
}
2135
2136
18.3k
bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
2137
18.3k
  std::vector<std::string> candidate_stack;
2138
18.3k
  return m_Impl->spell(word, candidate_stack, info, root);
2139
18.3k
}
2140
2141
16.0k
std::vector<std::string> Hunspell::suggest(const std::string& word) {
2142
16.0k
  return m_Impl->suggest(word);
2143
16.0k
}
2144
2145
0
std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
2146
0
  return m_Impl->suffix_suggest(root_word);
2147
0
}
2148
2149
0
const std::string& Hunspell::get_dict_encoding() const {
2150
0
  return m_Impl->get_dict_encoding();
2151
0
}
2152
2153
0
std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
2154
0
  return m_Impl->stem(desc);
2155
0
}
2156
2157
0
std::vector<std::string> Hunspell::stem(const std::string& word) {
2158
0
  return m_Impl->stem(word);
2159
0
}
2160
2161
0
const std::string& Hunspell::get_wordchars_cpp() const {
2162
0
  return m_Impl->get_wordchars_cpp();
2163
0
}
2164
2165
0
const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
2166
0
  return m_Impl->get_wordchars_utf16();
2167
0
}
2168
2169
0
int Hunspell::add(const std::string& word) {
2170
0
  return m_Impl->add(word);
2171
0
}
2172
2173
0
int Hunspell::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
2174
0
  return m_Impl->add_with_flags(word, flags, desc);
2175
0
}
2176
2177
0
int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
2178
0
  return m_Impl->add_with_affix(word, example);
2179
0
}
2180
2181
0
int Hunspell::remove(const std::string& word) {
2182
0
  return m_Impl->remove(word);
2183
0
}
2184
2185
0
const std::string& Hunspell::get_version_cpp() const {
2186
0
  return m_Impl->get_version_cpp();
2187
0
}
2188
2189
0
struct cs_info* Hunspell::get_csconv() {
2190
0
  return m_Impl->get_csconv();
2191
0
}
2192
2193
0
std::vector<std::string> Hunspell::analyze(const std::string& word) {
2194
0
  return m_Impl->analyze(word);
2195
0
}
2196
2197
0
std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
2198
0
  return m_Impl->generate(word, pl);
2199
0
}
2200
2201
0
std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
2202
0
  return m_Impl->generate(word, pattern);
2203
0
}
2204
2205
0
int Hunspell::get_langnum() const {
2206
0
  return m_Impl->get_langnum();
2207
0
}
2208
2209
0
bool Hunspell::input_conv(const std::string& word, std::string& dest) {
2210
0
  return m_Impl->input_conv(word, dest);
2211
0
}
2212
2213
0
int Hunspell::spell(const char* word, int* info, char** root) {
2214
0
  return m_Impl->spell(word, info, root);
2215
0
}
2216
2217
0
int Hunspell::suggest(char*** slst, const char* word) {
2218
0
  return m_Impl->suggest(slst, word);
2219
0
}
2220
2221
0
int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
2222
0
  return m_Impl->suffix_suggest(slst, root_word);
2223
0
}
2224
2225
0
void Hunspell::free_list(char*** slst, int n) {
2226
0
  m_Impl->free_list(slst, n);
2227
0
}
2228
2229
0
char* Hunspell::get_dic_encoding() {
2230
0
  return m_Impl->get_dic_encoding();
2231
0
}
2232
2233
0
int Hunspell::analyze(char*** slst, const char* word) {
2234
0
  return m_Impl->analyze(slst, word);
2235
0
}
2236
2237
0
int Hunspell::stem(char*** slst, const char* word) {
2238
0
  return m_Impl->stem(slst, word);
2239
0
}
2240
2241
0
int Hunspell::stem(char*** slst, char** desc, int n) {
2242
0
  return m_Impl->stem(slst, desc, n);
2243
0
}
2244
2245
0
int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
2246
0
  return m_Impl->generate(slst, word, pattern);
2247
0
}
2248
2249
0
int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
2250
0
  return m_Impl->generate(slst, word, pl, pln);
2251
0
}
2252
2253
0
const char* Hunspell::get_wordchars() const {
2254
0
  return m_Impl->get_wordchars();
2255
0
}
2256
2257
0
const char* Hunspell::get_version() const {
2258
0
  return m_Impl->get_version();
2259
0
}
2260
2261
0
int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
2262
0
  return m_Impl->input_conv(word, dest, destsize);
2263
0
}
2264
2265
0
// C API: allocate a HunspellImpl and hand it out as an opaque Hunhandle.
Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath);
  return reinterpret_cast<Hunhandle*>(impl);
}
2268
2269
// C API: like Hunspell_create, additionally passing `key` to the
// HunspellImpl constructor.
Hunhandle* Hunspell_create_key(const char* affpath,
                               const char* dpath,
                               const char* key) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath, key);
  return reinterpret_cast<Hunhandle*>(impl);
}
2274
2275
0
// C API: delete the HunspellImpl behind the handle.
void Hunspell_destroy(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  delete impl;
}
2278
2279
0
// C API: forwards to HunspellImpl::add_dic with only the dictionary path.
int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_dic(dpath);
}
2282
2283
0
// C API: forwards to HunspellImpl::spell with only the word argument.
int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->spell(word);
}
2286
2287
0
// C API: forwards to HunspellImpl::get_dic_encoding.
char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->get_dic_encoding();
}
2290
2291
0
// C API: forwards to HunspellImpl::suggest (C-array overload).
int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suggest(slst, word);
}
2294
2295
0
// C API: forwards to HunspellImpl::suffix_suggest (C-array overload).
int Hunspell_suffix_suggest(Hunhandle* pHunspell, char*** slst, const char* root_word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suffix_suggest(slst, root_word);
}
2298
2299
0
// C API: forwards to HunspellImpl::analyze (C-array overload).
int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->analyze(slst, word);
}
2302
2303
0
// C API: forwards to HunspellImpl::stem (C-array, word overload).
int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, word);
}
2306
2307
0
// C API: forwards to HunspellImpl::stem (C-array, description-list overload).
int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, desc, n);
}
2310
2311
int Hunspell_generate(Hunhandle* pHunspell,
2312
                      char*** slst,
2313
                      const char* word,
2314
                      const char* pattern)
2315
0
{
2316
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
2317
0
}
2318
2319
int Hunspell_generate2(Hunhandle* pHunspell,
2320
                       char*** slst,
2321
                       const char* word,
2322
                       char** desc,
2323
                       int n)
2324
0
{
2325
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
2326
0
}
2327
2328
/* functions for run-time modification of the dictionary */
2329
2330
/* add word to the run-time dictionary */
2331
2332
0
// C API: forwards to HunspellImpl::add.
int Hunspell_add(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add(word);
}
2335
2336
0
// C API: forwards to HunspellImpl::add_with_flags.
int Hunspell_add_with_flags(Hunhandle* pHunspell, const char* word, const char* flags, const char* desc) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_with_flags(word, flags, desc);
}
2339
2340
/* add word to the run-time dictionary with affix flags of
2341
 * the example (a dictionary word): Hunspell will recognize
2342
 * affixed forms of the new word, too.
2343
 */
2344
2345
int Hunspell_add_with_affix(Hunhandle* pHunspell,
2346
                            const char* word,
2347
0
                            const char* example) {
2348
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
2349
0
}
2350
2351
/* remove word from the run-time dictionary */
2352
2353
0
// C API: forwards to HunspellImpl::remove.
int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->remove(word);
}
2356
2357
0
// C API: forwards to HunspellImpl::free_list.
void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  impl->free_list(list, n);
}