Coverage Report

Created: 2025-10-05 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hunspell/src/hunspell/hunspell.cxx
Line
Count
Source
1
/* ***** BEGIN LICENSE BLOCK *****
2
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3
 *
4
 * Copyright (C) 2002-2022 Németh László
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version
7
 * 1.1 (the "License"); you may not use this file except in compliance with
8
 * the License. You may obtain a copy of the License at
9
 * http://www.mozilla.org/MPL/
10
 *
11
 * Software distributed under the License is distributed on an "AS IS" basis,
12
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
 * for the specific language governing rights and limitations under the
14
 * License.
15
 *
16
 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17
 *
18
 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19
 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20
 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21
 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22
 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23
 *
24
 * Alternatively, the contents of this file may be used under the terms of
25
 * either the GNU General Public License Version 2 or later (the "GPL"), or
26
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
 * in which case the provisions of the GPL or the LGPL are applicable instead
28
 * of those above. If you wish to allow use of your version of this file only
29
 * under the terms of either the GPL or the LGPL, and not to allow others to
30
 * use your version of this file under the terms of the MPL, indicate your
31
 * decision by deleting the provisions above and replace them with the notice
32
 * and other provisions required by the GPL or the LGPL. If you do not delete
33
 * the provisions above, a recipient may use your version of this file under
34
 * the terms of any one of the MPL, the GPL or the LGPL.
35
 *
36
 * ***** END LICENSE BLOCK ***** */
37
/*
38
 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39
 * And Contributors.  All rights reserved.
40
 *
41
 * Redistribution and use in source and binary forms, with or without
42
 * modification, are permitted provided that the following conditions
43
 * are met:
44
 *
45
 * 1. Redistributions of source code must retain the above copyright
46
 *    notice, this list of conditions and the following disclaimer.
47
 *
48
 * 2. Redistributions in binary form must reproduce the above copyright
49
 *    notice, this list of conditions and the following disclaimer in the
50
 *    documentation and/or other materials provided with the distribution.
51
 *
52
 * 3. All modifications to the source code must be clearly marked as
53
 *    such.  Binary redistributions based on modified source code
54
 *    must be clearly marked as modified versions in the documentation
55
 *    and/or other materials provided with the distribution.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68
 * SUCH DAMAGE.
69
 */
70
71
#include <cstdlib>
72
#include <cstring>
73
#include <cstdio>
74
#include <ctime>
75
76
#include "affixmgr.hxx"
77
#include "hunspell.hxx"
78
#include "suggestmgr.hxx"
79
#include "hunspell.h"
80
#include "csutil.hxx"
81
82
#include <limits>
83
#include <string>
84
85
215k
// Byte-length limit applied to input words when the dictionary is UTF-8
// (three times MAXWORDLEN); spell_internal() rejects words of this size or more.
#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
86
87
class HunspellImpl
88
{
89
public:
90
  HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
91
  HunspellImpl(const HunspellImpl&) = delete;
92
  HunspellImpl& operator=(const HunspellImpl&) = delete;
93
  ~HunspellImpl();
94
  int add_dic(const char* dpath, const char* key = NULL);
95
  std::vector<std::string> suffix_suggest(const std::string& root_word);
96
  std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
97
  std::vector<std::string> generate(const std::string& word, const std::string& pattern);
98
  std::vector<std::string> stem(const std::string& word);
99
  std::vector<std::string> stem(const std::vector<std::string>& morph);
100
  std::vector<std::string> analyze(const std::string& word);
101
  int get_langnum() const;
102
  bool input_conv(const std::string& word, std::string& dest);
103
  bool spell(const std::string& word, std::vector<std::string>& candidate_stack,
104
             int* info = NULL, std::string* root = NULL);
105
  std::vector<std::string> suggest(const std::string& word);
106
  std::vector<std::string> suggest(const std::string& word, std::vector<std::string>& suggest_candidate_stack);
107
  const std::string& get_wordchars_cpp() const;
108
  const std::vector<w_char>& get_wordchars_utf16() const;
109
  const std::string& get_dict_encoding() const;
110
  int add(const std::string& word);
111
  int add_with_flags(const std::string& word, const std::string& flags, const std::string& desc = NULL);
112
  int add_with_affix(const std::string& word, const std::string& example);
113
  int remove(const std::string& word);
114
  const std::string& get_version_cpp() const;
115
  const struct cs_info* get_csconv() const;
116
117
  int spell(const char* word, int* info = NULL, char** root = NULL);
118
  int suggest(char*** slst, const char* word);
119
  int suffix_suggest(char*** slst, const char* root_word);
120
  void free_list(char*** slst, int n);
121
  char* get_dic_encoding();
122
  int analyze(char*** slst, const char* word);
123
  int stem(char*** slst, const char* word);
124
  int stem(char*** slst, char** morph, int n);
125
  int generate(char*** slst, const char* word, const char* word2);
126
  int generate(char*** slst, const char* word, char** desc, int n);
127
  const char* get_wordchars() const;
128
  const char* get_version() const;
129
  int input_conv(const char* word, char* dest, size_t destsize);
130
131
private:
132
  AffixMgr* pAMgr;
133
  std::vector<HashMgr*> m_HMgrs;
134
  SuggestMgr* pSMgr;
135
  std::string affixpath;
136
  std::string encoding;
137
  const struct cs_info* csconv;
138
  int langnum;
139
  int utf8;
140
  int complexprefixes;
141
  std::vector<std::string> wordbreak;
142
143
private:
144
  std::vector<std::string> analyze_internal(const std::string& word);
145
  bool spell_internal(const std::string& word, std::vector<std::string>& candidate_stack,
146
                      int* info = NULL, std::string* root = NULL);
147
  std::vector<std::string> suggest_internal(const std::string& word,
148
                                            std::vector<std::string>& spell_candidate_stack,
149
                                            std::vector<std::string>& suggest_candidate_stack,
150
                                            bool& capitalized, size_t& abbreviated, int& captype);
151
  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
152
  size_t cleanword2(std::string& dest,
153
                    std::vector<w_char>& dest_u,
154
                    const std::string& src,
155
                    int* pcaptype,
156
                    size_t* pabbrev);
157
  void clean_ignore(std::string& dest, const std::string& src);
158
  void mkinitcap(std::string& u8);
159
  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
160
  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
161
  void mkallcap(std::string& u8);
162
  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
163
  struct hentry* checkword(const std::string& source, int* info, std::string* root);
164
  std::string sharps_u8_l1(const std::string& source);
165
  hentry*
166
  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
167
  int is_keepcase(const hentry* rv);
168
  void insert_sug(std::vector<std::string>& slst, const std::string& word);
169
  void cat_result(std::string& result, const std::string& st);
170
  std::vector<std::string> spellml(const std::string& word);
171
  std::string get_xml_par(const std::string& par, std::string::size_type pos);
172
  std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
173
  std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
174
  int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
175
};
176
177
// Build the checker from an affix file (affpath) and a dictionary file
// (dpath); key is forwarded unchanged to the hash and affix managers.
// Construction order matters: the hash manager is created first, the affix
// manager is handed the hash manager list, and the suggestion manager is
// handed the affix manager.
HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key)
  : affixpath(affpath),
    csconv(NULL),
    utf8(0),
    complexprefixes(0) {
  // 1. dictionary (hash) manager
  m_HMgrs.push_back(new HashMgr(dpath, affpath, key));

  // 2. affix manager; it receives the list of hash managers
  pAMgr = new AffixMgr(affpath, m_HMgrs, key);

  // 3. copy the settings the affix manager exposes
  std::string try_chars = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  langnum = pAMgr->get_langnum();
  utf8 = pAMgr->get_utf8();
  if (utf8 == 0) {
    // a character table is only looked up for non-UTF-8 dictionaries
    csconv = get_current_cs(encoding);
  }
  complexprefixes = pAMgr->get_complexprefixes();
  wordbreak = pAMgr->get_breaktable();

  // 4. suggestion manager, seeded with the try string
  pSMgr = new SuggestMgr(try_chars, MAXSUGGESTION, pAMgr);
}
204
205
14.2k
// Delete the suggestion manager, the affix manager and every hash manager,
// then reset the pointers. The character table is only freed in
// MOZILLA_CLIENT builds.
HunspellImpl::~HunspellImpl() {
  delete pSMgr;
  delete pAMgr;

  typedef std::vector<HashMgr*>::iterator HMgrIter;
  for (HMgrIter it = m_HMgrs.begin(); it != m_HMgrs.end(); ++it) {
    delete *it;
  }

  pSMgr = NULL;
  pAMgr = NULL;
#ifdef MOZILLA_CLIENT
  delete[] csconv;
#endif
  csconv = NULL;
}
217
218
// load extra dictionaries
219
0
int HunspellImpl::add_dic(const char* dpath, const char* key) {
220
0
  m_HMgrs.push_back(new HashMgr(dpath, affixpath.c_str(), key));
221
0
  return 0;
222
0
}
223
224
225
// make a copy of src at dest while removing all characters
226
// specified in IGNORE rule
227
void HunspellImpl::clean_ignore(std::string& dest,
228
5.22M
                                const std::string& src) {
229
5.22M
  dest.clear();
230
5.22M
  dest.assign(src);
231
5.22M
  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
232
5.22M
  if (ignoredchars != NULL) {
233
27.5k
    if (utf8) {
234
3.65k
      const std::vector<w_char>& ignoredchars_utf16 =
235
3.65k
          pAMgr->get_ignore_utf16();
236
3.65k
      remove_ignored_chars_utf(dest, ignoredchars_utf16);
237
23.9k
    } else {
238
23.9k
      remove_ignored_chars(dest, ignoredchars);
239
23.9k
    }
240
27.5k
  }
241
5.22M
}
242
243
244
// make a copy of src at destination while removing all leading
245
// blanks and removing any trailing periods after recording
246
// their presence with the abbreviation flag
247
// also since already going through character by character,
248
// set the capitalization type
249
// return the length of the "cleaned" (and UTF-8 encoded) word
250
251
size_t HunspellImpl::cleanword2(std::string& dest,
252
                         std::vector<w_char>& dest_utf,
253
                         const std::string& src,
254
                         int* pcaptype,
255
1.76M
                         size_t* pabbrev) {
256
1.76M
  dest.clear();
257
1.76M
  dest_utf.clear();
258
259
  // remove IGNORE characters from the string
260
1.76M
  std::string w2;
261
1.76M
  clean_ignore(w2, src);
262
263
1.76M
  const char* q = w2.c_str();
264
1.76M
  int nl = (int)w2.size();
265
266
  // first skip over any leading blanks
267
1.76M
  while (*q == ' ') {
268
5.07k
    ++q;
269
5.07k
    nl--;
270
5.07k
  }
271
  
272
  // now strip off any trailing periods (recording their presence)
273
1.76M
  *pabbrev = 0;
274
  
275
1.87M
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
276
110k
    nl--;
277
110k
    (*pabbrev)++;
278
110k
  }
279
280
  // if no characters are left it can't be capitalized
281
1.76M
  if (nl <= 0) {
282
45.2k
    *pcaptype = NOCAP;
283
45.2k
    return 0;
284
45.2k
  }
285
286
1.71M
  dest.append(q, nl);
287
1.71M
  nl = dest.size();
288
1.71M
  if (utf8) {
289
177k
    u8_u16(dest_utf, dest);
290
177k
    *pcaptype = get_captype_utf8(dest_utf, langnum);
291
1.54M
  } else {
292
1.54M
    *pcaptype = get_captype(dest, csconv);
293
1.54M
  }
294
1.71M
  return nl;
295
1.76M
}
296
297
void HunspellImpl::cleanword(std::string& dest,
298
                        const std::string& src,
299
                        int* pcaptype,
300
0
                        int* pabbrev) {
301
0
  dest.clear();
302
0
  const unsigned char* q = (const unsigned char*)src.c_str();
303
0
  int firstcap = 0, nl = (int)src.size();
304
305
  // first skip over any leading blanks
306
0
  while (*q == ' ') {
307
0
    ++q;
308
0
    nl--;
309
0
  }
310
  
311
  // now strip off any trailing periods (recording their presence)
312
0
  *pabbrev = 0;
313
  
314
0
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
315
0
    nl--;
316
0
    (*pabbrev)++;
317
0
  }
318
319
  // if no characters are left it can't be capitalized
320
0
  if (nl <= 0) {
321
0
    *pcaptype = NOCAP;
322
0
    return;
323
0
  }
324
325
  // now determine the capitalization type of the first nl letters
326
0
  int ncap = 0;
327
0
  int nneutral = 0;
328
0
  int nc = 0;
329
330
0
  if (!utf8) {
331
0
    while (nl > 0) {
332
0
      nc++;
333
0
      if (csconv[(*q)].ccase)
334
0
        ncap++;
335
0
      if (csconv[(*q)].cupper == csconv[(*q)].clower)
336
0
        nneutral++;
337
0
      dest.push_back(*q++);
338
0
      nl--;
339
0
    }
340
    // remember to terminate the destination string
341
0
    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
342
0
  } else {
343
0
    std::vector<w_char> t;
344
0
    u8_u16(t, src);
345
0
    for (auto& wc : t) {
346
0
      const auto idx = (unsigned short)wc;
347
0
      const auto low = unicodetolower(idx, langnum);
348
0
      if (idx != low)
349
0
        ncap++;
350
0
      if (unicodetoupper(idx, langnum) == low)
351
0
        nneutral++;
352
0
    }
353
0
    u16_u8(dest, t);
354
0
    if (ncap) {
355
0
      const auto idx = (unsigned short)t[0];
356
0
      firstcap = (idx != unicodetolower(idx, langnum));
357
0
    }
358
0
  }
359
360
  // now finally set the captype
361
0
  if (ncap == 0) {
362
0
    *pcaptype = NOCAP;
363
0
  } else if ((ncap == 1) && firstcap) {
364
0
    *pcaptype = INITCAP;
365
0
  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
366
0
    *pcaptype = ALLCAP;
367
0
  } else if ((ncap > 1) && firstcap) {
368
0
    *pcaptype = HUHINITCAP;
369
0
  } else {
370
0
    *pcaptype = HUHCAP;
371
0
  }
372
0
}
373
374
23.7k
void HunspellImpl::mkallcap(std::string& u8) {
375
23.7k
  if (utf8) {
376
14.7k
    std::vector<w_char> u16;
377
14.7k
    u8_u16(u16, u8);
378
14.7k
    ::mkallcap_utf(u16, langnum);
379
14.7k
    u16_u8(u8, u16);
380
14.7k
  } else {
381
9.04k
    ::mkallcap(u8, csconv);
382
9.04k
  }
383
23.7k
}
384
385
427k
int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
386
427k
  if (utf8) {
387
117k
    ::mkallsmall_utf(u16, langnum);
388
117k
    u16_u8(u8, u16);
389
310k
  } else {
390
310k
    ::mkallsmall(u8, csconv);
391
310k
  }
392
427k
  return u8.size();
393
427k
}
394
395
// convert UTF-8 sharp S codes to latin 1
396
101k
std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
397
101k
  std::string dest(source);
398
101k
  mystrrep(dest, "\xC3\x9F", "\xDF");
399
101k
  return dest;
400
101k
}
401
402
// recursive search for right ss - sharp s permutations
403
hentry* HunspellImpl::spellsharps(std::string& base,
404
                              size_t n_pos,
405
                              int n,
406
                              int repnum,
407
                              int* info,
408
245k
                              std::string* root) {
409
245k
  size_t pos = base.find("ss", n_pos);
410
245k
  if (pos != std::string::npos && (n < MAXSHARPS)) {
411
104k
    base[pos] = '\xC3';
412
104k
    base[pos + 1] = '\x9F';
413
104k
    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
414
104k
    if (h)
415
1.15k
      return h;
416
103k
    base[pos] = 's';
417
103k
    base[pos + 1] = 's';
418
103k
    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
419
103k
    if (h)
420
578
      return h;
421
141k
  } else if (repnum > 0) {
422
103k
    if (utf8)
423
2.85k
      return checkword(base, info, root);
424
101k
    std::string tmp(sharps_u8_l1(base));
425
101k
    return checkword(tmp, info, root);
426
103k
  }
427
140k
  return NULL;
428
245k
}
429
430
17.2k
int HunspellImpl::is_keepcase(const hentry* rv) {
431
17.2k
  return pAMgr && rv->astr && pAMgr->get_keepcase() &&
432
7.29k
         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
433
17.2k
}
434
435
/* insert a word to the beginning of the suggestion array */
436
1.38M
void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
437
1.38M
  slst.insert(slst.begin(), word);
438
1.38M
}
439
440
bool HunspellImpl::spell(const std::string& word, std::vector<std::string>& candidate_stack,
441
1.68M
                         int* info, std::string* root) {
442
  // something very broken if spell ends up calling itself with the same word
443
1.68M
  if (std::find(candidate_stack.begin(), candidate_stack.end(), word) != candidate_stack.end())
444
70
    return false;
445
446
1.68M
  candidate_stack.push_back(word);
447
1.68M
  bool r = spell_internal(word, candidate_stack, info, root);
448
1.68M
  candidate_stack.pop_back();
449
450
1.68M
  if (r && root) {
451
    // output conversion
452
0
    RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
453
0
    if (rl) {
454
0
      std::string wspace;
455
0
      if (rl->conv(*root, wspace)) {
456
0
        *root = wspace;
457
0
      }
458
0
    }
459
0
  }
460
1.68M
  return r;
461
1.68M
}
462
463
// Core of spell(): decide whether `word` is accepted.
//   word            - input word, before input conversion and cleaning
//   candidate_stack - words being checked by enclosing spell() calls; passed
//                     on to the recursive spell() calls made for the parts
//                     of a word split at a break pattern
//   info            - optional; reset to 0, then OR-ed with SPELL_* bits
//   root            - optional; cleared before lookup, passed to checkword()
// Returns true when the word is accepted.
// Steps, in order: XML marker and length limits, input conversion followed
// by cleanword2(), shortcut for numbers, dictionary lookup driven by the
// capitalization type, handling of the warn flag, and finally recursive
// splitting at the patterns stored in `wordbreak`.
bool HunspellImpl::spell_internal(const std::string& word, std::vector<std::string>& candidate_stack,
464
1.68M
                                  int* info, std::string* root) {
465
1.68M
  struct hentry* rv = NULL;
466
467
1.68M
  int info2 = 0;
468
1.68M
  if (!info)
469
1.68M
    info = &info2;  // no caller-supplied slot: use a local so *info is always valid below
470
3.94k
  else
471
3.94k
    *info = 0;
472
473
  // Hunspell supports XML input of the simplified API (see manual)
474
1.68M
  if (word == SPELL_XML)
475
1.09k
    return true;
476
1.68M
  if (utf8) {
477
165k
    if (word.size() >= MAXWORDUTF8LEN)
478
4.47k
      return false;
479
1.51M
  } else {
480
1.51M
    if (word.size() >= MAXWORDLEN)
481
2.69k
      return false;
482
1.51M
  }
483
1.67M
  int captype = NOCAP;
484
1.67M
  size_t abbv = 0;
485
1.67M
  size_t wl = 0;
486
487
1.67M
  std::string scw;
488
1.67M
  std::vector<w_char> sunicw;
489
490
  // input conversion
491
1.67M
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
492
1.67M
  {
493
1.67M
    std::string wspace;
494
495
1.67M
    bool convstatus = rl ? rl->conv(word, wspace) : false;
496
1.67M
    if (convstatus)
497
104k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
498
1.57M
    else
499
1.57M
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
500
1.67M
  }
501
502
1.67M
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
503
1.67M
    if (wl > 32768)
504
59
      return false;
505
1.67M
#endif
506
507
#ifdef MOZILLA_CLIENT
508
  // accept the abbreviated words without dots
509
  // workaround for the incomplete tokenization of Mozilla
510
  abbv = 1;
511
#endif
512
513
1.67M
  if (wl == 0 || m_HMgrs.empty())
514
45.2k
    return true;  // nothing left after cleaning, or no dictionary loaded: accept
515
1.63M
  if (root)
516
0
    root->clear();
517
518
  // allow numbers with dots, dashes and commas (but forbid double separators:
519
  // "..", "--" etc.)
520
1.63M
  enum { NBEGIN, NNUM, NSEP };
521
1.63M
  int nstate = NBEGIN;
522
1.63M
  size_t i;
523
524
1.67M
  for (i = 0; (i < wl); i++) {
525
1.66M
    if ((scw[i] <= '9') && (scw[i] >= '0')) {
526
39.5k
      nstate = NNUM;
527
1.63M
    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
528
212k
      if ((nstate == NSEP) || (i == 0))
529
205k
        break;
530
7.09k
      nstate = NSEP;
531
7.09k
    } else
532
1.41M
      break;
533
1.66M
  }
534
1.63M
  if ((i == wl) && (nstate == NNUM))
535
6.87k
    return true;  // only digits and single separators, ending in a digit
536
537
1.62M
  switch (captype) {
538
155k
    case HUHCAP:
539
    /* FALLTHROUGH */
540
180k
    case HUHINITCAP:
541
180k
      *info |= SPELL_ORIGCAP;
542
    /* FALLTHROUGH */
543
1.42M
    case NOCAP:
544
1.42M
      rv = checkword(scw, info, root);
545
1.42M
      if ((abbv) && !(rv)) {
546
25.6k
        std::string u8buffer(scw);
547
25.6k
        u8buffer.push_back('.');
548
25.6k
        rv = checkword(u8buffer, info, root);
549
25.6k
      }
550
1.42M
      break;
551
107k
    case ALLCAP: {
552
107k
      *info |= SPELL_ORIGCAP;
553
107k
      rv = checkword(scw, info, root);
554
107k
      if (rv)
555
5.08k
        break;
556
102k
      if (abbv) {
557
11.6k
        std::string u8buffer(scw);
558
11.6k
        u8buffer.push_back('.');
559
11.6k
        rv = checkword(u8buffer, info, root);
560
11.6k
        if (rv)
561
286
          break;
562
11.6k
      }
563
      // Spec. prefix handling for Catalan, French, Italian:
564
      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
565
102k
      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
566
102k
      if (apos != std::string::npos) {
567
8.79k
        mkallsmall2(scw, sunicw);
568
        //conversion may result in string with different len to pre-mkallsmall2
569
        //so re-scan
        // NOTE(review): apos is not recomputed here, only bounds-checked
        // against the new length - confirm this is intended
570
8.79k
        if (apos != std::string::npos && apos < scw.size() - 1) {
571
8.54k
          std::string part1 = scw.substr(0, apos + 1), part2 = scw.substr(apos + 1);
572
8.54k
          if (utf8) {
573
3.61k
            std::vector<w_char> part1u, part2u;
574
3.61k
            u8_u16(part1u, part1);
575
3.61k
            u8_u16(part2u, part2);
576
3.61k
            mkinitcap2(part2, part2u);
577
3.61k
            scw = part1 + part2;
578
3.61k
            sunicw = part1u;
579
3.61k
            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
580
3.61k
            rv = checkword(scw, info, root);
581
3.61k
            if (rv)
582
761
              break;
583
4.92k
          } else {
584
4.92k
            mkinitcap2(part2, sunicw);
585
4.92k
            scw = part1 + part2;
586
4.92k
            rv = checkword(scw, info, root);
587
4.92k
            if (rv)
588
245
              break;
589
4.92k
          }
590
7.53k
          mkinitcap2(scw, sunicw);
591
7.53k
          rv = checkword(scw, info, root);
592
7.53k
          if (rv)
593
450
            break;
594
7.53k
        }
595
8.79k
      }
596
100k
      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
597
598
18.2k
        mkallsmall2(scw, sunicw);
599
18.2k
        std::string u8buffer(scw);
600
18.2k
        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
601
18.2k
        if (!rv) {
602
17.8k
          mkinitcap2(scw, sunicw);
603
17.8k
          rv = spellsharps(scw, 0, 0, 0, info, root);
604
17.8k
        }
605
18.2k
        if ((abbv) && !(rv)) {
606
1.19k
          u8buffer.push_back('.');
607
1.19k
          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
608
1.19k
          if (!rv) {
609
1.15k
            u8buffer = std::string(scw);
610
1.15k
            u8buffer.push_back('.');
611
1.15k
            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
612
1.15k
          }
613
1.19k
        }
614
18.2k
        if (rv)
615
838
          break;
616
18.2k
      }
617
100k
    }
618
      /* FALLTHROUGH */
619
194k
    case INITCAP: {
620
      // handle special capitalization of dotted I
621
194k
      bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
622
194k
      *info |= SPELL_ORIGCAP;
623
194k
      if (captype == ALLCAP) {
624
100k
          mkallsmall2(scw, sunicw);
625
100k
          mkinitcap2(scw, sunicw);
626
100k
          if (Idot)
627
827
             scw.replace(0, 1, "\xc4\xb0");
628
100k
      }
629
194k
      if (captype == INITCAP)
630
94.1k
        *info |= SPELL_INITCAP;
631
194k
      rv = checkword(scw, info, root);
632
194k
      if (captype == INITCAP)
633
94.1k
        *info &= ~SPELL_INITCAP;
634
      // forbid bad capitalization
635
      // (for example, ijs -> Ijs instead of IJs in Dutch)
636
      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
637
194k
      if (*info & SPELL_FORBIDDEN) {
638
699
        rv = NULL;
639
699
        break;
640
699
      }
641
193k
      if (rv && is_keepcase(rv) && (captype == ALLCAP))
642
956
        rv = NULL;
643
193k
      if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
644
8.02k
        break;
645
646
185k
      mkallsmall2(scw, sunicw);
647
185k
      std::string u8buffer(scw);  // all-lowercase form
648
185k
      mkinitcap2(scw, sunicw);  // scw becomes the initial-capital form again
649
650
185k
      rv = checkword(u8buffer, info, root);
651
185k
      if (abbv && !rv) {
652
12.9k
        u8buffer.push_back('.');
653
12.9k
        rv = checkword(u8buffer, info, root);
654
12.9k
        if (!rv) {
655
12.6k
          u8buffer = scw;
656
12.6k
          u8buffer.push_back('.');
657
12.6k
          if (captype == INITCAP)
658
1.66k
            *info |= SPELL_INITCAP;
659
12.6k
          rv = checkword(u8buffer, info, root);
660
12.6k
          if (captype == INITCAP)
661
1.66k
            *info &= ~SPELL_INITCAP;
662
12.6k
          if (rv && is_keepcase(rv) && (captype == ALLCAP))
663
22
            rv = NULL;
664
12.6k
          break;
665
12.6k
        }
666
12.9k
      }
667
172k
      if (rv && is_keepcase(rv) &&
668
2.21k
          ((captype == ALLCAP) ||
669
           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
670
           // in INITCAP form, too.
671
1.01k
           !(pAMgr->get_checksharps() &&
672
698
             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
673
698
              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
674
2.21k
        rv = NULL;
675
172k
      break;
676
185k
    }
677
1.62M
  }
678
679
1.62M
  if (rv) {
680
143k
    if (pAMgr && pAMgr->get_warn() && rv->astr &&
681
14.8k
        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
682
10.2k
      *info |= SPELL_WARN;
683
10.2k
      if (pAMgr->get_forbidwarn())
684
56
        return false;
685
10.1k
      return true;
686
10.2k
    }
687
133k
    return true;
688
143k
  }
689
690
  // recursive breaking at break points
691
1.48M
  if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
692
693
1.30M
    int nbr = 0;
694
1.30M
    wl = scw.size();
695
696
    // calculate break points for recursion limit
697
3.91M
    for (auto& j : wordbreak) {
698
3.91M
      size_t pos = 0;
699
5.19M
      while ((pos = scw.find(j, pos)) != std::string::npos) {
700
1.28M
        ++nbr;
701
1.28M
        pos += j.size();
702
1.28M
      }
703
3.91M
    }
704
1.30M
    if (nbr >= 10)
705
494
      return false;  // 10 or more break points: refuse instead of recursing
706
707
    // check boundary patterns (^begin and end$)
708
3.90M
    for (auto& j : wordbreak) {
709
3.90M
      size_t plen = j.size();
710
3.90M
      if (plen == 1 || plen > wl)
711
1.70M
        continue;
712
713
2.19M
      if (j[0] == '^' &&
714
1.09M
          scw.compare(0, plen - 1, j, 1, plen -1) == 0 && spell(scw.substr(plen - 1), candidate_stack))
715
4.49k
      {
716
4.49k
        if (info)  // always true here: info was pointed at info2 above when it was NULL
717
4.49k
          *info |= SPELL_COMPOUND;
718
4.49k
        return true;
719
4.49k
      }
720
721
2.19M
      if (j[plen - 1] == '$' &&
722
1.09M
          scw.compare(wl - plen + 1, plen - 1, j, 0, plen - 1) == 0) {
723
392k
        std::string suffix(scw.substr(wl - plen + 1));
724
392k
        scw.resize(wl - plen + 1);
725
392k
        if (spell(scw, candidate_stack))
726
33.3k
        {
727
33.3k
          if (info)
728
33.3k
            *info |= SPELL_COMPOUND;
729
33.3k
          return true;
730
33.3k
        }
731
358k
        scw.append(suffix);  // restore scw for the following patterns
732
358k
      }
733
2.19M
    }
734
735
    // other patterns
736
3.78M
    for (auto& j : wordbreak) {
737
3.78M
      size_t plen = j.size();
738
3.78M
      size_t found = scw.find(j);
739
3.78M
      if ((found > 0) && (found < wl - plen)) {
740
390k
        size_t found2 = scw.find(j, found + 1);
741
        // try to break at the second occurrence
742
        // to recognize dictionary words with wordbreak
743
390k
        if (found2 > 0 && (found2 < wl - plen))
744
118k
            found = found2;
745
390k
        std::string substring(scw.substr(found + plen));
746
390k
        if (!spell(substring, candidate_stack))
747
319k
          continue;
748
70.4k
        std::string suffix(scw.substr(found));
749
70.4k
        scw.resize(found);
750
        // examine 2 sides of the break point
751
70.4k
        if (spell(scw, candidate_stack))
752
4.74k
        {
753
4.74k
          if (info)
754
4.74k
            *info |= SPELL_COMPOUND;
755
4.74k
          return true;
756
4.74k
        }
757
65.7k
        scw.append(suffix);
758
759
        // LANG_hu: spec. dash rule
760
65.7k
        if (langnum == LANG_hu && j == "-") {
761
40.1k
          suffix = scw.substr(found + 1);
762
40.1k
          scw.resize(found + 1);
763
40.1k
          if (spell(scw, candidate_stack))
764
2.69k
          {
765
2.69k
            if (info)
766
2.69k
              *info |= SPELL_COMPOUND;
767
2.69k
            return true;  // check the first part with dash
768
2.69k
          }
769
37.4k
          scw.append(suffix);
770
37.4k
        }
771
        // end of LANG specific region
772
65.7k
      }
773
3.78M
    }
774
775
    // other patterns (break at first break point)
776
3.77M
    for (auto& j : wordbreak) {
777
3.77M
      size_t plen = j.size(), found = scw.find(j);
778
3.77M
      if ((found > 0) && (found < wl - plen)) {
779
381k
        if (!spell(scw.substr(found + plen), candidate_stack))
780
317k
          continue;
781
64.6k
        std::string suffix(scw.substr(found));
782
64.6k
        scw.resize(found);
783
        // examine 2 sides of the break point
784
64.6k
        if (spell(scw, candidate_stack))
785
994
        {
786
994
          if (info)
787
994
            *info |= SPELL_COMPOUND;
788
994
          return true;
789
994
        }
790
63.6k
        scw.append(suffix);
791
792
        // LANG_hu: spec. dash rule
793
63.6k
        if (langnum == LANG_hu && j == "-") {
794
39.3k
          suffix = scw.substr(found + 1);
795
39.3k
          scw.resize(found + 1);
796
39.3k
          if (spell(scw, candidate_stack))
797
913
          {
798
913
            if (info)
799
913
              *info |= SPELL_COMPOUND;
800
913
            return true;  // check the first part with dash
801
913
          }
802
38.4k
          scw.append(suffix);
803
38.4k
        }
804
        // end of LANG specific region
805
63.6k
      }
806
3.77M
    }
807
1.25M
  }
808
809
1.43M
  return false;
810
1.48M
}
811
812
3.46M
struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
813
3.46M
  std::string word;
814
815
  // remove IGNORE characters from the string
816
3.46M
  clean_ignore(word, w);
817
818
3.46M
  if (word.empty())
819
282
    return NULL;
820
821
  // word reversing wrapper for complex prefixes
822
3.46M
  if (complexprefixes) {
823
1.89M
    if (utf8)
824
241k
      reverseword_utf(word);
825
1.64M
    else
826
1.64M
      reverseword(word);
827
1.89M
  }
828
829
3.46M
  int len = word.size();
830
831
  // look word in hash table
832
3.46M
  struct hentry* he = NULL;
833
6.90M
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
834
3.46M
    he = m_HMgrs[i]->lookup(word.c_str(), word.size());
835
836
    // check forbidden and onlyincompound words
837
3.46M
    if ((he) && (he->astr) && (pAMgr) &&
838
37.3k
        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
839
27.9k
      if (info)
840
27.9k
        *info |= SPELL_FORBIDDEN;
841
      // LANG_hu section: set dash information for suggestions
842
27.9k
      if (langnum == LANG_hu) {
843
27.5k
        if (pAMgr->get_compoundflag() &&
844
21.6k
            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
845
16.2k
          if (info)
846
16.2k
            *info |= SPELL_COMPOUND;
847
16.2k
        }
848
27.5k
      }
849
27.9k
      return NULL;
850
27.9k
    }
851
852
    // he = next not needaffix, onlyincompound homonym or onlyupcase word
853
3.43M
    while (he && (he->astr) && pAMgr &&
854
10.0k
           ((pAMgr->get_needaffix() &&
855
2.58k
             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
856
9.00k
            (pAMgr->get_onlyincompound() &&
857
2.68k
             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
858
8.77k
            (info && (*info & SPELL_INITCAP) &&
859
599
             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
860
1.65k
      he = he->next_homonym;
861
3.43M
  }
862
863
  // check with affixes
864
3.43M
  if (!he && pAMgr) {
865
    // try stripping off affixes
866
3.35M
    he = pAMgr->affix_check(word, 0, len, 0);
867
868
    // check compound restriction and onlyupcase
869
3.35M
    if (he && he->astr &&
870
60.9k
        ((pAMgr->get_onlyincompound() &&
871
2.85k
          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
872
60.6k
         (info && (*info & SPELL_INITCAP) &&
873
2.40k
          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
874
734
      he = NULL;
875
734
    }
876
877
3.35M
    if (he) {
878
60.7k
      if ((he->astr) && (pAMgr) &&
879
60.1k
          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
880
1.24k
        if (info)
881
1.24k
          *info |= SPELL_FORBIDDEN;
882
1.24k
        return NULL;
883
1.24k
      }
884
59.5k
      if (root) {
885
0
        root->assign(he->word);
886
0
        if (complexprefixes) {
887
0
          if (utf8)
888
0
            reverseword_utf(*root);
889
0
          else
890
0
            reverseword(*root);
891
0
        }
892
0
      }
893
      // try check compound word
894
3.29M
    } else if (pAMgr->get_compound()) {
895
351k
      struct hentry* rwords[100] = {};  // buffer for COMPOUND pattern checking
896
897
      // first allow only 2 words in the compound
898
351k
      int setinfo = SPELL_COMPOUND_2;
899
351k
      if (info)
900
351k
        setinfo |= *info;
901
351k
      he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, &setinfo);
902
351k
      if (info)
903
351k
        *info = setinfo & ~SPELL_COMPOUND_2;
904
      // if not 2-word compoud word, try with 3 or more words
905
      // (only if original info didn't forbid it)
906
351k
      if (!he && info && !(*info & SPELL_COMPOUND_2)) {
907
350k
        *info &= ~SPELL_COMPOUND_2;
908
350k
        he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
909
        // accept the compound with 3 or more words only if it is
910
        // - not a dictionary word with a typo and
911
        // - not two words written separately,
912
        // - or if it's an arbitrary number accepted by compound rules (e.g. 999%)
913
350k
        if (he && !isdigit(word[0]))
914
22.0k
        {
915
22.0k
          std::vector<std::string> slst;
916
22.0k
          if (pSMgr->suggest(slst, word, NULL, /*test_simplesug=*/true))
917
21.1k
            he = NULL;
918
22.0k
        }
919
350k
      }
920
921
      // LANG_hu section: `moving rule' with last dash
922
351k
      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
923
38.4k
        std::string dup(word, 0, len - 1);
924
38.4k
        he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
925
38.4k
      }
926
      // end of LANG specific region
927
351k
      if (he) {
928
4.31k
        if (root) {
929
0
          root->assign(he->word);
930
0
          if (complexprefixes) {
931
0
            if (utf8)
932
0
              reverseword_utf(*root);
933
0
            else
934
0
              reverseword(*root);
935
0
          }
936
0
        }
937
4.31k
        if (info)
938
4.31k
          *info |= SPELL_COMPOUND;
939
4.31k
      }
940
351k
    }
941
3.35M
  }
942
943
3.43M
  return he;
944
3.43M
}
945
946
// Largest suggest_candidate_stack size for which suggest() still does work;
// fuzzing builds use a smaller bound.
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define MAX_CANDIDATE_STACK_DEPTH 512
#else
#define MAX_CANDIDATE_STACK_DEPTH 2048
#endif
951
952
88.1k
std::vector<std::string> HunspellImpl::suggest(const std::string& word, std::vector<std::string>& suggest_candidate_stack) {
953
954
88.1k
  if (suggest_candidate_stack.size() > MAX_CANDIDATE_STACK_DEPTH || // apply a fairly arbitrary depth limit
955
      // something very broken if suggest ends up calling itself with the same word
956
88.1k
      std::find(suggest_candidate_stack.begin(), suggest_candidate_stack.end(), word) != suggest_candidate_stack.end()) {
957
81
    return { };
958
81
  }
959
960
88.0k
  bool capwords;
961
88.0k
  size_t abbv;
962
88.0k
  int captype;
963
88.0k
  std::vector<std::string> spell_candidate_stack;
964
88.0k
  suggest_candidate_stack.push_back(word);
965
88.0k
  std::vector<std::string> slst = suggest_internal(word, spell_candidate_stack, suggest_candidate_stack,
966
88.0k
                                       capwords, abbv, captype);
967
88.0k
  suggest_candidate_stack.pop_back();
968
  // word reversing wrapper for complex prefixes
969
88.0k
  if (complexprefixes) {
970
256k
    for (auto& j : slst) {
971
256k
      if (utf8)
972
182k
        reverseword_utf(j);
973
74.5k
      else
974
74.5k
        reverseword(j);
975
256k
    }
976
21.0k
  }
977
978
  // capitalize
979
88.0k
  if (capwords)
980
20.4k
    for (auto& j : slst) {
981
20.4k
      mkinitcap(j);
982
20.4k
    }
983
984
  // expand suggestions with dot(s)
985
88.0k
  if (abbv && pAMgr && pAMgr->get_sugswithdots() && word.size() >= abbv) {
986
149k
    for (auto& j : slst) {
987
149k
      j.append(word.substr(word.size() - abbv));
988
149k
    }
989
5.82k
  }
990
991
  // remove bad capitalized and forbidden forms
992
88.0k
  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
993
86.9k
    switch (captype) {
994
5.80k
      case INITCAP:
995
26.1k
      case ALLCAP: {
996
26.1k
        size_t l = 0;
997
150k
        for (size_t j = 0; j < slst.size(); ++j) {
998
123k
          if (slst[j].find(' ') == std::string::npos && !spell(slst[j], spell_candidate_stack)) {
999
21.3k
            std::string s;
1000
21.3k
            std::vector<w_char> w;
1001
21.3k
            if (utf8) {
1002
15.2k
              u8_u16(w, slst[j]);
1003
15.2k
            } else {
1004
6.12k
              s = slst[j];
1005
6.12k
            }
1006
21.3k
            mkallsmall2(s, w);
1007
21.3k
            if (spell(s, spell_candidate_stack)) {
1008
566
              slst[l] = s;
1009
566
              ++l;
1010
20.8k
            } else {
1011
20.8k
              mkinitcap2(s, w);
1012
20.8k
              if (spell(s, spell_candidate_stack)) {
1013
445
                slst[l] = s;
1014
445
                ++l;
1015
445
              }
1016
20.8k
            }
1017
102k
          } else {
1018
102k
            slst[l] = slst[j];
1019
102k
            ++l;
1020
102k
          }
1021
123k
        }
1022
26.1k
        slst.resize(l);
1023
26.1k
      }
1024
86.9k
    }
1025
86.9k
  }
1026
1027
  // remove duplications
1028
88.0k
  size_t l = 0;
1029
1.53M
  for (size_t j = 0; j < slst.size(); ++j) {
1030
1.44M
    slst[l] = slst[j];
1031
155M
    for (size_t k = 0; k < l; ++k) {
1032
154M
      if (slst[k] == slst[j]) {
1033
6.84k
        --l;
1034
6.84k
        break;
1035
6.84k
      }
1036
154M
    }
1037
1.44M
    ++l;
1038
1.44M
  }
1039
88.0k
  slst.resize(l);
1040
1041
  // output conversion
1042
88.0k
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1043
88.0k
  if (rl) {
1044
7.12k
    for (size_t i = 0; rl && i < slst.size(); ++i) {
1045
5.31k
      std::string wspace;
1046
5.31k
      if (rl->conv(slst[i], wspace)) {
1047
688
        slst[i] = wspace;
1048
688
      }
1049
5.31k
    }
1050
1.80k
  }
1051
88.0k
  return slst;
1052
88.0k
}
1053
1054
12.4k
std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
1055
12.4k
  std::vector<std::string> suggest_candidate_stack;
1056
12.4k
  return suggest(word, suggest_candidate_stack);
1057
12.4k
}
1058
1059
std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
1060
        std::vector<std::string>& spell_candidate_stack,
1061
        std::vector<std::string>& suggest_candidate_stack,
1062
88.0k
        bool& capwords, size_t& abbv, int& captype) {
1063
88.0k
  captype = NOCAP;
1064
88.0k
  abbv = 0;
1065
88.0k
  capwords = false;
1066
1067
88.0k
  std::vector<std::string> slst;
1068
1069
88.0k
  int onlycmpdsug = 0;
1070
88.0k
  if (!pSMgr || m_HMgrs.empty())
1071
0
    return slst;
1072
1073
  // process XML input of the simplified API (see manual)
1074
88.0k
  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
1075
293
    return spellml(word);
1076
293
  }
1077
87.7k
  if (utf8) {
1078
49.9k
    if (word.size() >= MAXWORDUTF8LEN)
1079
311
      return slst;
1080
49.9k
  } else {
1081
37.8k
    if (word.size() >= MAXWORDLEN)
1082
261
      return slst;
1083
37.8k
  }
1084
87.2k
  size_t wl = 0;
1085
1086
87.2k
  std::string scw;
1087
87.2k
  std::vector<w_char> sunicw;
1088
1089
  // input conversion
1090
87.2k
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1091
87.2k
  {
1092
87.2k
    std::string wspace;
1093
1094
87.2k
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1095
87.2k
    if (convstatus)
1096
73.0k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1097
14.1k
    else
1098
14.1k
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1099
1100
87.2k
    if (wl == 0)
1101
0
      return slst;
1102
1103
87.2k
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
1104
87.2k
    if (wl > 32768)
1105
10
      return slst;
1106
87.2k
#endif
1107
87.2k
  }
1108
1109
87.2k
  bool good = false;
1110
1111
87.2k
  clock_t timelimit;
1112
  // initialize in every suggestion call
1113
87.2k
  timelimit = clock();
1114
1115
  // check capitalized form for FORCEUCASE
1116
87.2k
  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
1117
355
    int info = SPELL_ORIGCAP;
1118
355
    if (checkword(scw, &info, NULL)) {
1119
2
      std::string form(scw);
1120
2
      mkinitcap(form);
1121
2
      slst.push_back(form);
1122
2
      return slst;
1123
2
    }
1124
355
  }
1125
1126
87.1k
  switch (captype) {
1127
39.4k
    case NOCAP: {
1128
39.4k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1129
39.4k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1130
12
          return slst;
1131
39.3k
      if (abbv) {
1132
10.9k
        std::string wspace(scw);
1133
10.9k
        wspace.push_back('.');
1134
10.9k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1135
10.9k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1136
6
            return slst;
1137
10.9k
      }
1138
39.3k
      break;
1139
39.3k
    }
1140
1141
39.3k
    case INITCAP: {
1142
5.80k
      capwords = true;
1143
5.80k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1144
5.80k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1145
0
          return slst;
1146
5.80k
      std::string wspace(scw);
1147
5.80k
      mkallsmall2(wspace, sunicw);
1148
5.80k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1149
5.80k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1150
1
          return slst;
1151
5.80k
      break;
1152
5.80k
    }
1153
5.80k
    case HUHINITCAP:
1154
4.75k
      capwords = true;
1155
      /* FALLTHROUGH */
1156
21.6k
    case HUHCAP: {
1157
21.6k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1158
21.6k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1159
36
          return slst;
1160
      // something.The -> something. The
1161
21.6k
      size_t dot_pos = scw.find('.');
1162
21.6k
      if (dot_pos != std::string::npos) {
1163
9.84k
        std::string postdot = scw.substr(dot_pos + 1);
1164
9.84k
        int captype_;
1165
9.84k
        if (utf8) {
1166
2.05k
          std::vector<w_char> postdotu;
1167
2.05k
          u8_u16(postdotu, postdot);
1168
2.05k
          captype_ = get_captype_utf8(postdotu, langnum);
1169
7.79k
        } else {
1170
7.79k
          captype_ = get_captype(postdot, csconv);
1171
7.79k
        }
1172
9.84k
        if (captype_ == INITCAP) {
1173
74
          std::string str(scw);
1174
74
          str.insert(dot_pos + 1, 1, ' ');
1175
74
          insert_sug(slst, str);
1176
74
        }
1177
9.84k
      }
1178
1179
21.6k
      std::string wspace;
1180
1181
21.6k
      if (captype == HUHINITCAP) {
1182
        // TheOpenOffice.org -> The OpenOffice.org
1183
4.74k
        wspace = scw;
1184
4.74k
        mkinitsmall2(wspace, sunicw);
1185
4.74k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1186
4.74k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1187
16
            return slst;
1188
4.74k
      }
1189
21.6k
      wspace = scw;
1190
21.6k
      mkallsmall2(wspace, sunicw);
1191
21.6k
      if (spell(wspace, spell_candidate_stack))
1192
271
        insert_sug(slst, wspace);
1193
21.6k
      size_t prevns = slst.size();
1194
21.6k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1195
21.6k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1196
58
          return slst;
1197
21.5k
      if (captype == HUHINITCAP) {
1198
4.71k
        mkinitcap2(wspace, sunicw);
1199
4.71k
        if (spell(wspace, spell_candidate_stack))
1200
83
          insert_sug(slst, wspace);
1201
4.71k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1202
4.71k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1203
3
            return slst;
1204
4.71k
      }
1205
      // aNew -> "a New" (instead of "a new")
1206
31.3k
      for (size_t j = prevns; j < slst.size(); ++j) {
1207
9.81k
        const char* space = strchr(slst[j].c_str(), ' ');
1208
9.81k
        if (space) {
1209
2.85k
          size_t slen = strlen(space + 1);
1210
          // different case after space (need capitalisation)
1211
2.85k
          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
1212
2.05k
            std::string first(slst[j].c_str(), space + 1);
1213
2.05k
            std::string second(space + 1);
1214
2.05k
            std::vector<w_char> w;
1215
2.05k
            if (utf8)
1216
1.08k
              u8_u16(w, second);
1217
2.05k
            mkinitcap2(second, w);
1218
            // set as first suggestion
1219
2.05k
            slst.erase(slst.begin() + j);
1220
2.05k
            slst.insert(slst.begin(), first + second);
1221
2.05k
          }
1222
2.85k
        }
1223
9.81k
      }
1224
21.5k
      break;
1225
21.5k
    }
1226
1227
20.3k
    case ALLCAP: {
1228
20.3k
      std::string wspace(scw);
1229
20.3k
      mkallsmall2(wspace, sunicw);
1230
20.3k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1231
20.3k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1232
5
          return slst;
1233
20.2k
      if (pAMgr && pAMgr->get_keepcase() && spell(wspace, spell_candidate_stack))
1234
643
        insert_sug(slst, wspace);
1235
20.2k
      mkinitcap2(wspace, sunicw);
1236
20.2k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1237
20.2k
      if (clock() > timelimit + TIMELIMIT_GLOBAL)
1238
21
          return slst;
1239
20.2k
      for (auto& j : slst) {
1240
12.1k
        mkallcap(j);
1241
12.1k
        if (pAMgr && pAMgr->get_checksharps()) {
1242
3.71k
          if (utf8) {
1243
3.10k
            mystrrep(j, "\xC3\x9F", "SS");
1244
3.10k
          } else {
1245
610
            mystrrep(j, "\xDF", "SS");
1246
610
          }
1247
3.71k
        }
1248
12.1k
      }
1249
20.2k
      break;
1250
20.2k
    }
1251
87.1k
  }
1252
1253
  // LANG_hu section: replace '-' with ' ' in Hungarian
1254
87.0k
  if (langnum == LANG_hu) {
1255
11.6k
    for (auto& j : slst) {
1256
11.6k
      size_t pos = j.find('-');
1257
11.6k
      if (pos != std::string::npos) {
1258
3.94k
        int info;
1259
3.94k
        std::string w(j.substr(0, pos));
1260
3.94k
        w.append(j.substr(pos + 1));
1261
3.94k
        (void)spell(w, spell_candidate_stack, &info, NULL);
1262
3.94k
        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1263
14
          j[pos] = ' ';
1264
14
        } else
1265
3.93k
          j[pos] = '-';
1266
3.94k
      }
1267
11.6k
    }
1268
10.6k
  }
1269
  // END OF LANG_hu section
1270
  // try ngram approach since found nothing good suggestion
1271
87.0k
  if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1272
85.1k
    switch (captype) {
1273
39.1k
      case NOCAP: {
1274
39.1k
        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
1275
39.1k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1276
2
            return slst;
1277
39.1k
        break;
1278
39.1k
      }
1279
      /* FALLTHROUGH */
1280
39.1k
      case HUHINITCAP:
1281
4.40k
        capwords = true;
1282
      /* FALLTHROUGH */
1283
20.1k
      case HUHCAP: {
1284
20.1k
        std::string wspace(scw);
1285
20.1k
        mkallsmall2(wspace, sunicw);
1286
20.1k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
1287
20.1k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1288
2
            return slst;
1289
20.1k
        break;
1290
20.1k
      }
1291
20.1k
      case INITCAP: {
1292
5.75k
        capwords = true;
1293
5.75k
        std::string wspace(scw);
1294
5.75k
        mkallsmall2(wspace, sunicw);
1295
5.75k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
1296
5.75k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1297
0
            return slst;
1298
5.75k
        break;
1299
5.75k
      }
1300
20.0k
      case ALLCAP: {
1301
20.0k
        std::string wspace(scw);
1302
20.0k
        mkallsmall2(wspace, sunicw);
1303
20.0k
        size_t oldns = slst.size();
1304
20.0k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
1305
20.0k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1306
0
            return slst;
1307
31.7k
        for (size_t j = oldns; j < slst.size(); ++j) {
1308
11.6k
          mkallcap(slst[j]);
1309
11.6k
        }
1310
20.0k
        break;
1311
20.0k
      }
1312
85.1k
    }
1313
85.1k
  }
1314
1315
  // try dash suggestion (Afo-American -> Afro-American)
1316
  // Note: LibreOffice was modified to treat dashes as word
1317
  // characters to check "scot-free" etc. word forms, but
1318
  // we need to handle suggestions for "Afo-American", etc.,
1319
  // while "Afro-American" is missing from the dictionary.
1320
  // TODO avoid possible overgeneration
1321
87.0k
  size_t dash_pos = scw.find('-');
1322
87.0k
  if (dash_pos != std::string::npos) {
1323
77.3k
    int nodashsug = 1;
1324
140k
    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1325
63.0k
      if (slst[j].find('-') != std::string::npos)
1326
1.27k
        nodashsug = 0;
1327
63.0k
    }
1328
1329
77.3k
    size_t prev_pos = 0;
1330
77.3k
    bool last = false;
1331
1332
179k
    while (!good && nodashsug && !last) {
1333
116k
      if (dash_pos == scw.size())
1334
22.7k
        last = 1;
1335
116k
      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1336
116k
      if (chunk != word && !spell(chunk, spell_candidate_stack)) {
1337
75.7k
        std::vector<std::string> nlst = suggest(chunk, suggest_candidate_stack);
1338
75.7k
        if (clock() > timelimit + TIMELIMIT_GLOBAL)
1339
14.2k
            return slst;
1340
1.44M
        for (auto j = nlst.rbegin(); j != nlst.rend(); ++j) {
1341
1.38M
          std::string wspace = scw.substr(0, prev_pos);
1342
1.38M
          wspace.append(*j);
1343
1.38M
          if (!last) {
1344
1.24M
            wspace.append("-");
1345
1.24M
            wspace.append(scw.substr(dash_pos + 1));
1346
1.24M
          }
1347
1.38M
          int info = 0;
1348
1.38M
          if (pAMgr && pAMgr->get_forbiddenword())
1349
1.37M
            checkword(wspace, &info, NULL);
1350
1.38M
          if (!(info & SPELL_FORBIDDEN))
1351
1.38M
            insert_sug(slst, wspace);
1352
1.38M
        }
1353
61.4k
        nodashsug = 0;
1354
61.4k
      }
1355
101k
      if (!last) {
1356
85.9k
        prev_pos = dash_pos + 1;
1357
85.9k
        dash_pos = scw.find('-', prev_pos);
1358
85.9k
      }
1359
101k
      if (dash_pos == std::string::npos)
1360
47.8k
        dash_pos = scw.size();
1361
101k
    }
1362
77.3k
  }
1363
72.8k
  return slst;
1364
87.0k
}
1365
1366
0
const std::string& HunspellImpl::get_dict_encoding() const {
1367
0
  return encoding;
1368
0
}
1369
1370
0
std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1371
0
  std::vector<std::string> slst;
1372
1373
0
  std::string result2;
1374
0
  if (desc.empty())
1375
0
    return slst;
1376
0
  for (const auto& i : desc) {
1377
0
    std::string result;
1378
1379
    // add compound word parts (except the last one)
1380
0
    const char* s = i.c_str();
1381
0
    const char* part = strstr(s, MORPH_PART);
1382
0
    if (part) {
1383
0
      const char* nextpart = strstr(part + 1, MORPH_PART);
1384
0
      while (nextpart) {
1385
0
        std::string field;
1386
0
        copy_field(field, part, MORPH_PART);
1387
0
        result.append(field);
1388
0
        part = nextpart;
1389
0
        nextpart = strstr(part + 1, MORPH_PART);
1390
0
      }
1391
0
      s = part;
1392
0
    }
1393
1394
0
    std::string tok(s);
1395
0
    size_t alt = 0;
1396
0
    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1397
0
      tok[alt + 1] = MSEP_ALT;
1398
0
    }
1399
0
    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1400
0
    for (auto& k : pl) {
1401
      // add derivational suffixes
1402
0
      if (k.find(MORPH_DERI_SFX) != std::string::npos) {
1403
        // remove inflectional suffixes
1404
0
        const size_t is = k.find(MORPH_INFL_SFX);
1405
0
        if (is != std::string::npos)
1406
0
          k.resize(is);
1407
0
        std::vector<std::string> singlepl;
1408
0
        singlepl.push_back(k);
1409
0
        std::string sg = pSMgr->suggest_gen(singlepl, k);
1410
0
        if (!sg.empty()) {
1411
0
          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1412
0
          for (auto& j : gen) {
1413
0
            result2.push_back(MSEP_REC);
1414
0
            result2.append(result);
1415
0
            result2.append(j);
1416
0
          }
1417
0
        }
1418
0
      } else {
1419
0
        result2.push_back(MSEP_REC);
1420
0
        result2.append(result);
1421
0
        if (k.find(MORPH_SURF_PFX) != std::string::npos) {
1422
0
          std::string field;
1423
0
          copy_field(field, k, MORPH_SURF_PFX);
1424
0
          result2.append(field);
1425
0
        }
1426
0
        std::string field;
1427
0
        copy_field(field, k, MORPH_STEM);
1428
0
        result2.append(field);
1429
0
      }
1430
0
    }
1431
0
  }
1432
0
  slst = line_tok(result2, MSEP_REC);
1433
0
  uniqlist(slst);
1434
0
  return slst;
1435
0
}
1436
1437
0
std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1438
0
  return stem(analyze(word));
1439
0
}
1440
1441
0
const std::string& HunspellImpl::get_wordchars_cpp() const {
1442
0
  return pAMgr->get_wordchars();
1443
0
}
1444
1445
0
const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1446
0
  return pAMgr->get_wordchars_utf16();
1447
0
}
1448
1449
20.4k
void HunspellImpl::mkinitcap(std::string& u8) {
1450
20.4k
  if (utf8) {
1451
6.53k
    std::vector<w_char> u16;
1452
6.53k
    u8_u16(u16, u8);
1453
6.53k
    ::mkinitcap_utf(u16, langnum);
1454
6.53k
    u16_u8(u8, u16);
1455
13.9k
  } else {
1456
13.9k
    ::mkinitcap(u8, csconv);
1457
13.9k
  }
1458
20.4k
}
1459
1460
367k
int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1461
367k
  if (utf8) {
1462
95.4k
    ::mkinitcap_utf(u16, langnum);
1463
95.4k
    u16_u8(u8, u16);
1464
271k
  } else {
1465
271k
    ::mkinitcap(u8, csconv);
1466
271k
  }
1467
367k
  return u8.size();
1468
367k
}
1469
1470
4.74k
int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1471
4.74k
  if (utf8) {
1472
1.90k
    ::mkinitsmall_utf(u16, langnum);
1473
1.90k
    u16_u8(u8, u16);
1474
2.84k
  } else {
1475
2.84k
    ::mkinitsmall(u8, csconv);
1476
2.84k
  }
1477
4.74k
  return u8.size();
1478
4.74k
}
1479
1480
0
int HunspellImpl::add(const std::string& word) {
1481
0
  if (!m_HMgrs.empty())
1482
0
    return m_HMgrs[0]->add(word);
1483
0
  return 0;
1484
0
}
1485
1486
0
int HunspellImpl::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
1487
0
  if (!m_HMgrs.empty())
1488
0
    return m_HMgrs[0]->add_with_flags(word, flags, desc);
1489
0
  return 0;
1490
0
}
1491
1492
0
int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1493
0
  if (!m_HMgrs.empty())
1494
0
    return m_HMgrs[0]->add_with_affix(word, example);
1495
0
  return 0;
1496
0
}
1497
1498
0
int HunspellImpl::remove(const std::string& word) {
1499
0
  if (!m_HMgrs.empty())
1500
0
    return m_HMgrs[0]->remove(word);
1501
0
  return 0;
1502
0
}
1503
1504
0
const std::string& HunspellImpl::get_version_cpp() const {
1505
0
  return pAMgr->get_version();
1506
0
}
1507
1508
0
const struct cs_info* HunspellImpl::get_csconv() const {
1509
0
  return csconv;
1510
0
}
1511
1512
0
// Append `st` to `result`, separated by a newline when `result` already has
// content.  An empty `st` leaves `result` untouched.
void HunspellImpl::cat_result(std::string& result, const std::string& st) {
  if (st.empty())
    return;

  if (!result.empty())
    result.append("\n");
  result.append(st);
}
1519
1520
0
std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1521
0
  std::vector<std::string> slst = analyze_internal(word);
1522
  // output conversion
1523
0
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1524
0
  if (rl) {
1525
0
    for (size_t i = 0; rl && i < slst.size(); ++i) {
1526
0
      std::string wspace;
1527
0
      if (rl->conv(slst[i], wspace)) {
1528
0
        slst[i] = wspace;
1529
0
      }
1530
0
    }
1531
0
  }
1532
0
  return slst;
1533
0
}
1534
1535
0
std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
1536
0
  std::vector<std::string> candidate_stack, slst;
1537
0
  if (!pSMgr || m_HMgrs.empty())
1538
0
    return slst;
1539
0
  if (utf8) {
1540
0
    if (word.size() >= MAXWORDUTF8LEN)
1541
0
      return slst;
1542
0
  } else {
1543
0
    if (word.size() >= MAXWORDLEN)
1544
0
      return slst;
1545
0
  }
1546
0
  int captype = NOCAP;
1547
0
  size_t abbv = 0;
1548
0
  size_t wl = 0;
1549
1550
0
  std::string scw;
1551
0
  std::vector<w_char> sunicw;
1552
1553
  // input conversion
1554
0
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1555
0
  {
1556
0
    std::string wspace;
1557
1558
0
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1559
0
    if (convstatus)
1560
0
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1561
0
    else
1562
0
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1563
0
  }
1564
1565
0
  if (wl == 0) {
1566
0
    if (abbv) {
1567
0
      scw.clear();
1568
0
      for (wl = 0; wl < abbv; wl++)
1569
0
        scw.push_back('.');
1570
0
      abbv = 0;
1571
0
    } else
1572
0
      return slst;
1573
0
  }
1574
1575
0
  std::string result;
1576
1577
0
  size_t n = 0;
1578
  // test numbers
1579
  // LANG_hu section: set dash information for suggestions
1580
0
  if (langnum == LANG_hu) {
1581
0
    size_t n2 = 0;
1582
0
    size_t n3 = 0;
1583
1584
0
    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1585
0
                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1586
0
      n++;
1587
0
      if ((scw[n] == '.') || (scw[n] == ',')) {
1588
0
        if (((n2 == 0) && (n > 3)) ||
1589
0
            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1590
0
          break;
1591
0
        n2++;
1592
0
        n3 = n;
1593
0
      }
1594
0
    }
1595
1596
0
    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1597
0
      return slst;
1598
0
    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1599
0
                      checkword(scw.substr(n), NULL, NULL))) {
1600
0
      result.append(scw);
1601
0
      result.resize(n - 1);
1602
0
      if (n == wl)
1603
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1604
0
      else {
1605
0
        std::string chunk = scw.substr(n - 1, 1);
1606
0
        cat_result(result, pSMgr->suggest_morph(chunk));
1607
0
        result.push_back('+');  // XXX SPEC. MORPHCODE
1608
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1609
0
      }
1610
0
      return line_tok(result, MSEP_REC);
1611
0
    }
1612
0
  }
1613
  // END OF LANG_hu section
1614
1615
0
  switch (captype) {
1616
0
    case HUHCAP:
1617
0
    case HUHINITCAP:
1618
0
    case NOCAP: {
1619
0
      cat_result(result, pSMgr->suggest_morph(scw));
1620
0
      if (abbv) {
1621
0
        std::string u8buffer(scw);
1622
0
        u8buffer.push_back('.');
1623
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1624
0
      }
1625
0
      break;
1626
0
    }
1627
0
    case INITCAP: {
1628
0
      mkallsmall2(scw, sunicw);
1629
0
      std::string u8buffer(scw);
1630
0
      mkinitcap2(scw, sunicw);
1631
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1632
0
      cat_result(result, pSMgr->suggest_morph(scw));
1633
0
      if (abbv) {
1634
0
        u8buffer.push_back('.');
1635
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1636
1637
0
        u8buffer = scw;
1638
0
        u8buffer.push_back('.');
1639
1640
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1641
0
      }
1642
0
      break;
1643
0
    }
1644
0
    case ALLCAP: {
1645
0
      cat_result(result, pSMgr->suggest_morph(scw));
1646
0
      if (abbv) {
1647
0
        std::string u8buffer(scw);
1648
0
        u8buffer.push_back('.');
1649
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1650
0
      }
1651
0
      mkallsmall2(scw, sunicw);
1652
0
      std::string u8buffer(scw);
1653
0
      mkinitcap2(scw, sunicw);
1654
1655
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1656
0
      cat_result(result, pSMgr->suggest_morph(scw));
1657
0
      if (abbv) {
1658
0
        u8buffer.push_back('.');
1659
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1660
1661
0
        u8buffer = scw;
1662
0
        u8buffer.push_back('.');
1663
1664
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1665
0
      }
1666
0
      break;
1667
0
    }
1668
0
  }
1669
1670
0
  if (!result.empty()) {
1671
    // word reversing wrapper for complex prefixes
1672
0
    if (complexprefixes) {
1673
0
      if (utf8)
1674
0
        reverseword_utf(result);
1675
0
      else
1676
0
        reverseword(result);
1677
0
    }
1678
0
    return line_tok(result, MSEP_REC);
1679
0
  }
1680
1681
  // compound word with dash (HU) I18n
1682
  // LANG_hu section: set dash information for suggestions
1683
1684
0
  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1685
0
  if (dash_pos != std::string::npos) {
1686
0
    int nresult = 0;
1687
1688
0
    std::string part1 = scw.substr(0, dash_pos), part2 = scw.substr(dash_pos + 1);
1689
1690
    // examine 2 sides of the dash
1691
0
    if (part2.empty()) {  // base word ending with dash
1692
0
      if (spell(part1, candidate_stack)) {
1693
0
        std::string p = pSMgr->suggest_morph(part1);
1694
0
        if (!p.empty()) {
1695
0
          slst = line_tok(p, MSEP_REC);
1696
0
          return slst;
1697
0
        }
1698
0
      }
1699
0
    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1700
0
      if (spell(part1, candidate_stack) && (spell("-e", candidate_stack))) {
1701
0
        std::string st = pSMgr->suggest_morph(part1);
1702
0
        if (!st.empty()) {
1703
0
          result.append(st);
1704
0
        }
1705
0
        result.push_back('+');  // XXX spec. separator in MORPHCODE
1706
0
        st = pSMgr->suggest_morph("-e");
1707
0
        if (!st.empty()) {
1708
0
          result.append(st);
1709
0
        }
1710
0
        return line_tok(result, MSEP_REC);
1711
0
      }
1712
0
    } else {
1713
      // first word ending with dash: word- XXX ???
1714
0
      part1.push_back(' ');
1715
0
      nresult = spell(part1, candidate_stack);
1716
0
      part1.erase(part1.size() - 1);
1717
0
      if (nresult && spell(part2, candidate_stack) &&
1718
0
          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1719
0
        std::string st = pSMgr->suggest_morph(part1);
1720
0
        if (!st.empty()) {
1721
0
          result.append(st);
1722
0
          result.push_back('+');  // XXX spec. separator in MORPHCODE
1723
0
        }
1724
0
        st = pSMgr->suggest_morph(part2);
1725
0
        if (!st.empty()) {
1726
0
          result.append(st);
1727
0
        }
1728
0
        return line_tok(result, MSEP_REC);
1729
0
      }
1730
0
    }
1731
    // affixed number in correct word
1732
0
    if (nresult && (dash_pos > 0) &&
1733
0
        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1734
0
         (scw[dash_pos - 1] == '.'))) {
1735
0
      n = 1;
1736
0
      if (scw[dash_pos - n] == '.')
1737
0
        n++;
1738
      // search first not a number character to left from dash
1739
0
      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1740
0
             (n < 6)) {
1741
0
        n++;
1742
0
      }
1743
0
      if (dash_pos < n)
1744
0
        n--;
1745
      // numbers: valami1000000-hoz
1746
      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1747
      // 56-hoz, 6-hoz
1748
0
      for (; n >= 1; n--) {
1749
0
        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1750
0
            continue;
1751
0
        }
1752
0
        std::string chunk = scw.substr(dash_pos - n);
1753
0
        if (checkword(chunk, NULL, NULL)) {
1754
0
          result.append(chunk);
1755
0
          std::string st = pSMgr->suggest_morph(chunk);
1756
0
          if (!st.empty()) {
1757
0
            result.append(st);
1758
0
          }
1759
0
          return line_tok(result, MSEP_REC);
1760
0
        }
1761
0
      }
1762
0
    }
1763
0
  }
1764
0
  return slst;
1765
0
}
1766
1767
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1768
0
  std::vector<std::string> slst;
1769
0
  if (!pSMgr || pl.empty())
1770
0
    return slst;
1771
0
  std::vector<std::string> pl2 = analyze(word);
1772
0
  int captype = NOCAP, abbv = 0;
1773
0
  std::string cw;
1774
0
  cleanword(cw, word, &captype, &abbv);
1775
0
  std::string result;
1776
1777
0
  for (const auto& i : pl) {
1778
0
    cat_result(result, pSMgr->suggest_gen(pl2, i));
1779
0
  }
1780
1781
0
  if (!result.empty()) {
1782
    // allcap
1783
0
    if (captype == ALLCAP)
1784
0
      mkallcap(result);
1785
1786
    // line split
1787
0
    slst = line_tok(result, MSEP_REC);
1788
1789
    // capitalize
1790
0
    if (captype == INITCAP || captype == HUHINITCAP) {
1791
0
      for (auto& str : slst) {
1792
0
        mkinitcap(str);
1793
0
      }
1794
0
    }
1795
1796
    // temporary filtering of prefix related errors (eg.
1797
    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1798
0
    auto it = slst.begin();
1799
0
    while (it != slst.end()) {
1800
0
      std::vector<std::string> candidate_stack;
1801
0
      if (!spell(*it, candidate_stack)) {
1802
0
        it = slst.erase(it);
1803
0
      } else  {
1804
0
        ++it;
1805
0
      }
1806
0
    }
1807
0
  }
1808
0
  return slst;
1809
0
}
1810
1811
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1812
0
  std::vector<std::string> pl = analyze(pattern);
1813
0
  std::vector<std::string> slst = generate(word, pl);
1814
0
  uniqlist(slst);
1815
0
  return slst;
1816
0
}
1817
1818
// minimal XML parser functions
1819
92
std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
1820
92
  std::string dest;
1821
92
  if (pos == std::string::npos)
1822
36
    return dest;
1823
56
  const char* par = in_par.c_str() + pos;
1824
56
  char end = *par;
1825
56
  if (end == '>')
1826
24
    end = '<';
1827
32
  else if (end != '\'' && end != '"')
1828
12
    return dest;  // bad XML
1829
420
  for (par++; *par != '\0' && *par != end; ++par) {
1830
376
    dest.push_back(*par);
1831
376
  }
1832
44
  mystrrep(dest, "&lt;", "<");
1833
44
  mystrrep(dest, "&amp;", "&");
1834
44
  return dest;
1835
56
}
1836
1837
0
// Returns the value of the langnum member.
int HunspellImpl::get_langnum() const {
1838
0
  return langnum;
1839
0
}
1840
1841
0
bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1842
0
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1843
0
  if (rl) {
1844
0
    return rl->conv(word, dest);
1845
0
  }
1846
0
  dest.assign(word);
1847
0
  return false;
1848
0
}
1849
1850
// return the position of the '>' that closes the element's tag (attr == NULL) or the position just past the attribute name
1851
92
// Locates a position inside the XML tag that starts at or after `pos` in `s`.
//
// With attr == NULL the position of the first '>' found from `pos` is returned
// (npos if there is none). Otherwise `attr` (e.g. "type=") is searched before
// that '>' and must be preceded by a space or a newline; the returned position
// is the one just past the attribute name, i.e. where its value starts.
//
// Returns npos when `pos` is npos or the attribute is not found in the tag.
std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
  if (pos == std::string::npos)
    return std::string::npos;

  std::string::size_type endpos = s.find('>', pos);
  if (attr == NULL)
    return endpos;
  const size_t attrlen = strlen(attr);
  while (true) {
    pos = s.find(attr, pos);
    if (pos == std::string::npos || pos >= endpos)
      return std::string::npos;
    // a match at index 0 has no preceding character, so it cannot be a
    // whitespace-separated attribute (and s[pos - 1] must not be read)
    if (pos > 0 && (s[pos - 1] == ' ' || s[pos - 1] == '\n'))
      break;
    pos += attrlen;
  }
  return pos + attrlen;
}
1868
1869
int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
1870
                                const char* attr,
1871
92
                                const char* value) {
1872
92
  const std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
1873
92
  return cw == value ? 1 : 0;
1874
92
}
1875
1876
0
std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
1877
0
  std::vector<std::string> slst;
1878
0
  if (pos == std::string::npos)
1879
0
    return slst;
1880
0
  while (true) {
1881
0
    pos = list.find(tag, pos);
1882
0
    if (pos == std::string::npos)
1883
0
        break;
1884
0
    std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
1885
0
    if (cw.empty()) {
1886
0
      break;
1887
0
    }
1888
0
    slst.push_back(cw);
1889
0
    ++pos;
1890
0
  }
1891
0
  return slst;
1892
0
}
1893
1894
293
std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1895
293
  std::vector<std::string> slst;
1896
1897
293
  std::string::size_type qpos = in_word.find("<query");
1898
293
  if (qpos == std::string::npos)
1899
230
    return slst;  // bad XML input
1900
1901
63
  std::string::size_type q2pos = in_word.find('>', qpos);
1902
63
  if (q2pos == std::string::npos)
1903
34
    return slst;  // bad XML input
1904
1905
29
  q2pos = in_word.find("<word", q2pos);
1906
29
  if (q2pos == std::string::npos)
1907
6
    return slst;  // bad XML input
1908
1909
23
  if (check_xml_par(in_word, qpos, "type=", "analyze")) {
1910
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1911
0
    if (!cw.empty())
1912
0
      slst = analyze(cw);
1913
0
    if (slst.empty())
1914
0
      return slst;
1915
    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1916
0
    std::string r;
1917
0
    r.append("<code>");
1918
0
    for (auto entry : slst) {
1919
0
      r.append("<a>");
1920
1921
0
      mystrrep(entry, "\t", " ");
1922
0
      mystrrep(entry, "&", "&amp;");
1923
0
      mystrrep(entry, "<", "&lt;");
1924
0
      r.append(entry);
1925
1926
0
      r.append("</a>");
1927
0
    }
1928
0
    r.append("</code>");
1929
0
    slst.clear();
1930
0
    slst.push_back(r);
1931
0
    return slst;
1932
23
  } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
1933
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1934
0
    if (!cw.empty())
1935
0
      return stem(cw);
1936
23
  } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
1937
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1938
0
    if (cw.empty())
1939
0
      return slst;
1940
0
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1941
0
    if (q3pos != std::string::npos) {
1942
0
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1943
0
      if (!cw2.empty()) {
1944
0
        return generate(cw, cw2);
1945
0
      }
1946
0
    } else {
1947
0
      q2pos = in_word.find("<code", q2pos + 1);
1948
0
      if (q2pos != std::string::npos) {
1949
0
        std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
1950
0
        if (!slst2.empty()) {
1951
0
          slst = generate(cw, slst2);
1952
0
          uniqlist(slst);
1953
0
          return slst;
1954
0
        }
1955
0
      }
1956
0
    }
1957
23
  } else if (check_xml_par(in_word, qpos, "type=", "add")) {
1958
0
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1959
0
    if (cw.empty())
1960
0
      return slst;
1961
0
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1962
0
    if (q3pos != std::string::npos) {
1963
0
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1964
0
      if (!cw2.empty()) {
1965
0
        add_with_affix(cw, cw2);
1966
0
      } else {
1967
0
        add(cw);
1968
0
      }
1969
0
    } else {
1970
0
        add(cw);
1971
0
    }
1972
0
  }
1973
23
  return slst;
1974
23
}
1975
1976
0
std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1977
0
  std::vector<std::string> slst;
1978
0
  struct hentry* he = NULL;
1979
0
  int len;
1980
0
  std::string w2;
1981
0
  const char* word;
1982
0
  const char* ignoredchars = pAMgr->get_ignore();
1983
0
  if (ignoredchars != NULL) {
1984
0
    w2.assign(root_word);
1985
0
    if (utf8) {
1986
0
      const std::vector<w_char>& ignoredchars_utf16 =
1987
0
          pAMgr->get_ignore_utf16();
1988
0
      remove_ignored_chars_utf(w2, ignoredchars_utf16);
1989
0
    } else {
1990
0
      remove_ignored_chars(w2, ignoredchars);
1991
0
    }
1992
0
    word = w2.c_str();
1993
0
    len = (int)w2.size();
1994
0
  } else {
1995
0
    word = root_word.c_str();
1996
0
    len = (int)root_word.size();
1997
0
  }
1998
1999
0
  if (!len)
2000
0
    return slst;
2001
2002
0
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2003
0
    he = m_HMgrs[i]->lookup(word, len);
2004
0
  }
2005
0
  if (he) {
2006
0
    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word);
2007
0
  }
2008
0
  return slst;
2009
0
}
2010
2011
namespace {
  // Returns a malloc-allocated, NUL-terminated copy of `s`, or NULL when the
  // allocation fails.
  // using malloc because this is for the c-api where the callers
  // expect to be able to use free
  char* stringdup(const std::string& s) {
    const size_t bytes = s.size() + 1;
    char* copy = (char*)malloc(bytes);
    if (copy == NULL)
      return NULL;
    memcpy(copy, s.c_str(), bytes);
    return copy;
  }

  // Converts `items` into a C array stored in *slst and returns the number of
  // items. For an empty list *slst is set to NULL and 0 is returned. The
  // array itself is allocated with new[], each string with stringdup().
  int munge_vector(char*** slst, const std::vector<std::string>& items) {
    if (items.empty()) {
      *slst = NULL;
      return 0;
    }
    char** out = new char*[items.size()];
    *slst = out;
    size_t idx = 0;
    for (const std::string& item : items)
      out[idx++] = stringdup(item);
    return items.size();
  }
}
2034
2035
0
int HunspellImpl::spell(const char* word, int* info, char** root) {
2036
0
  std::string sroot;
2037
0
  std::vector<std::string> candidate_stack;
2038
0
  bool ret = spell(word, candidate_stack, info, root ? &sroot : NULL);
2039
0
  if (root) {
2040
0
    if (sroot.empty()) {
2041
0
      *root = NULL;
2042
0
    } else {
2043
0
      *root = stringdup(sroot);
2044
0
    }
2045
0
  }
2046
0
  return ret;
2047
0
}
2048
2049
0
int HunspellImpl::suggest(char*** slst, const char* word) {
2050
0
  std::vector<std::string> suggests = suggest(word);
2051
0
  return munge_vector(slst, suggests);
2052
0
}
2053
2054
0
int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
2055
0
  std::vector<std::string> stems = suffix_suggest(root_word);
2056
0
  return munge_vector(slst, stems);
2057
0
}
2058
2059
0
// Releases a list of `n` strings: each entry is released with free(), the
// array with delete[], and *slst is reset to NULL. Does nothing when `slst`
// or *slst is NULL.
void HunspellImpl::free_list(char*** slst, int n) {
  if (!slst || !*slst)
    return;
  char** entries = *slst;
  for (int i = 0; i < n; ++i)
    free(entries[i]);
  delete[] entries;
  *slst = NULL;
}
2067
2068
0
// Returns a pointer to the first character of the encoding member.
char* HunspellImpl::get_dic_encoding() {
2069
0
  return &encoding[0];
2070
0
}
2071
2072
0
int HunspellImpl::analyze(char*** slst, const char* word) {
2073
0
  std::vector<std::string> stems = analyze(word);
2074
0
  return munge_vector(slst, stems);
2075
0
}
2076
2077
0
int HunspellImpl::stem(char*** slst, const char* word) {
2078
0
  std::vector<std::string> stems = stem(word);
2079
0
  return munge_vector(slst, stems);
2080
0
}
2081
2082
0
int HunspellImpl::stem(char*** slst, char** desc, int n) {
2083
0
  std::vector<std::string> morph;
2084
0
  morph.reserve(n);
2085
0
  for (int i = 0; i < n; ++i) morph.emplace_back(desc[i]);
2086
2087
0
  std::vector<std::string> stems = stem(morph);
2088
0
  return munge_vector(slst, stems);
2089
0
}
2090
2091
0
int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
2092
0
  std::vector<std::string> stems = generate(word, pattern);
2093
0
  return munge_vector(slst, stems);
2094
0
}
2095
2096
0
int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
2097
0
  std::vector<std::string> morph;
2098
0
  morph.reserve(pln);
2099
0
  for (int i = 0; i < pln; ++i) morph.emplace_back(pl[i]);
2100
2101
0
  std::vector<std::string> stems = generate(word, morph);
2102
0
  return munge_vector(slst, stems);
2103
0
}
2104
2105
0
const char* HunspellImpl::get_wordchars() const {
2106
0
  return get_wordchars_cpp().c_str();
2107
0
}
2108
2109
0
const char* HunspellImpl::get_version() const {
2110
0
  return get_version_cpp().c_str();
2111
0
}
2112
2113
0
int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
2114
0
  std::string d;
2115
0
  bool ret = input_conv(word, d);
2116
0
  if (ret && d.size() < destsize) {
2117
0
    strncpy(dest, d.c_str(), destsize);
2118
0
    return 1;
2119
0
  }
2120
0
  return 0;
2121
0
}
2122
2123
// Creates the HunspellImpl object with the given affix path, dictionary path
// and key, and stores it in m_Impl.
Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
2124
14.2k
  : m_Impl(new HunspellImpl(affpath, dpath, key)) {
2125
14.2k
}
2126
2127
14.2k
// Deletes the implementation object allocated by the constructor.
Hunspell::~Hunspell() {
2128
14.2k
  delete m_Impl;
2129
14.2k
}
2130
2131
// load extra dictionaries
2132
0
// Forwards to HunspellImpl::add_dic() and returns its result.
int Hunspell::add_dic(const char* dpath, const char* key) {
2133
0
  return m_Impl->add_dic(dpath, key);
2134
0
}
2135
2136
14.2k
// Forwards to HunspellImpl::spell(), supplying a fresh, empty candidate stack
// for the call.
bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
2137
14.2k
  std::vector<std::string> candidate_stack;
2138
14.2k
  return m_Impl->spell(word, candidate_stack, info, root);
2139
14.2k
}
2140
2141
12.4k
// Forwards to HunspellImpl::suggest() and returns its result.
std::vector<std::string> Hunspell::suggest(const std::string& word) {
2142
12.4k
  return m_Impl->suggest(word);
2143
12.4k
}
2144
2145
0
// Forwards to HunspellImpl::suffix_suggest() and returns its result.
std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
2146
0
  return m_Impl->suffix_suggest(root_word);
2147
0
}
2148
2149
0
// Forwards to HunspellImpl::get_dict_encoding() and returns its result.
const std::string& Hunspell::get_dict_encoding() const {
2150
0
  return m_Impl->get_dict_encoding();
2151
0
}
2152
2153
0
// Forwards the list `desc` to HunspellImpl::stem() and returns its result.
std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
2154
0
  return m_Impl->stem(desc);
2155
0
}
2156
2157
0
// Forwards `word` to HunspellImpl::stem() and returns its result.
std::vector<std::string> Hunspell::stem(const std::string& word) {
2158
0
  return m_Impl->stem(word);
2159
0
}
2160
2161
0
// Forwards to HunspellImpl::get_wordchars_cpp() and returns its result.
const std::string& Hunspell::get_wordchars_cpp() const {
2162
0
  return m_Impl->get_wordchars_cpp();
2163
0
}
2164
2165
0
// Forwards to HunspellImpl::get_wordchars_utf16() and returns its result.
const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
2166
0
  return m_Impl->get_wordchars_utf16();
2167
0
}
2168
2169
0
// Forwards to HunspellImpl::add() and returns its result.
int Hunspell::add(const std::string& word) {
2170
0
  return m_Impl->add(word);
2171
0
}
2172
2173
0
// Forwards to HunspellImpl::add_with_flags() and returns its result.
int Hunspell::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
2174
0
  return m_Impl->add_with_flags(word, flags, desc);
2175
0
}
2176
2177
0
// Forwards to HunspellImpl::add_with_affix() and returns its result.
int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
2178
0
  return m_Impl->add_with_affix(word, example);
2179
0
}
2180
2181
0
// Forwards to HunspellImpl::remove() and returns its result.
int Hunspell::remove(const std::string& word) {
2182
0
  return m_Impl->remove(word);
2183
0
}
2184
2185
0
// Forwards to HunspellImpl::get_version_cpp() and returns its result.
const std::string& Hunspell::get_version_cpp() const {
2186
0
  return m_Impl->get_version_cpp();
2187
0
}
2188
2189
0
// Forwards to HunspellImpl::get_csconv() and returns its result.
const struct cs_info* Hunspell::get_csconv() const {
2190
0
  return m_Impl->get_csconv();
2191
0
}
2192
2193
0
// Forwards to HunspellImpl::analyze() and returns its result.
std::vector<std::string> Hunspell::analyze(const std::string& word) {
2194
0
  return m_Impl->analyze(word);
2195
0
}
2196
2197
0
// Forwards `word` and the description list `pl` to HunspellImpl::generate()
// and returns its result.
std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
2198
0
  return m_Impl->generate(word, pl);
2199
0
}
2200
2201
0
// Forwards `word` and the example word `pattern` to HunspellImpl::generate()
// and returns its result.
std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
2202
0
  return m_Impl->generate(word, pattern);
2203
0
}
2204
2205
0
// Forwards to HunspellImpl::get_langnum() and returns its result.
int Hunspell::get_langnum() const {
2206
0
  return m_Impl->get_langnum();
2207
0
}
2208
2209
0
// Forwards to HunspellImpl::input_conv() and returns its result.
bool Hunspell::input_conv(const std::string& word, std::string& dest) {
2210
0
  return m_Impl->input_conv(word, dest);
2211
0
}
2212
2213
0
// C-string variant: forwards to HunspellImpl::spell(word, info, root).
int Hunspell::spell(const char* word, int* info, char** root) {
2214
0
  return m_Impl->spell(word, info, root);
2215
0
}
2216
2217
0
// C-array variant: forwards to HunspellImpl::suggest(slst, word).
int Hunspell::suggest(char*** slst, const char* word) {
2218
0
  return m_Impl->suggest(slst, word);
2219
0
}
2220
2221
0
// C-array variant: forwards to HunspellImpl::suffix_suggest(slst, root_word).
int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
2222
0
  return m_Impl->suffix_suggest(slst, root_word);
2223
0
}
2224
2225
0
// Forwards to HunspellImpl::free_list(slst, n).
void Hunspell::free_list(char*** slst, int n) {
2226
0
  m_Impl->free_list(slst, n);
2227
0
}
2228
2229
0
// Forwards to HunspellImpl::get_dic_encoding() and returns its result.
char* Hunspell::get_dic_encoding() {
2230
0
  return m_Impl->get_dic_encoding();
2231
0
}
2232
2233
0
// C-array variant: forwards to HunspellImpl::analyze(slst, word).
int Hunspell::analyze(char*** slst, const char* word) {
2234
0
  return m_Impl->analyze(slst, word);
2235
0
}
2236
2237
0
// C-array variant: forwards to HunspellImpl::stem(slst, word).
int Hunspell::stem(char*** slst, const char* word) {
2238
0
  return m_Impl->stem(slst, word);
2239
0
}
2240
2241
0
// C-array variant: forwards to HunspellImpl::stem(slst, desc, n).
int Hunspell::stem(char*** slst, char** desc, int n) {
2242
0
  return m_Impl->stem(slst, desc, n);
2243
0
}
2244
2245
0
// C-array variant: forwards to HunspellImpl::generate(slst, word, pattern).
int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
2246
0
  return m_Impl->generate(slst, word, pattern);
2247
0
}
2248
2249
0
// C-array variant: forwards to HunspellImpl::generate(slst, word, pl, pln).
int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
2250
0
  return m_Impl->generate(slst, word, pl, pln);
2251
0
}
2252
2253
0
// Forwards to HunspellImpl::get_wordchars() and returns its result.
const char* Hunspell::get_wordchars() const {
2254
0
  return m_Impl->get_wordchars();
2255
0
}
2256
2257
0
// Forwards to HunspellImpl::get_version() and returns its result.
const char* Hunspell::get_version() const {
2258
0
  return m_Impl->get_version();
2259
0
}
2260
2261
0
// C-buffer variant: forwards to HunspellImpl::input_conv(word, dest, destsize).
int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
2262
0
  return m_Impl->input_conv(word, dest, destsize);
2263
0
}
2264
2265
0
/* C API: allocates a HunspellImpl for the given affix and dictionary paths
 * and returns it as an opaque handle. */
Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath);
  return reinterpret_cast<Hunhandle*>(impl);
}
2268
2269
/* C API: same as Hunspell_create, additionally passing `key` to the
 * HunspellImpl constructor. */
Hunhandle* Hunspell_create_key(const char* affpath,
                               const char* dpath,
                               const char* key) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath, key);
  return reinterpret_cast<Hunhandle*>(impl);
}
2274
2275
0
/* C API: deletes the HunspellImpl object behind the handle. */
void Hunspell_destroy(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  delete impl;
}
2278
2279
0
/* C API: forwards to HunspellImpl::add_dic(dpath) on the object behind the
 * handle. */
int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_dic(dpath);
}
2282
2283
0
/* C API: forwards to HunspellImpl::spell(word) on the object behind the
 * handle. */
int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->spell(word);
}
2286
2287
0
/* C API: forwards to HunspellImpl::get_dic_encoding() on the object behind
 * the handle. */
char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->get_dic_encoding();
}
2290
2291
0
/* C API: forwards to HunspellImpl::suggest(slst, word) on the object behind
 * the handle. */
int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suggest(slst, word);
}
2294
2295
0
/* C API: forwards to HunspellImpl::suffix_suggest(slst, root_word) on the
 * object behind the handle. */
int Hunspell_suffix_suggest(Hunhandle* pHunspell, char*** slst, const char* root_word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suffix_suggest(slst, root_word);
}
2298
2299
0
/* C API: forwards to HunspellImpl::analyze(slst, word) on the object behind
 * the handle. */
int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->analyze(slst, word);
}
2302
2303
0
/* C API: forwards to HunspellImpl::stem(slst, word) on the object behind the
 * handle. */
int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, word);
}
2306
2307
0
/* C API: forwards to HunspellImpl::stem(slst, desc, n) on the object behind
 * the handle. */
int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, desc, n);
}
2310
2311
int Hunspell_generate(Hunhandle* pHunspell,
2312
                      char*** slst,
2313
                      const char* word,
2314
                      const char* pattern)
2315
0
{
2316
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
2317
0
}
2318
2319
int Hunspell_generate2(Hunhandle* pHunspell,
2320
                       char*** slst,
2321
                       const char* word,
2322
                       char** desc,
2323
                       int n)
2324
0
{
2325
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
2326
0
}
2327
2328
/* functions for run-time modification of the dictionary */
2329
2330
/* add word to the run-time dictionary */
2331
2332
0
/* C API: forwards to HunspellImpl::add(word) on the object behind the
 * handle. */
int Hunspell_add(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add(word);
}
2335
2336
0
/* C API: forwards to HunspellImpl::add_with_flags(word, flags, desc) on the
 * object behind the handle. */
int Hunspell_add_with_flags(Hunhandle* pHunspell, const char* word, const char* flags, const char* desc) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_with_flags(word, flags, desc);
}
2339
2340
/* add word to the run-time dictionary with affix flags of
2341
 * the example (a dictionary word): Hunspell will recognize
2342
 * affixed forms of the new word, too.
2343
 */
2344
2345
int Hunspell_add_with_affix(Hunhandle* pHunspell,
2346
                            const char* word,
2347
0
                            const char* example) {
2348
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
2349
0
}
2350
2351
/* remove word from the run-time dictionary */
2352
2353
0
/* C API: forwards to HunspellImpl::remove(word) on the object behind the
 * handle. */
int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->remove(word);
}
2356
2357
0
/* C API: forwards to HunspellImpl::free_list(list, n) on the object behind
 * the handle. */
void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  impl->free_list(list, n);
}