Coverage Report

Created: 2026-06-02 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hunspell/src/hunspell/hunspell.cxx
Line
Count
Source
1
/* ***** BEGIN LICENSE BLOCK *****
2
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3
 *
4
 * Copyright (C) 2002-2022 Németh László
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version
7
 * 1.1 (the "License"); you may not use this file except in compliance with
8
 * the License. You may obtain a copy of the License at
9
 * http://www.mozilla.org/MPL/
10
 *
11
 * Software distributed under the License is distributed on an "AS IS" basis,
12
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
 * for the specific language governing rights and limitations under the
14
 * License.
15
 *
16
 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17
 *
18
 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19
 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20
 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21
 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22
 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23
 *
24
 * Alternatively, the contents of this file may be used under the terms of
25
 * either the GNU General Public License Version 2 or later (the "GPL"), or
26
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
 * in which case the provisions of the GPL or the LGPL are applicable instead
28
 * of those above. If you wish to allow use of your version of this file only
29
 * under the terms of either the GPL or the LGPL, and not to allow others to
30
 * use your version of this file under the terms of the MPL, indicate your
31
 * decision by deleting the provisions above and replace them with the notice
32
 * and other provisions required by the GPL or the LGPL. If you do not delete
33
 * the provisions above, a recipient may use your version of this file under
34
 * the terms of any one of the MPL, the GPL or the LGPL.
35
 *
36
 * ***** END LICENSE BLOCK ***** */
37
/*
38
 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39
 * And Contributors.  All rights reserved.
40
 *
41
 * Redistribution and use in source and binary forms, with or without
42
 * modification, are permitted provided that the following conditions
43
 * are met:
44
 *
45
 * 1. Redistributions of source code must retain the above copyright
46
 *    notice, this list of conditions and the following disclaimer.
47
 *
48
 * 2. Redistributions in binary form must reproduce the above copyright
49
 *    notice, this list of conditions and the following disclaimer in the
50
 *    documentation and/or other materials provided with the distribution.
51
 *
52
 * 3. All modifications to the source code must be clearly marked as
53
 *    such.  Binary redistributions based on modified source code
54
 *    must be clearly marked as modified versions in the documentation
55
 *    and/or other materials provided with the distribution.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68
 * SUCH DAMAGE.
69
 */
70
71
#include <cstdlib>
72
#include <cstring>
73
#include <cstdio>
74
#include <ctime>
75
76
#include "affixmgr.hxx"
77
#include "hunspell.hxx"
78
#include "suggestmgr.hxx"
79
#include "hunspell.h"
80
#include "csutil.hxx"
81
82
#include <limits>
83
#include <memory>
84
#include <string>
85
86
4.17M
#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
87
23.9k
#define MAXSPELLMLLEN 8192
88
89
class HunspellImpl
90
{
91
public:
92
 HunspellImpl(const char* affpath, const char* dpath, const char* key = nullptr);
93
 HunspellImpl(const HunspellImpl&) = delete;
94
 HunspellImpl& operator=(const HunspellImpl&) = delete;
95
 ~HunspellImpl();
96
 int add_dic(const char* dpath, const char* key = nullptr);
97
 std::vector<std::string> suffix_suggest(const std::string& root_word);
98
 std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
99
 std::vector<std::string> generate(const std::string& word, const std::string& pattern);
100
 std::vector<std::string> stem(const std::string& word);
101
 std::vector<std::string> stem(const std::vector<std::string>& morph);
102
 std::vector<std::string> analyze(const std::string& word);
103
 int get_langnum() const;
104
 bool input_conv(const std::string& word, std::string& dest);
105
 bool spell(const std::string& word,
106
            std::vector<std::string>& candidate_stack,
107
            int* info = nullptr,
108
            std::string* root = nullptr,
109
            std::chrono::steady_clock::time_point suggest_start = std::chrono::steady_clock::time_point::max());
110
 std::vector<std::string> suggest(const std::string& word);
111
 std::vector<std::string> suggest(const std::string& word,
112
                                  std::vector<std::string>& suggest_candidate_stack,
113
                                  std::chrono::steady_clock::time_point suggest_start);
114
 const std::string& get_wordchars_cpp() const;
115
 const std::vector<w_char>& get_wordchars_utf16() const;
116
 const std::string& get_dict_encoding() const;
117
 int add(const std::string& word);
118
 int add_with_flags(const std::string& word, const std::string& flags, const std::string& desc = "");
119
 int add_with_affix(const std::string& word, const std::string& example);
120
 int remove(const std::string& word);
121
 const std::string& get_version_cpp() const;
122
 struct cs_info* get_csconv();
123
124
 int spell(const char* word, int* info = nullptr, char** root = nullptr);
125
 int suggest(char*** slst, const char* word);
126
 int suffix_suggest(char*** slst, const char* root_word);
127
 void free_list(char*** slst, int n);
128
 char* get_dic_encoding();
129
 int analyze(char*** slst, const char* word);
130
 int stem(char*** slst, const char* word);
131
 int stem(char*** slst, char** morph, int n);
132
 int generate(char*** slst, const char* word, const char* word2);
133
 int generate(char*** slst, const char* word, char** desc, int n);
134
 const char* get_wordchars() const;
135
 const char* get_version() const;
136
 int input_conv(const char* word, char* dest, size_t destsize);
137
138
private:
139
  std::vector<std::unique_ptr<HashMgr>> m_HMgrs;
140
  std::unique_ptr<AffixMgr> pAMgr; // pAMgr depends on m_HMgrs
141
  std::unique_ptr<SuggestMgr> pSMgr; // pSMgr depends on pAMgr
142
  std::string affixpath;
143
  std::string encoding;
144
  const struct cs_info* csconv;
145
  int langnum;
146
  int utf8;
147
  int complexprefixes;
148
  std::vector<std::string> wordbreak;
149
150
private:
151
  std::vector<std::string> analyze_internal(const std::string& word);
152
  bool spell_internal(const std::string& word,
153
                      std::vector<std::string>& candidate_stack,
154
                      int* info = nullptr,
155
                      std::string* root = nullptr,
156
                      std::chrono::steady_clock::time_point suggest_start = std::chrono::steady_clock::time_point::max());
157
  std::vector<std::string> suggest_internal(const std::string& word,
158
                                            std::vector<std::string>& spell_candidate_stack,
159
                                            std::vector<std::string>& suggest_candidate_stack,
160
                                            bool& capitalized, size_t& abbreviated, int& captype,
161
                                            std::chrono::steady_clock::time_point suggest_start);
162
  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
163
  size_t cleanword2(std::string& dest,
164
                    std::vector<w_char>& dest_u,
165
                    const std::string& src,
166
                    int* pcaptype,
167
                    size_t* pabbrev);
168
  void clean_ignore(std::string& dest, const std::string& src);
169
  void mkinitcap(std::string& u8);
170
  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
171
  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
172
  void mkallcap(std::string& u8);
173
  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
174
  struct hentry* checkword(const std::string& source, int* info, std::string* root,
175
                           std::chrono::steady_clock::time_point suggest_start = std::chrono::steady_clock::time_point::max());
176
  std::string sharps_u8_l1(const std::string& source);
177
  hentry*
178
  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root,
179
              std::chrono::steady_clock::time_point suggest_start = std::chrono::steady_clock::time_point::max());
180
  int is_keepcase(const hentry* rv);
181
  void insert_sug(std::vector<std::string>& slst, const std::string& word);
182
  void cat_result(std::string& result, const std::string& st);
183
  std::vector<std::string> spellml(const std::string& word);
184
  std::string get_xml_par(const std::string& par, std::string::size_type pos);
185
  std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
186
  std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
187
  int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
188
};
189
190
// Build the checker from an affix file (affpath) and a dictionary file
// (dpath); key is handed through unchanged to the hash and affix managers.
// The managers are created in dependency order:
// hash manager -> affix manager -> suggestion manager.
HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key)
  : affixpath(affpath),
    csconv(nullptr),
    utf8(0),
    complexprefixes(0) {
  // the hash manager has to exist first ...
  m_HMgrs.push_back(std::make_unique<HashMgr>(dpath, affpath, key));

  // ... because the affix manager needs access to its lookup methods
  pAMgr = std::make_unique<AffixMgr>(affpath, m_HMgrs, key);

  // cache the settings the affix manager read for this dictionary
  std::string try_string = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  langnum = pAMgr->get_langnum();
  utf8 = pAMgr->get_utf8();
  if (!utf8) {
    // a character table is only looked up for non-UTF-8 dictionaries
    csconv = get_current_cs(encoding);
  }
  complexprefixes = pAMgr->get_complexprefixes();
  wordbreak = pAMgr->get_breaktable();

  // the suggestion manager comes last, it works on top of the affix manager
  pSMgr = std::make_unique<SuggestMgr>(try_string, MAXSUGGESTION, pAMgr.get());
}
217
218
59.0k
// The managers are released by their unique_ptr members; only the character
// table pointer is handled by hand.
HunspellImpl::~HunspellImpl() {
#ifdef MOZILLA_CLIENT
  // only in MOZILLA_CLIENT builds is the table deleted here
  delete[] csconv;
#endif
  csconv = nullptr;
}
224
225
// load extra dictionaries
226
0
int HunspellImpl::add_dic(const char* dpath, const char* key) {
227
0
  m_HMgrs.push_back(std::make_unique<HashMgr>(dpath, affixpath.c_str(), key));
228
0
  return 0;
229
0
}
230
231
232
// make a copy of src at dest while removing all characters
233
// specified in IGNORE rule
234
void HunspellImpl::clean_ignore(std::string& dest,
235
33.1M
                                const std::string& src) {
236
33.1M
  dest.clear();
237
33.1M
  dest.assign(src);
238
33.1M
  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : nullptr;
239
33.1M
  if (ignoredchars != nullptr) {
240
2.52M
    if (utf8) {
241
662k
      const std::vector<w_char>& ignoredchars_utf16 =
242
662k
          pAMgr->get_ignore_utf16();
243
662k
      remove_ignored_chars_utf(dest, ignoredchars_utf16);
244
1.86M
    } else {
245
1.86M
      remove_ignored_chars(dest, ignoredchars);
246
1.86M
    }
247
2.52M
  }
248
33.1M
}
249
250
251
// make a copy of src at destination while removing all leading
252
// blanks and removing any trailing periods after recording
253
// their presence with the abbreviation flag
254
// also since already going through character by character,
255
// set the capitalization type
256
// return the length of the "cleaned" (and UTF-8 encoded) word
257
258
size_t HunspellImpl::cleanword2(std::string& dest,
259
                         std::vector<w_char>& dest_utf,
260
                         const std::string& src,
261
                         int* pcaptype,
262
15.6M
                         size_t* pabbrev) {
263
15.6M
  dest.clear();
264
15.6M
  dest_utf.clear();
265
266
  // remove IGNORE characters from the string
267
15.6M
  std::string w2;
268
15.6M
  clean_ignore(w2, src);
269
270
15.6M
  const char* q = w2.c_str();
271
15.6M
  int nl = (int)w2.size();
272
273
  // first skip over any leading blanks
274
15.8M
  while (*q == ' ') {
275
239k
    ++q;
276
239k
    nl--;
277
239k
  }
278
279
  // now strip off any trailing periods (recording their presence)
280
15.6M
  *pabbrev = 0;
281
282
17.6M
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
283
2.00M
    nl--;
284
2.00M
    (*pabbrev)++;
285
2.00M
  }
286
287
  // if no characters are left it can't be capitalized
288
15.6M
  if (nl <= 0) {
289
616k
    *pcaptype = NOCAP;
290
616k
    return 0;
291
616k
  }
292
293
15.0M
  dest.append(q, nl);
294
15.0M
  nl = dest.size();
295
15.0M
  if (utf8) {
296
3.99M
    u8_u16(dest_utf, dest);
297
3.99M
    *pcaptype = get_captype_utf8(dest_utf, langnum);
298
11.0M
  } else {
299
11.0M
    *pcaptype = get_captype(dest, csconv);
300
11.0M
  }
301
15.0M
  return nl;
302
15.6M
}
303
304
void HunspellImpl::cleanword(std::string& dest,
305
                        const std::string& src,
306
                        int* pcaptype,
307
10.0k
                        int* pabbrev) {
308
10.0k
  dest.clear();
309
10.0k
  const unsigned char* q = (const unsigned char*)src.c_str();
310
10.0k
  int firstcap = 0, nl = (int)src.size();
311
312
  // first skip over any leading blanks
313
15.3k
  while (*q == ' ') {
314
5.33k
    ++q;
315
5.33k
    nl--;
316
5.33k
  }
317
318
  // now strip off any trailing periods (recording their presence)
319
10.0k
  *pabbrev = 0;
320
321
18.5k
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
322
8.49k
    nl--;
323
8.49k
    (*pabbrev)++;
324
8.49k
  }
325
326
  // if no characters are left it can't be capitalized
327
10.0k
  if (nl <= 0) {
328
547
    *pcaptype = NOCAP;
329
547
    return;
330
547
  }
331
332
  // now determine the capitalization type of the first nl letters
333
9.46k
  int ncap = 0;
334
9.46k
  int nneutral = 0;
335
9.46k
  int nc = 0;
336
337
9.46k
  if (!utf8) {
338
492k
    while (nl > 0) {
339
486k
      nc++;
340
486k
      if (csconv[(*q)].ccase)
341
399k
        ncap++;
342
486k
      if (csconv[(*q)].cupper == csconv[(*q)].clower)
343
75.5k
        nneutral++;
344
486k
      dest.push_back(*q++);
345
486k
      nl--;
346
486k
    }
347
    // remember to terminate the destination string
348
5.91k
    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
349
5.91k
  } else {
350
3.54k
    std::vector<w_char> t;
351
3.54k
    u8_u16(t, src);
352
157k
    for (auto& wc : t) {
353
157k
      const auto idx = (unsigned short)wc;
354
157k
      const auto low = unicodetolower(idx, langnum);
355
157k
      if (idx != low)
356
106k
        ncap++;
357
157k
      if (unicodetoupper(idx, langnum) == low)
358
48.9k
        nneutral++;
359
157k
    }
360
3.54k
    u16_u8(dest, t);
361
3.54k
    if (ncap) {
362
1.59k
      const auto idx = (unsigned short)t[0];
363
1.59k
      firstcap = (idx != unicodetolower(idx, langnum));
364
1.59k
    }
365
3.54k
  }
366
367
  // now finally set the captype
368
9.46k
  if (ncap == 0) {
369
5.18k
    *pcaptype = NOCAP;
370
5.18k
  } else if ((ncap == 1) && firstcap) {
371
1.34k
    *pcaptype = INITCAP;
372
2.93k
  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
373
717
    *pcaptype = ALLCAP;
374
2.21k
  } else if ((ncap > 1) && firstcap) {
375
940
    *pcaptype = HUHINITCAP;
376
1.27k
  } else {
377
1.27k
    *pcaptype = HUHCAP;
378
1.27k
  }
379
9.46k
}
380
381
10.3k
void HunspellImpl::mkallcap(std::string& u8) {
382
10.3k
  if (utf8) {
383
3.49k
    std::vector<w_char> u16;
384
3.49k
    u8_u16(u16, u8);
385
3.49k
    ::mkallcap_utf(u16, langnum);
386
3.49k
    u16_u8(u8, u16);
387
6.88k
  } else {
388
6.88k
    ::mkallcap(u8, csconv);
389
6.88k
  }
390
10.3k
}
391
392
1.63M
int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
393
1.63M
  if (utf8) {
394
394k
    ::mkallsmall_utf(u16, langnum);
395
394k
    u16_u8(u8, u16);
396
1.24M
  } else {
397
1.24M
    ::mkallsmall(u8, csconv);
398
1.24M
  }
399
1.63M
  return u8.size();
400
1.63M
}
401
402
// convert UTF-8 sharp S codes to latin 1
403
181k
std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
404
181k
  std::string dest(source);
405
181k
  mystrrep(dest, "\xC3\x9F", "\xDF");
406
181k
  return dest;
407
181k
}
408
409
// recursive search for right ss - sharp s permutations
410
hentry* HunspellImpl::spellsharps(std::string& base,
411
                              size_t n_pos,
412
                              int n,
413
                              int repnum,
414
                              int* info,
415
                              std::string* root,
416
426k
                              std::chrono::steady_clock::time_point suggest_start) {
417
426k
  size_t pos = base.find("ss", n_pos);
418
426k
  if (pos != std::string::npos && (n < MAXSHARPS)) {
419
184k
    base[pos] = '\xC3';
420
184k
    base[pos + 1] = '\x9F';
421
184k
    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root, suggest_start);
422
184k
    if (h)
423
3.29k
      return h;
424
181k
    base[pos] = 's';
425
181k
    base[pos + 1] = 's';
426
181k
    h = spellsharps(base, pos + 2, n + 1, repnum, info, root, suggest_start);
427
181k
    if (h)
428
1.43k
      return h;
429
241k
  } else if (repnum > 0) {
430
183k
    if (utf8)
431
1.97k
      return checkword(base, info, root, suggest_start);
432
181k
    std::string tmp(sharps_u8_l1(base));
433
181k
    return checkword(tmp, info, root, suggest_start);
434
183k
  }
435
238k
  return nullptr;
436
426k
}
437
438
42.4k
int HunspellImpl::is_keepcase(const hentry* rv) {
439
42.4k
  return pAMgr && rv->astr && pAMgr->get_keepcase() &&
440
10.2k
         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
441
42.4k
}
442
443
/* insert a word to the beginning of the suggestion array */
444
201k
void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
445
201k
  slst.insert(slst.begin(), word);
446
201k
}
447
448
bool HunspellImpl::spell(const std::string& word, std::vector<std::string>& candidate_stack,
449
                         int* info, std::string* root,
450
17.8M
                         std::chrono::steady_clock::time_point suggest_start) {
451
  // something very broken if spell ends up calling itself with the same word
452
17.8M
  if (std::find(candidate_stack.begin(), candidate_stack.end(), word) != candidate_stack.end())
453
0
    return false;
454
455
17.8M
  if (candidate_stack.size() >= MAXBREAKDEPTH)
456
2.37M
    return false;
457
458
15.5M
  if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
459
2.13k
    return false;
460
461
15.5M
  candidate_stack.push_back(word);
462
15.5M
  bool r = spell_internal(word, candidate_stack, info, root, suggest_start);
463
15.5M
  candidate_stack.pop_back();
464
465
15.5M
  if (r && root) {
466
    // output conversion
467
0
    RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : nullptr;
468
0
    if (rl) {
469
0
      std::string wspace;
470
0
      if (rl->conv(*root, wspace)) {
471
0
        *root = std::move(wspace);
472
0
      }
473
0
    }
474
0
  }
475
15.5M
  return r;
476
15.5M
}
477
478
bool HunspellImpl::spell_internal(const std::string& word, std::vector<std::string>& candidate_stack,
479
                                  int* info, std::string* root,
480
15.5M
                                  std::chrono::steady_clock::time_point suggest_start) {
481
15.5M
  struct hentry* rv = nullptr;
482
483
15.5M
  int info2 = 0;
484
15.5M
  if (!info)
485
15.5M
    info = &info2;
486
5.86k
  else
487
5.86k
    *info = 0;
488
489
  // Hunspell supports XML input of the simplified API (see manual)
490
15.5M
  if (word == SPELL_XML)
491
70.2k
    return true;
492
15.4M
  if (utf8) {
493
4.07M
    if (word.size() >= MAXWORDUTF8LEN)
494
6.78k
      return false;
495
11.3M
  } else {
496
11.3M
    if (word.size() >= MAXWORDLEN)
497
21.3k
      return false;
498
11.3M
  }
499
15.4M
  int captype = NOCAP;
500
15.4M
  size_t abbv = 0;
501
15.4M
  size_t wl = 0;
502
503
15.4M
  std::string scw;
504
15.4M
  std::vector<w_char> sunicw;
505
506
  // input conversion
507
15.4M
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : nullptr;
508
15.4M
  {
509
15.4M
    std::string wspace;
510
511
15.4M
    bool convstatus = rl ? rl->conv(word, wspace) : false;
512
15.4M
    if (convstatus)
513
59.7k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
514
15.3M
    else
515
15.3M
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
516
15.4M
  }
517
518
15.4M
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
519
15.4M
    if (wl > 32768)
520
296
      return false;
521
15.4M
#endif
522
523
#ifdef MOZILLA_CLIENT
524
  // accept the abbreviated words without dots
525
  // workaround for the incomplete tokenization of Mozilla
526
  abbv = 1;
527
#endif
528
529
15.4M
  if (wl == 0 || m_HMgrs.empty())
530
594k
    return true;
531
14.8M
  if (root)
532
0
    root->clear();
533
534
  // allow numbers with dots, dashes and commas (but forbid double separators:
535
  // "..", "--" etc.)
536
14.8M
  enum { NBEGIN, NNUM, NSEP };
537
14.8M
  int nstate = NBEGIN;
538
14.8M
  size_t i;
539
540
20.4M
  for (i = 0; (i < wl); i++) {
541
19.6M
    if ((scw[i] <= '9') && (scw[i] >= '0')) {
542
5.15M
      nstate = NNUM;
543
14.5M
    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
544
3.71M
      if ((nstate == NSEP) || (i == 0))
545
3.21M
        break;
546
492k
      nstate = NSEP;
547
492k
    } else
548
10.8M
      break;
549
19.6M
  }
550
14.8M
  if ((i == wl) && (nstate == NNUM))
551
624k
    return true;
552
553
14.2M
  switch (captype) {
554
1.11M
    case HUHCAP:
555
    /* FALLTHROUGH */
556
1.62M
    case HUHINITCAP:
557
1.62M
      *info |= SPELL_ORIGCAP;
558
    /* FALLTHROUGH */
559
13.2M
    case NOCAP:
560
13.2M
      rv = checkword(scw, info, root, suggest_start);
561
13.2M
      if ((abbv) && !(rv)) {
562
1.21M
        std::string u8buffer(scw);
563
1.21M
        u8buffer.push_back('.');
564
1.21M
        rv = checkword(u8buffer, info, root, suggest_start);
565
1.21M
      }
566
13.2M
      break;
567
411k
    case ALLCAP: {
568
411k
      *info |= SPELL_ORIGCAP;
569
411k
      rv = checkword(scw, info, root, suggest_start);
570
411k
      if (rv)
571
17.3k
        break;
572
393k
      if (abbv) {
573
18.4k
        std::string u8buffer(scw);
574
18.4k
        u8buffer.push_back('.');
575
18.4k
        rv = checkword(u8buffer, info, root, suggest_start);
576
18.4k
        if (rv)
577
405
          break;
578
18.4k
      }
579
      // Spec. prefix handling for Catalan, French, Italian:
580
      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
581
393k
      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
582
393k
      if (apos != std::string::npos) {
583
119k
        mkallsmall2(scw, sunicw);
584
        //conversion may result in string with different len to pre-mkallsmall2
585
        //so re-scan
586
119k
        if (apos != std::string::npos && apos < scw.size() - 1) {
587
109k
          std::string part1 = scw.substr(0, apos + 1), part2 = scw.substr(apos + 1);
588
109k
          if (utf8) {
589
29.7k
            std::vector<w_char> part1u, part2u;
590
29.7k
            u8_u16(part1u, part1);
591
29.7k
            u8_u16(part2u, part2);
592
29.7k
            mkinitcap2(part2, part2u);
593
29.7k
            scw = part1 + part2;
594
29.7k
            sunicw = std::move(part1u);
595
29.7k
            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
596
29.7k
            rv = checkword(scw, info, root, suggest_start);
597
29.7k
            if (rv)
598
394
              break;
599
79.3k
          } else {
600
79.3k
            mkinitcap2(part2, sunicw);
601
79.3k
            scw = part1 + part2;
602
79.3k
            rv = checkword(scw, info, root, suggest_start);
603
79.3k
            if (rv)
604
540
              break;
605
79.3k
          }
606
108k
          mkinitcap2(scw, sunicw);
607
108k
          rv = checkword(scw, info, root, suggest_start);
608
108k
          if (rv)
609
403
            break;
610
108k
        }
611
119k
      }
612
392k
      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
613
614
30.8k
        mkallsmall2(scw, sunicw);
615
30.8k
        std::string u8buffer(scw);
616
30.8k
        rv = spellsharps(u8buffer, 0, 0, 0, info, root, suggest_start);
617
30.8k
        if (!rv) {
618
29.0k
          mkinitcap2(scw, sunicw);
619
29.0k
          rv = spellsharps(scw, 0, 0, 0, info, root, suggest_start);
620
29.0k
        }
621
30.8k
        if ((abbv) && !(rv)) {
622
378
          u8buffer.push_back('.');
623
378
          rv = spellsharps(u8buffer, 0, 0, 0, info, root, suggest_start);
624
378
          if (!rv) {
625
303
            u8buffer = std::string(scw);
626
303
            u8buffer.push_back('.');
627
303
            rv = spellsharps(u8buffer, 0, 0, 0, info, root, suggest_start);
628
303
          }
629
378
        }
630
30.8k
        if (rv)
631
2.25k
          break;
632
30.8k
      }
633
392k
    }
634
      /* FALLTHROUGH */
635
968k
    case INITCAP: {
636
      // handle special capitalization of dotted I
637
968k
      bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
638
968k
      *info |= SPELL_ORIGCAP;
639
968k
      if (captype == ALLCAP) {
640
389k
          mkallsmall2(scw, sunicw);
641
389k
          mkinitcap2(scw, sunicw);
642
389k
          if (Idot)
643
2.82k
             scw.replace(0, 1, "\xc4\xb0");
644
389k
      }
645
968k
      if (captype == INITCAP)
646
578k
        *info |= SPELL_INITCAP;
647
968k
      rv = checkword(scw, info, root, suggest_start);
648
968k
      if (captype == INITCAP)
649
578k
        *info &= ~SPELL_INITCAP;
650
      // forbid bad capitalization
651
      // (for example, ijs -> Ijs instead of IJs in Dutch)
652
      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
653
968k
      if (*info & SPELL_FORBIDDEN) {
654
2.37k
        rv = nullptr;
655
2.37k
        break;
656
2.37k
      }
657
966k
      if (rv && is_keepcase(rv) && (captype == ALLCAP))
658
829
        rv = nullptr;
659
966k
      if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
660
31.0k
        break;
661
662
935k
      mkallsmall2(scw, sunicw);
663
935k
      std::string u8buffer(scw);
664
935k
      mkinitcap2(scw, sunicw);
665
666
935k
      rv = checkword(u8buffer, info, root, suggest_start);
667
935k
      if (abbv && !rv) {
668
102k
        u8buffer.push_back('.');
669
102k
        rv = checkword(u8buffer, info, root, suggest_start);
670
102k
        if (!rv) {
671
102k
          u8buffer = scw;
672
102k
          u8buffer.push_back('.');
673
102k
          if (captype == INITCAP)
674
84.5k
            *info |= SPELL_INITCAP;
675
102k
          rv = checkword(u8buffer, info, root, suggest_start);
676
102k
          if (captype == INITCAP)
677
84.5k
            *info &= ~SPELL_INITCAP;
678
102k
          if (rv && is_keepcase(rv) && (captype == ALLCAP))
679
28
            rv = nullptr;
680
102k
          break;
681
102k
        }
682
102k
      }
683
832k
      if (rv && is_keepcase(rv) &&
684
3.56k
          ((captype == ALLCAP) ||
685
           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
686
           // in INITCAP form, too.
687
1.77k
           !(pAMgr->get_checksharps() &&
688
1.23k
             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
689
1.23k
              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
690
3.17k
        rv = nullptr;
691
832k
      break;
692
935k
    }
693
14.2M
  }
694
695
14.2M
  if (rv) {
696
465k
    if (pAMgr && pAMgr->get_warn() && rv->astr &&
697
11.0k
        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
698
1.24k
      *info |= SPELL_WARN;
699
1.24k
      if (pAMgr->get_forbidwarn())
700
404
        return false;
701
843
      return true;
702
1.24k
    }
703
463k
    return true;
704
465k
  }
705
706
  // recursive breaking at break points
707
13.7M
  if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
708
709
13.6M
    int nbr = 0;
710
13.6M
    wl = scw.size();
711
712
    // calculate break points for recursion limit
713
43.1M
    for (auto& j : wordbreak) {
714
43.1M
      size_t pos = 0;
715
57.1M
      while ((pos = scw.find(j, pos)) != std::string::npos) {
716
13.9M
        ++nbr;
717
13.9M
        pos += j.size();
718
13.9M
      }
719
43.1M
    }
720
13.6M
    if (nbr >= MAXBREAKDEPTH)
721
3.79k
      return false;
722
723
    // check boundary patterns (^begin and end$)
724
43.0M
    for (auto& j : wordbreak) {
725
43.0M
      size_t plen = j.size();
726
43.0M
      if (plen == 1 || plen > wl)
727
19.3M
        continue;
728
729
23.7M
      if (j[0] == '^' && scw.compare(0, plen - 1, j, 1, plen - 1) == 0 &&
730
3.63M
          spell(scw.substr(plen - 1), candidate_stack, nullptr, nullptr, suggest_start)) {
731
128k
        *info |= SPELL_COMPOUND;
732
128k
        return true;
733
128k
      }
734
735
23.5M
      if (j[plen - 1] == '$' &&
736
10.2M
          scw.compare(wl - plen + 1, plen - 1, j, 0, plen - 1) == 0) {
737
3.97M
        std::string suffix(scw.substr(wl - plen + 1));
738
3.97M
        scw.resize(wl - plen + 1);
739
3.97M
        if (spell(scw, candidate_stack, nullptr, nullptr, suggest_start)) {
740
864k
          *info |= SPELL_COMPOUND;
741
864k
          return true;
742
864k
        }
743
3.11M
        scw.append(suffix);
744
3.11M
      }
745
23.5M
    }
746
747
    // other patterns
748
40.0M
    for (auto& j : wordbreak) {
749
40.0M
      size_t plen = j.size();
750
40.0M
      size_t found = scw.find(j);
751
40.0M
      if ((found > 0) && (found < wl - plen)) {
752
4.00M
        size_t found2 = scw.find(j, found + 1);
753
        // try to break at the second occurance
754
        // to recognize dictionary words with wordbreak
755
4.00M
        if (found2 > 0 && (found2 < wl - plen))
756
1.43M
            found = found2;
757
4.00M
        std::string substring(scw.substr(found + plen));
758
4.00M
        if (!spell(substring, candidate_stack, nullptr, nullptr, suggest_start))
759
3.19M
          continue;
760
806k
        std::string suffix(scw.substr(found));
761
806k
        scw.resize(found);
762
        // examine 2 sides of the break point
763
806k
        if (spell(scw, candidate_stack, nullptr, nullptr, suggest_start)) {
764
48.2k
          *info |= SPELL_COMPOUND;
765
48.2k
          return true;
766
48.2k
        }
767
758k
        scw.append(suffix);
768
769
        // LANG_hu: spec. dash rule
770
758k
        if (langnum == LANG_hu && j == "-") {
771
77.4k
          suffix = scw.substr(found + 1);
772
77.4k
          scw.resize(found + 1);
773
77.4k
          if (spell(scw, candidate_stack, nullptr, nullptr, suggest_start)) {
774
3.23k
            *info |= SPELL_COMPOUND;
775
3.23k
            return true;  // check the first part with dash
776
3.23k
          }
777
74.1k
          scw.append(suffix);
778
74.1k
        }
779
        // end of LANG specific region
780
758k
      }
781
40.0M
    }
782
783
    // other patterns (break at first break point)
784
40.0M
    for (auto& j : wordbreak) {
785
40.0M
      size_t plen = j.size(), found = scw.find(j);
786
40.0M
      if ((found > 0) && (found < wl - plen)) {
787
3.93M
        if (!spell(scw.substr(found + plen), candidate_stack, nullptr, nullptr, suggest_start))
788
3.32M
          continue;
789
614k
        std::string suffix(scw.substr(found));
790
614k
        scw.resize(found);
791
        // examine 2 sides of the break point
792
614k
        if (spell(scw, candidate_stack, nullptr, nullptr, suggest_start)) {
793
4.54k
          *info |= SPELL_COMPOUND;
794
4.54k
          return true;
795
4.54k
        }
796
609k
        scw.append(suffix);
797
798
        // LANG_hu: spec. dash rule
799
609k
        if (langnum == LANG_hu && j == "-") {
800
69.8k
          suffix = scw.substr(found + 1);
801
69.8k
          scw.resize(found + 1);
802
69.8k
          if (spell(scw, candidate_stack, nullptr, nullptr, suggest_start)) {
803
896
            *info |= SPELL_COMPOUND;
804
896
            return true;  // check the first part with dash
805
896
          }
806
68.9k
          scw.append(suffix);
807
68.9k
        }
808
        // end of LANG specific region
809
609k
      }
810
40.0M
    }
811
12.5M
  }
812
813
12.6M
  return false;
814
13.7M
}
815
816
struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root,
817
17.5M
                                       std::chrono::steady_clock::time_point suggest_start) {
818
  // check overall suggest time limit
819
17.5M
  if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
820
353
    return nullptr;
821
822
17.5M
  std::string word;
823
824
  // remove IGNORE characters from the string
825
17.5M
  clean_ignore(word, w);
826
827
17.5M
  if (word.empty())
828
1.49k
    return nullptr;
829
830
  // word reversing wrapper for complex prefixes
831
17.5M
  if (complexprefixes) {
832
3.74M
    if (utf8)
833
127k
      reverseword_utf(word);
834
3.61M
    else
835
3.61M
      reverseword(word);
836
3.74M
  }
837
838
17.5M
  int len = word.size();
839
840
  // look word in hash table
841
17.5M
  struct hentry* he = nullptr;
842
35.0M
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
843
17.5M
    he = m_HMgrs[i]->lookup(word.c_str(), word.size());
844
845
    // check forbidden and onlyincompound words
846
17.5M
    if ((he) && (he->astr) && (pAMgr) &&
847
179k
        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
848
43.0k
      if (info)
849
43.0k
        *info |= SPELL_FORBIDDEN;
850
      // LANG_hu section: set dash information for suggestions
851
43.0k
      if (langnum == LANG_hu) {
852
16.6k
        if (pAMgr->get_compoundflag() &&
853
11.6k
            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
854
2.75k
          if (info)
855
2.75k
            *info |= SPELL_COMPOUND;
856
2.75k
        }
857
16.6k
      }
858
43.0k
      return nullptr;
859
43.0k
    }
860
861
    // he = next not needaffix, onlyincompound homonym or onlyupcase word
862
17.5M
    while (he && (he->astr) && pAMgr &&
863
138k
           ((pAMgr->get_needaffix() &&
864
26.3k
             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
865
135k
            (pAMgr->get_onlyincompound() &&
866
14.1k
             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
867
134k
            (info && (*info & SPELL_INITCAP) &&
868
5.08k
             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
869
6.63k
      he = he->next_homonym;
870
17.5M
  }
871
872
  // check with affixes
873
17.5M
  if (!he && pAMgr) {
874
    // try stripping off affixes
875
17.1M
    AffixScratch scratch;
876
17.1M
    he = pAMgr->affix_check(word, 0, len, scratch, 0);
877
878
    // check compound restriction and onlyupcase
879
17.1M
    if (he && he->astr &&
880
64.8k
        ((pAMgr->get_onlyincompound() &&
881
10.7k
          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
882
63.9k
         (info && (*info & SPELL_INITCAP) &&
883
4.17k
          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
884
1.57k
      he = nullptr;
885
1.57k
    }
886
887
17.1M
    if (he) {
888
65.3k
      if ((he->astr) && (pAMgr) &&
889
63.3k
          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
890
2.01k
        if (info)
891
2.01k
          *info |= SPELL_FORBIDDEN;
892
2.01k
        return nullptr;
893
2.01k
      }
894
63.3k
      if (root) {
895
0
        root->assign(he->word);
896
0
        if (complexprefixes) {
897
0
          if (utf8)
898
0
            reverseword_utf(*root);
899
0
          else
900
0
            reverseword(*root);
901
0
        }
902
0
      }
903
      // try check compound word
904
17.1M
    } else if (pAMgr->get_compound()) {
905
3.83M
      struct hentry* rwords[100] = {};  // buffer for COMPOUND pattern checking
906
907
      // first allow only 2 words in the compound
908
3.83M
      int setinfo = SPELL_COMPOUND_2;
909
3.83M
      if (info)
910
3.83M
        setinfo |= *info;
911
3.83M
      he = pAMgr->compound_check(word, 0, 0, 100, 0, nullptr, (hentry**)&rwords, 0, 0, &setinfo, scratch);
912
3.83M
      if (info)
913
3.83M
        *info = setinfo & ~SPELL_COMPOUND_2;
914
      // if not 2-word compoud word, try with 3 or more words
915
      // (only if original info didn't forbid it)
916
3.83M
      if (!he && info && !(*info & SPELL_COMPOUND_2)) {
917
3.83M
        *info &= ~SPELL_COMPOUND_2;
918
3.83M
        he = pAMgr->compound_check(word, 0, 0, 100, 0, nullptr, (hentry**)&rwords, 0, 0, info, scratch);
919
        // accept the compound with 3 or more words only if it is
920
        // - not a dictionary word with a typo and
921
        // - not two words written separately,
922
        // - or if it's an arbitrary number accepted by compound rules (e.g. 999%)
923
3.83M
        if (he && !isdigit(word[0]))
924
89.1k
        {
925
89.1k
          std::vector<std::string> slst;
926
89.1k
          if (pSMgr->suggest(slst, word, nullptr, /*test_simplesug=*/true))
927
38.9k
            he = nullptr;
928
89.1k
        }
929
3.83M
      }
930
931
      // LANG_hu section: `moving rule' with last dash
932
3.83M
      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
933
115k
        std::string dup(word, 0, len - 1);
934
115k
        he = pAMgr->compound_check(dup, -5, 0, 100, 0, nullptr, (hentry**)&rwords, 1, 0, info, scratch);
935
115k
      }
936
      // end of LANG specific region
937
3.83M
      if (he) {
938
60.4k
        if (root) {
939
0
          root->assign(he->word);
940
0
          if (complexprefixes) {
941
0
            if (utf8)
942
0
              reverseword_utf(*root);
943
0
            else
944
0
              reverseword(*root);
945
0
          }
946
0
        }
947
60.4k
        if (info)
948
60.4k
          *info |= SPELL_COMPOUND;
949
60.4k
      }
950
3.83M
    }
951
17.1M
  }
952
953
17.5M
  return he;
954
17.5M
}
955
956
// Largest allowed size of the suggestion candidate stack; fuzzing builds use
// a smaller bound than regular builds.
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#  define MAX_CANDIDATE_STACK_DEPTH 512
#else
#  define MAX_CANDIDATE_STACK_DEPTH 2048
#endif
961
962
164k
std::vector<std::string> HunspellImpl::suggest(const std::string& word, std::vector<std::string>& suggest_candidate_stack, std::chrono::steady_clock::time_point suggest_start) {
963
964
164k
  if (suggest_candidate_stack.size() > MAX_CANDIDATE_STACK_DEPTH || // apply a fairly arbitrary depth limit
965
      // something very broken if suggest ends up calling itself with the same word
966
164k
      std::find(suggest_candidate_stack.begin(), suggest_candidate_stack.end(), word) != suggest_candidate_stack.end()) {
967
48
    return { };
968
48
  }
969
970
164k
  if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
971
2
    return { };
972
973
164k
  bool capwords;
974
164k
  size_t abbv;
975
164k
  int captype;
976
164k
  std::vector<std::string> spell_candidate_stack;
977
164k
  suggest_candidate_stack.push_back(word);
978
164k
  std::vector<std::string> slst = suggest_internal(word, spell_candidate_stack, suggest_candidate_stack,
979
164k
                                                   capwords, abbv, captype, suggest_start);
980
164k
  suggest_candidate_stack.pop_back();
981
  // word reversing wrapper for complex prefixes
982
164k
  if (complexprefixes) {
983
61.1k
    for (auto& j : slst) {
984
61.1k
      if (utf8)
985
46.3k
        reverseword_utf(j);
986
14.8k
      else
987
14.8k
        reverseword(j);
988
61.1k
    }
989
14.8k
  }
990
991
  // capitalize
992
164k
  if (capwords) {
993
28.7k
    for (auto& j : slst) {
994
20.1k
      std::string capitalized(j);
995
20.1k
      mkinitcap(capitalized);
996
20.1k
      if (capitalized == word)
997
63
        continue;  // capitalizing would just reproduce the misspelled word
998
20.0k
      j = std::move(capitalized);
999
20.0k
    }
1000
28.7k
  }
1001
1002
  // expand suggestions with dot(s)
1003
164k
  if (abbv && pAMgr && pAMgr->get_sugswithdots() && word.size() >= abbv) {
1004
125
    for (auto& j : slst) {
1005
125
      j.append(word.substr(word.size() - abbv));
1006
125
    }
1007
27
  }
1008
1009
  // remove bad capitalized and forbidden forms
1010
164k
  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
1011
164k
    switch (captype) {
1012
14.2k
      case INITCAP:
1013
32.6k
      case ALLCAP: {
1014
32.6k
        size_t l = 0;
1015
57.8k
        for (size_t j = 0; j < slst.size(); ++j) {
1016
25.1k
          if (slst[j].find(' ') == std::string::npos && !spell(slst[j], spell_candidate_stack, nullptr, nullptr, suggest_start)) {
1017
7.89k
            std::string s;
1018
7.89k
            std::vector<w_char> w;
1019
7.89k
            if (utf8) {
1020
5.37k
              u8_u16(w, slst[j]);
1021
5.37k
            } else {
1022
2.52k
              s = slst[j];
1023
2.52k
            }
1024
7.89k
            mkallsmall2(s, w);
1025
7.89k
            if (spell(s, spell_candidate_stack, nullptr, nullptr, suggest_start)) {
1026
459
              slst[l] = std::move(s);
1027
459
              ++l;
1028
7.44k
            } else {
1029
7.44k
              mkinitcap2(s, w);
1030
7.44k
              if (spell(s, spell_candidate_stack, nullptr, nullptr, suggest_start)) {
1031
174
                slst[l] = std::move(s);
1032
174
                ++l;
1033
174
              }
1034
7.44k
            }
1035
17.2k
          } else {
1036
17.2k
            slst[l] = slst[j];
1037
17.2k
            ++l;
1038
17.2k
          }
1039
25.1k
        }
1040
32.6k
        slst.resize(l);
1041
32.6k
      }
1042
164k
    }
1043
164k
  }
1044
1045
  // remove duplications
1046
164k
  size_t l = 0;
1047
444k
  for (size_t j = 0; j < slst.size(); ++j) {
1048
279k
    slst[l] = slst[j];
1049
6.03M
    for (size_t k = 0; k < l; ++k) {
1050
5.78M
      if (slst[k] == slst[j]) {
1051
26.6k
        --l;
1052
26.6k
        break;
1053
26.6k
      }
1054
5.78M
    }
1055
279k
    ++l;
1056
279k
  }
1057
164k
  slst.resize(l);
1058
1059
  // output conversion
1060
164k
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : nullptr;
1061
164k
  if (rl) {
1062
6.69k
    size_t l = 0;
1063
11.3k
    for (size_t i = 0; i < slst.size(); ++i) {
1064
4.70k
      std::string wspace;
1065
4.70k
      if (rl->conv(slst[i], wspace)) {
1066
1.77k
        slst[i] = std::move(wspace);
1067
1.77k
      }
1068
      // gh#1002: OCONV can map a generated form back to the input word
1069
      // (e.g. "románórum" -> "romanórum" when the user typed "romanórum"),
1070
      // leaving the misspelled word as its own suggestion.
1071
4.70k
      if (slst[i] == word)
1072
3
        continue;
1073
4.69k
      if (l != i)
1074
3
        slst[l] = std::move(slst[i]);
1075
4.69k
      ++l;
1076
4.69k
    }
1077
6.69k
    slst.resize(l);
1078
6.69k
  }
1079
164k
  return slst;
1080
164k
}
1081
1082
80.2k
std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
1083
80.2k
  std::vector<std::string> suggest_candidate_stack;
1084
80.2k
  auto suggest_start = std::chrono::steady_clock::now();
1085
80.2k
  return suggest(word, suggest_candidate_stack, suggest_start);
1086
80.2k
}
1087
1088
// Builds the raw suggestion list for `word`.
//
// word                    - input word; XML input of the simplified API is
//                           forwarded to spellml()
// spell_candidate_stack   - passed through to the spell() calls made here
// suggest_candidate_stack - passed through to the recursive suggest() call
//                           made for dash-separated chunks
// capwords                - out: set to true when the input is INITCAP or
//                           HUHINITCAP (reset to false on entry)
// abbv                    - out: filled in by cleanword2() (reset to 0 on entry)
// captype                 - out: filled in by cleanword2() (reset to NOCAP on entry)
// suggest_start           - start time for the global time limit; the function
//                           returns whatever it has collected once the limit
//                           is exceeded
//
// Returns the collected suggestions; empty when no suggestion manager or
// dictionary is loaded, or when the input is too long.
std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
1089
        std::vector<std::string>& spell_candidate_stack,
1090
        std::vector<std::string>& suggest_candidate_stack,
1091
        bool& capwords, size_t& abbv, int& captype,
1092
164k
        std::chrono::steady_clock::time_point suggest_start) {
1093
164k
  // reset the out-parameters so every early return leaves them defined
  captype = NOCAP;
1094
164k
  abbv = 0;
1095
164k
  capwords = false;
1096
1097
164k
  std::vector<std::string> slst;
1098
1099
164k
  int onlycmpdsug = 0;
1100
164k
  if (!pSMgr || m_HMgrs.empty())
1101
0
    return slst;
1102
1103
  // process XML input of the simplified API (see manual)
1104
164k
  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
1105
23.9k
    if (word.size() > MAXSPELLMLLEN)
1106
461
      return slst;
1107
23.4k
    return spellml(word);
1108
23.9k
  }
  // reject over-long input (the limit depends on the dictionary encoding)
1109
140k
  if (utf8) {
1110
79.0k
    if (word.size() >= MAXWORDUTF8LEN)
1111
338
      return slst;
1112
79.0k
  } else {
1113
61.6k
    if (word.size() >= MAXWORDLEN)
1114
365
      return slst;
1115
61.6k
  }
1116
139k
  size_t wl = 0;
1117
1118
139k
  std::string scw;
1119
139k
  std::vector<w_char> sunicw;
1120
1121
  // input conversion
1122
139k
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : nullptr;
1123
139k
  {
1124
139k
    std::string wspace;
1125
1126
139k
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1127
139k
    if (convstatus)
1128
49.5k
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1129
90.4k
    else
1130
90.4k
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1131
1132
139k
    if (wl == 0)
1133
0
      return slst;
1134
1135
139k
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
1136
139k
    if (wl > 32768)
1137
17
      return slst;
1138
139k
#endif
1139
139k
  }
1140
1141
139k
  bool good = false;
1142
1143
  // check capitalized form for FORCEUCASE
1144
139k
  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
1145
303
    int info = SPELL_ORIGCAP;
1146
303
    if (checkword(scw, &info, nullptr, suggest_start)) {
      // the capitalized form becomes the only suggestion
1147
3
      std::string form(std::move(scw));
1148
3
      mkinitcap(form);
1149
3
      slst.push_back(std::move(form));
1150
3
      return slst;
1151
3
    }
1152
303
  }
1153
1154
  // collect suggestions for the case variants that match the input's captype
139k
  switch (captype) {
1155
67.1k
    case NOCAP: {
1156
67.1k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1157
67.1k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1158
25
          return slst;
1159
67.1k
      if (abbv) {
        // retry with a single trailing dot appended
1160
2.31k
        std::string wspace(scw);
1161
2.31k
        wspace.push_back('.');
1162
2.31k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1163
2.31k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1164
0
            return slst;
1165
2.31k
      }
1166
67.1k
      break;
1167
67.1k
    }
1168
1169
67.1k
    case INITCAP: {
1170
14.2k
      capwords = true;
1171
14.2k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1172
14.2k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1173
0
          return slst;
      // also try the all-lowercase form
1174
14.2k
      std::string wspace(scw);
1175
14.2k
      mkallsmall2(wspace, sunicw);
1176
14.2k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1177
14.2k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1178
1
          return slst;
1179
14.2k
      break;
1180
14.2k
    }
1181
14.4k
    case HUHINITCAP:
1182
14.4k
      capwords = true;
1183
      /* FALLTHROUGH */
1184
40.0k
    case HUHCAP: {
1185
40.0k
      good |= pSMgr->suggest(slst, scw, &onlycmpdsug);
1186
40.0k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1187
23
          return slst;
1188
      // something.The -> something. The
1189
40.0k
      size_t dot_pos = scw.find('.');
1190
40.0k
      if (dot_pos != std::string::npos) {
1191
9.82k
        std::string postdot = scw.substr(dot_pos + 1);
1192
9.82k
        int captype_;
1193
9.82k
        if (utf8) {
1194
2.53k
          std::vector<w_char> postdotu;
1195
2.53k
          u8_u16(postdotu, postdot);
1196
2.53k
          captype_ = get_captype_utf8(postdotu, langnum);
1197
7.28k
        } else {
1198
7.28k
          captype_ = get_captype(postdot, csconv);
1199
7.28k
        }
1200
9.82k
        if (captype_ == INITCAP) {
1201
2.11k
          std::string str(scw);
1202
2.11k
          str.insert(dot_pos + 1, 1, ' ');
1203
2.11k
          insert_sug(slst, str);
1204
2.11k
        }
1205
9.82k
      }
1206
1207
40.0k
      std::string wspace;
1208
1209
40.0k
      if (captype == HUHINITCAP) {
1210
        // TheOpenOffice.org -> The OpenOffice.org
1211
14.4k
        wspace = scw;
1212
14.4k
        mkinitsmall2(wspace, sunicw);
1213
14.4k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1214
14.4k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1215
9
            return slst;
1216
14.4k
      }
1217
40.0k
      wspace = scw;
1218
40.0k
      mkallsmall2(wspace, sunicw);
1219
40.0k
      if (spell(wspace, spell_candidate_stack, nullptr, nullptr, suggest_start))
1220
1.01k
        insert_sug(slst, wspace);
      // remember where the lowercase-based suggestions start; the
      // "aNew" post-processing below only looks at entries from here on
1221
40.0k
      size_t prevns = slst.size();
1222
40.0k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1223
40.0k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1224
90
          return slst;
1225
39.9k
      if (captype == HUHINITCAP) {
1226
14.4k
        mkinitcap2(wspace, sunicw);
1227
14.4k
        if (spell(wspace, spell_candidate_stack, nullptr, nullptr, suggest_start))
1228
294
          insert_sug(slst, wspace);
1229
14.4k
        good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1230
14.4k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1231
9
            return slst;
1232
14.4k
      }
1233
      // aNew -> "a New" (instead of "a new")
1234
46.7k
      for (size_t j = prevns; j < slst.size(); ++j) {
1235
6.83k
        const char* space = strchr(slst[j].c_str(), ' ');
1236
6.83k
        if (space) {
1237
1.35k
          size_t slen = strlen(space + 1);
1238
          // different case after space (need capitalisation)
1239
1.35k
          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1) != 0) {
            // copy both halves before the vector is modified, because
            // `space` points into slst[j]
1240
1.04k
            std::string first(slst[j].c_str(), space + 1);
1241
1.04k
            std::string second(space + 1);
1242
1.04k
            std::vector<w_char> w;
1243
1.04k
            if (utf8)
1244
205
              u8_u16(w, second);
1245
1.04k
            mkinitcap2(second, w);
1246
            // set as first suggestion
1247
1.04k
            slst.erase(slst.begin() + j);
1248
1.04k
            slst.insert(slst.begin(), first + second);
1249
1.04k
          }
1250
1.35k
        }
1251
6.83k
      }
1252
39.9k
      break;
1253
39.9k
    }
1254
1255
18.4k
    case ALLCAP: {
1256
18.4k
      std::string wspace(scw);
1257
18.4k
      mkallsmall2(wspace, sunicw);
1258
18.4k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1259
18.4k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1260
14
          return slst;
1261
18.4k
      if (pAMgr && pAMgr->get_keepcase() && spell(wspace, spell_candidate_stack, nullptr, nullptr, suggest_start))
1262
1
        insert_sug(slst, wspace);
1263
18.4k
      mkinitcap2(wspace, sunicw);
1264
18.4k
      good |= pSMgr->suggest(slst, wspace, &onlycmpdsug);
1265
18.4k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1266
16
          return slst;
      // convert every suggestion collected so far back to upper case
1267
18.4k
      for (auto& j : slst) {
1268
5.93k
        mkallcap(j);
1269
5.93k
        if (pAMgr && pAMgr->get_checksharps()) {
          // replace the sharp s (UTF-8 C3 9F, or single byte DF) with "SS"
1270
528
          if (utf8) {
1271
139
            mystrrep(j, "\xC3\x9F", "SS");
1272
389
          } else {
1273
389
            mystrrep(j, "\xDF", "SS");
1274
389
          }
1275
528
        }
1276
5.93k
      }
1277
18.4k
      break;
1278
18.4k
    }
1279
139k
  }
1280
1281
  // LANG_hu section: replace '-' with ' ' in Hungarian
1282
139k
  if (langnum == LANG_hu) {
1283
7.90k
    for (auto& j : slst) {
1284
7.90k
      size_t pos = j.find('-');
1285
7.90k
      if (pos != std::string::npos) {
1286
5.92k
        int info = 0;
        // spell-check the suggestion with its first dash removed; only the
        // flags written to `info` are used
1287
5.92k
        std::string w(j.substr(0, pos));
1288
5.92k
        w.append(j.substr(pos + 1));
1289
5.92k
        (void)spell(w, spell_candidate_stack, &info, nullptr, suggest_start);
1290
5.92k
        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1291
20
          j[pos] = ' ';
1292
20
        } else
          // j[pos] already holds '-' here, so this assignment changes nothing
1293
5.90k
          j[pos] = '-';
1294
5.92k
      }
1295
7.90k
    }
1296
6.17k
  }
1297
  // END OF LANG_hu section
1298
  // try the ngram approach, since no good suggestion was found
1299
139k
  if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1300
130k
    switch (captype) {
1301
64.1k
      case NOCAP: {
1302
64.1k
        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
1303
64.1k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1304
78
            return slst;
1305
64.0k
        break;
1306
64.1k
      }
1307
      // (no fallthrough here: the NOCAP case above ends with break)
1308
64.0k
      case HUHINITCAP:
1309
13.6k
        capwords = true;
1310
      /* FALLTHROUGH */
1311
37.1k
      case HUHCAP: {
1312
37.1k
        std::string wspace(scw);
1313
37.1k
        mkallsmall2(wspace, sunicw);
1314
37.1k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
1315
37.1k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1316
5
            return slst;
1317
37.1k
        break;
1318
37.1k
      }
1319
37.1k
      case INITCAP: {
1320
12.3k
        capwords = true;
1321
12.3k
        std::string wspace(scw);
1322
12.3k
        mkallsmall2(wspace, sunicw);
1323
12.3k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
1324
12.3k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1325
0
            return slst;
1326
12.3k
        break;
1327
12.3k
      }
1328
16.9k
      case ALLCAP: {
1329
16.9k
        std::string wspace(scw);
1330
16.9k
        mkallsmall2(wspace, sunicw);
1331
16.9k
        size_t oldns = slst.size();
1332
16.9k
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
1333
16.9k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1334
0
            return slst;
        // upper-case only the entries appended by ngsuggest()
1335
21.3k
        for (size_t j = oldns; j < slst.size(); ++j) {
1336
4.42k
          mkallcap(slst[j]);
1337
4.42k
        }
1338
16.9k
        break;
1339
16.9k
      }
1340
130k
    }
1341
130k
  }
1342
1343
  // try dash suggestion (Afo-American -> Afro-American)
1344
  // Note: LibreOffice was modified to treat dashes as word
1345
  // characters to check "scot-free" etc. word forms, but
1346
  // we need to handle suggestions for "Afo-American", etc.,
1347
  // while "Afro-American" is missing from the dictionary.
1348
  // TODO avoid possible overgeneration
1349
139k
  size_t dash_pos = scw.find('-');
1350
139k
  if (dash_pos != std::string::npos) {
    // nodashsug stays 1 only while no existing suggestion contains a dash
1351
89.9k
    int nodashsug = 1;
1352
143k
    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1353
53.8k
      if (slst[j].find('-') != std::string::npos)
1354
4.45k
        nodashsug = 0;
1355
53.8k
    }
1356
1357
89.9k
    size_t prev_pos = 0;
1358
89.9k
    bool last = false;
1359
1360
    // walk the dash-separated chunks [prev_pos, dash_pos); after the final
    // dash, dash_pos is set to scw.size() so the trailing chunk is handled
    // with last == true
273k
    while (!good && nodashsug && !last) {
1361
202k
      if (dash_pos == scw.size())
1362
53.1k
        last = true;
1363
202k
      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1364
202k
      if (chunk != word && !spell(chunk, spell_candidate_stack, nullptr, nullptr, suggest_start)) {
        // recursive call; suggest() guards against unbounded recursion via
        // suggest_candidate_stack
1365
84.3k
        std::vector<std::string> nlst = suggest(chunk, suggest_candidate_stack, suggest_start);
1366
84.3k
        if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1367
18.5k
            return slst;
        // iterate in reverse over the chunk's suggestions
1368
263k
        for (auto j = nlst.rbegin(); j != nlst.rend(); ++j) {
          // rebuild the full word: prefix + corrected chunk (+ "-" + rest)
1369
197k
          std::string wspace = scw.substr(0, prev_pos);
1370
197k
          wspace.append(*j);
1371
197k
          if (!last) {
1372
20.2k
            wspace.append("-");
1373
20.2k
            wspace.append(scw.substr(dash_pos + 1));
1374
20.2k
          }
1375
197k
          int info = 0;
1376
197k
          if (pAMgr && pAMgr->get_forbiddenword())
1377
197k
            checkword(wspace, &info, nullptr, suggest_start);
1378
197k
          if (!(info & SPELL_FORBIDDEN))
1379
197k
            insert_sug(slst, wspace);
1380
197k
        }
        // stop after the first misspelled chunk has been processed
1381
65.7k
        nodashsug = 0;
1382
65.7k
      }
1383
183k
      if (!last) {
1384
148k
        prev_pos = dash_pos + 1;
1385
148k
        dash_pos = scw.find('-', prev_pos);
1386
148k
      }
1387
183k
      if (dash_pos == std::string::npos)
1388
63.3k
        dash_pos = scw.size();
1389
183k
    }
1390
89.9k
  }
1391
121k
  return slst;
1392
139k
}
1393
1394
0
const std::string& HunspellImpl::get_dict_encoding() const {
1395
0
  return encoding;
1396
0
}
1397
1398
19.9k
std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1399
19.9k
  std::vector<std::string> slst;
1400
1401
19.9k
  std::string result2;
1402
19.9k
  if (desc.empty())
1403
15.9k
    return slst;
1404
1.13M
  for (const auto& i : desc) {
1405
1.13M
    std::string result;
1406
1407
    // add compound word parts (except the last one)
1408
1.13M
    const char* s = i.c_str();
1409
1.13M
    const char* part = strstr(s, MORPH_PART);
1410
1.13M
    if (part) {
1411
762k
      const char* nextpart = strstr(part + 1, MORPH_PART);
1412
1.29M
      while (nextpart) {
1413
535k
        std::string field;
1414
535k
        copy_field(field, part, MORPH_PART);
1415
535k
        result.append(field);
1416
535k
        part = nextpart;
1417
535k
        nextpart = strstr(part + 1, MORPH_PART);
1418
535k
      }
1419
762k
      s = part;
1420
762k
    }
1421
1422
1.13M
    std::string tok(s);
1423
1.13M
    size_t alt = 0;
1424
6.50M
    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1425
5.37M
      tok[alt + 1] = MSEP_ALT;
1426
5.37M
    }
1427
1.13M
    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1428
6.46M
    for (auto& k : pl) {
1429
      // add derivational suffixes
1430
6.46M
      if (k.find(MORPH_DERI_SFX) != std::string::npos) {
1431
        // remove inflectional suffixes
1432
25.0k
        const size_t is = k.find(MORPH_INFL_SFX);
1433
25.0k
        if (is != std::string::npos)
1434
121
          k.resize(is);
1435
25.0k
        std::vector<std::string> singlepl;
1436
25.0k
        singlepl.push_back(k);
1437
25.0k
        std::string sg = pSMgr->suggest_gen(singlepl, k);
1438
25.0k
        if (!sg.empty()) {
1439
11.8k
          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1440
19.3k
          for (auto& j : gen) {
1441
19.3k
            result2.push_back(MSEP_REC);
1442
19.3k
            result2.append(result);
1443
19.3k
            result2.append(j);
1444
19.3k
          }
1445
11.8k
        }
1446
6.43M
      } else {
1447
6.43M
        result2.push_back(MSEP_REC);
1448
6.43M
        result2.append(result);
1449
6.43M
        if (k.find(MORPH_SURF_PFX) != std::string::npos) {
1450
6.94k
          std::string field;
1451
6.94k
          copy_field(field, k, MORPH_SURF_PFX);
1452
6.94k
          result2.append(field);
1453
6.94k
        }
1454
6.43M
        std::string field;
1455
6.43M
        copy_field(field, k, MORPH_STEM);
1456
6.43M
        result2.append(field);
1457
6.43M
      }
1458
6.46M
    }
1459
1.13M
  }
1460
4.01k
  slst = line_tok(result2, MSEP_REC);
1461
4.01k
  uniqlist(slst);
1462
4.01k
  return slst;
1463
19.9k
}
1464
1465
19.9k
std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1466
19.9k
  return stem(analyze(word));
1467
19.9k
}
1468
1469
0
const std::string& HunspellImpl::get_wordchars_cpp() const {
1470
0
  return pAMgr->get_wordchars();
1471
0
}
1472
1473
0
const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1474
0
  return pAMgr->get_wordchars_utf16();
1475
0
}
1476
1477
150k
void HunspellImpl::mkinitcap(std::string& u8) {
1478
150k
  if (utf8) {
1479
27.1k
    std::vector<w_char> u16;
1480
27.1k
    u8_u16(u16, u8);
1481
27.1k
    ::mkinitcap_utf(u16, langnum);
1482
27.1k
    u16_u8(u8, u16);
1483
123k
  } else {
1484
123k
    ::mkinitcap(u8, csconv);
1485
123k
  }
1486
150k
}
1487
1488
1.62M
int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1489
1.62M
  if (utf8) {
1490
383k
    ::mkinitcap_utf(u16, langnum);
1491
383k
    u16_u8(u8, u16);
1492
1.24M
  } else {
1493
1.24M
    ::mkinitcap(u8, csconv);
1494
1.24M
  }
1495
1.62M
  return u8.size();
1496
1.62M
}
1497
1498
14.4k
int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1499
14.4k
  if (utf8) {
1500
3.59k
    ::mkinitsmall_utf(u16, langnum);
1501
3.59k
    u16_u8(u8, u16);
1502
10.8k
  } else {
1503
10.8k
    ::mkinitsmall(u8, csconv);
1504
10.8k
  }
1505
14.4k
  return u8.size();
1506
14.4k
}
1507
1508
38.0k
int HunspellImpl::add(const std::string& word) {
1509
38.0k
  if (!m_HMgrs.empty())
1510
38.0k
    return m_HMgrs[0]->add(word);
1511
0
  return 0;
1512
38.0k
}
1513
1514
0
int HunspellImpl::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
1515
0
  if (!m_HMgrs.empty())
1516
0
    return m_HMgrs[0]->add_with_flags(word, flags, desc);
1517
0
  return 0;
1518
0
}
1519
1520
37.1k
int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1521
37.1k
  if (!m_HMgrs.empty())
1522
37.1k
    return m_HMgrs[0]->add_with_affix(word, example);
1523
0
  return 0;
1524
37.1k
}
1525
1526
31.2k
int HunspellImpl::remove(const std::string& word) {
1527
31.2k
  if (!m_HMgrs.empty())
1528
31.2k
    return m_HMgrs[0]->remove(word);
1529
0
  return 0;
1530
31.2k
}
1531
1532
0
const std::string& HunspellImpl::get_version_cpp() const {
1533
0
  return pAMgr->get_version();
1534
0
}
1535
1536
0
struct cs_info* HunspellImpl::get_csconv() {
1537
  // Preserve pre-1.7.3 ABI: returned pointer is now to read-only data,
1538
  // but the public signature still says non-const. Callers must not
1539
  // write through it.
1540
0
  return const_cast<struct cs_info*>(csconv);
1541
0
}
1542
1543
2.38M
// Appends `st` to `result`, separated from any existing content by a newline.
// An empty `st` leaves `result` unchanged.
void HunspellImpl::cat_result(std::string& result, const std::string& st) {
  if (st.empty())
    return;
  if (!result.empty())
    result.append("\n");
  result.append(st);
}
1550
1551
71.7k
std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1552
71.7k
  std::vector<std::string> slst = analyze_internal(word);
1553
  // output conversion
1554
71.7k
  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : nullptr;
1555
71.7k
  if (rl) {
1556
161k
    for (size_t i = 0; rl && i < slst.size(); ++i) {
1557
160k
      std::string wspace;
1558
160k
      if (rl->conv(slst[i], wspace)) {
1559
71
        slst[i] = std::move(wspace);
1560
71
      }
1561
160k
    }
1562
1.25k
  }
1563
71.7k
  return slst;
1564
71.7k
}
1565
1566
71.7k
std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
1567
71.7k
  std::vector<std::string> candidate_stack, slst;
1568
71.7k
  if (!pSMgr || m_HMgrs.empty())
1569
0
    return slst;
1570
71.7k
  if (utf8) {
1571
16.4k
    if (word.size() >= MAXWORDUTF8LEN)
1572
244
      return slst;
1573
55.3k
  } else {
1574
55.3k
    if (word.size() >= MAXWORDLEN)
1575
1.08k
      return slst;
1576
55.3k
  }
1577
70.4k
  int captype = NOCAP;
1578
70.4k
  size_t abbv = 0;
1579
70.4k
  size_t wl = 0;
1580
1581
70.4k
  std::string scw;
1582
70.4k
  std::vector<w_char> sunicw;
1583
1584
  // input conversion
1585
70.4k
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : nullptr;
1586
70.4k
  {
1587
70.4k
    std::string wspace;
1588
1589
70.4k
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1590
70.4k
    if (convstatus)
1591
461
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1592
69.9k
    else
1593
69.9k
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1594
1595
70.4k
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
1596
70.4k
    if (wl > 32768)
1597
19
      return slst;
1598
70.4k
#endif
1599
70.4k
  }
1600
1601
70.4k
  if (wl == 0) {
1602
21.8k
    if (abbv) {
1603
323
      scw.clear();
1604
5.65k
      for (wl = 0; wl < abbv; wl++)
1605
5.33k
        scw.push_back('.');
1606
323
      abbv = 0;
1607
323
    } else
1608
21.4k
      return slst;
1609
21.8k
  }
1610
1611
48.9k
  std::string result;
1612
1613
48.9k
  auto suggest_start = std::chrono::steady_clock::now();
1614
1615
48.9k
  size_t n = 0;
1616
  // test numbers
1617
  // LANG_hu section: set dash information for suggestions
1618
48.9k
  if (langnum == LANG_hu) {
1619
15.5k
    size_t n2 = 0;
1620
15.5k
    size_t n3 = 0;
1621
1622
21.2k
    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1623
15.5k
                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1624
5.74k
      n++;
1625
5.74k
      if ((scw[n] == '.') || (scw[n] == ',')) {
1626
284
        if (((n2 == 0) && (n > 3)) ||
1627
274
            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1628
34
          break;
1629
250
        n2++;
1630
250
        n3 = n;
1631
250
      }
1632
5.74k
    }
1633
1634
15.5k
    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1635
7
      return slst;
1636
15.5k
    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) && checkword(scw.substr(n), nullptr, nullptr, suggest_start))) {
1637
235
      result.append(scw);
1638
235
      result.resize(n - 1);
1639
235
      if (n == wl)
1640
225
        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1641
10
      else {
1642
10
        std::string chunk = scw.substr(n - 1, 1);
1643
10
        cat_result(result, pSMgr->suggest_morph(chunk));
1644
10
        result.push_back('+');  // XXX SPEC. MORPHCODE
1645
10
        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1646
10
      }
1647
235
      return line_tok(result, MSEP_REC);
1648
235
    }
1649
15.5k
  }
1650
  // END OF LANG_hu section
1651
1652
48.7k
  switch (captype) {
1653
9.05k
    case HUHCAP:
1654
11.6k
    case HUHINITCAP:
1655
35.7k
    case NOCAP: {
1656
35.7k
      cat_result(result, pSMgr->suggest_morph(scw));
1657
35.7k
      if (abbv) {
1658
707
        std::string u8buffer(scw);
1659
707
        u8buffer.push_back('.');
1660
707
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1661
707
      }
1662
35.7k
      break;
1663
11.6k
    }
1664
2.70k
    case INITCAP: {
1665
2.70k
      mkallsmall2(scw, sunicw);
1666
2.70k
      std::string u8buffer(scw);
1667
2.70k
      mkinitcap2(scw, sunicw);
1668
2.70k
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1669
2.70k
      cat_result(result, pSMgr->suggest_morph(scw));
1670
2.70k
      if (abbv) {
1671
377
        u8buffer.push_back('.');
1672
377
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1673
1674
377
        u8buffer = scw;
1675
377
        u8buffer.push_back('.');
1676
1677
377
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1678
377
      }
1679
2.70k
      break;
1680
11.6k
    }
1681
10.2k
    case ALLCAP: {
1682
10.2k
      cat_result(result, pSMgr->suggest_morph(scw));
1683
10.2k
      if (abbv) {
1684
857
        std::string u8buffer(scw);
1685
857
        u8buffer.push_back('.');
1686
857
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1687
857
      }
1688
10.2k
      mkallsmall2(scw, sunicw);
1689
10.2k
      std::string u8buffer(scw);
1690
10.2k
      mkinitcap2(scw, sunicw);
1691
1692
10.2k
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1693
10.2k
      cat_result(result, pSMgr->suggest_morph(scw));
1694
10.2k
      if (abbv) {
1695
857
        u8buffer.push_back('.');
1696
857
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1697
1698
857
        u8buffer = scw;
1699
857
        u8buffer.push_back('.');
1700
1701
857
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1702
857
      }
1703
10.2k
      break;
1704
11.6k
    }
1705
48.7k
  }
1706
1707
48.7k
  if (!result.empty()) {
1708
    // word reversing wrapper for complex prefixes
1709
17.3k
    if (complexprefixes) {
1710
2.29k
      if (utf8)
1711
392
        reverseword_utf(result);
1712
1.89k
      else
1713
1.89k
        reverseword(result);
1714
2.29k
    }
1715
17.3k
    return line_tok(result, MSEP_REC);
1716
17.3k
  }
1717
1718
  // compound word with dash (HU) I18n
1719
  // LANG_hu section: set dash information for suggestions
1720
1721
31.3k
  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1722
31.3k
  if (dash_pos != std::string::npos) {
1723
7.21k
    int nresult = 0;
1724
1725
7.21k
    std::string part1 = scw.substr(0, dash_pos), part2 = scw.substr(dash_pos + 1);
1726
1727
    // examine 2 sides of the dash
1728
7.21k
    if (part2.empty()) {  // base word ending with dash
1729
1.51k
      if (spell(part1, candidate_stack, nullptr, nullptr, suggest_start)) {
1730
190
        std::string p = pSMgr->suggest_morph(part1);
1731
190
        if (!p.empty()) {
1732
135
          slst = line_tok(p, MSEP_REC);
1733
135
          return slst;
1734
135
        }
1735
190
      }
1736
5.69k
    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1737
74
      if (spell(part1, candidate_stack, nullptr, nullptr, suggest_start) &&
1738
43
          (spell("-e", candidate_stack, nullptr, nullptr, suggest_start))) {
1739
23
        std::string st = pSMgr->suggest_morph(part1);
1740
23
        if (!st.empty()) {
1741
15
          result.append(st);
1742
15
        }
1743
23
        result.push_back('+');  // XXX spec. separator in MORPHCODE
1744
23
        st = pSMgr->suggest_morph("-e");
1745
23
        if (!st.empty()) {
1746
5
          result.append(st);
1747
5
        }
1748
23
        return line_tok(result, MSEP_REC);
1749
23
      }
1750
5.62k
    } else {
1751
      // first word ending with dash: word- XXX ???
1752
5.62k
      part1.push_back(' ');
1753
5.62k
      nresult = spell(part1, candidate_stack, nullptr, nullptr, suggest_start);
1754
5.62k
      part1.erase(part1.size() - 1);
1755
5.62k
      if (nresult && spell(part2, candidate_stack, nullptr, nullptr, suggest_start) &&
1756
521
          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1757
507
        std::string st = pSMgr->suggest_morph(part1);
1758
507
        if (!st.empty()) {
1759
401
          result.append(st);
1760
401
          result.push_back('+');  // XXX spec. separator in MORPHCODE
1761
401
        }
1762
507
        st = pSMgr->suggest_morph(part2);
1763
507
        if (!st.empty()) {
1764
271
          result.append(st);
1765
271
        }
1766
507
        return line_tok(result, MSEP_REC);
1767
507
      }
1768
5.62k
    }
1769
    // affixed number in correct word
1770
6.54k
    if (nresult && (dash_pos > 0) &&
1771
431
        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1772
360
         (scw[dash_pos - 1] == '.'))) {
1773
97
      n = 1;
1774
97
      if (scw[dash_pos - n] == '.')
1775
26
        n++;
1776
      // search first not a number character to left from dash
1777
267
      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1778
170
             (n < 6)) {
1779
170
        n++;
1780
170
      }
1781
97
      if (dash_pos < n)
1782
1
        n--;
1783
      // numbers: valami1000000-hoz
1784
      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1785
      // 56-hoz, 6-hoz
1786
327
      for (; n >= 1; n--) {
1787
269
        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1788
137
            continue;
1789
137
        }
1790
132
        std::string chunk = scw.substr(dash_pos - n);
1791
132
        if (checkword(chunk, nullptr, nullptr, suggest_start)) {
1792
39
          result.append(chunk);
1793
39
          std::string st = pSMgr->suggest_morph(chunk);
1794
39
          if (!st.empty()) {
1795
31
            result.append(st);
1796
31
          }
1797
39
          return line_tok(result, MSEP_REC);
1798
39
        }
1799
132
      }
1800
97
    }
1801
6.54k
  }
1802
30.6k
  return slst;
1803
31.3k
}
1804
1805
27.8k
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1806
27.8k
  std::vector<std::string> slst;
1807
27.8k
  if (!pSMgr || pl.empty())
1808
17.8k
    return slst;
1809
10.0k
  std::vector<std::string> pl2 = analyze(word);
1810
10.0k
  int captype = NOCAP, abbv = 0;
1811
10.0k
  std::string cw;
1812
10.0k
  cleanword(cw, word, &captype, &abbv);
1813
10.0k
  std::string result;
1814
1815
10.0k
  auto suggest_start = std::chrono::steady_clock::now();
1816
2.31M
  for (const auto& i : pl) {
1817
2.31M
    cat_result(result, pSMgr->suggest_gen(pl2, i, suggest_start));
1818
2.31M
  }
1819
1820
10.0k
  if (!result.empty()) {
1821
    // allcap
1822
1.82k
    if (captype == ALLCAP)
1823
10
      mkallcap(result);
1824
1825
    // line split
1826
1.82k
    slst = line_tok(result, MSEP_REC);
1827
1828
    // capitalize
1829
1.82k
    if (captype == INITCAP || captype == HUHINITCAP) {
1830
130k
      for (auto& str : slst) {
1831
130k
        mkinitcap(str);
1832
130k
      }
1833
147
    }
1834
1835
    // temporary filtering of prefix related errors (eg.
1836
    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1837
1.82k
    auto it = slst.begin();
1838
249k
    while (it != slst.end()) {
1839
247k
      if (std::chrono::steady_clock::now() - suggest_start > TIMELIMIT_GLOBAL_MS)
1840
91
        break;
1841
247k
      std::vector<std::string> candidate_stack;
1842
247k
      if (!spell(*it, candidate_stack, nullptr, nullptr, suggest_start)) {
1843
86.8k
        it = slst.erase(it);
1844
160k
      } else  {
1845
160k
        ++it;
1846
160k
      }
1847
247k
    }
1848
1.82k
  }
1849
10.0k
  return slst;
1850
27.8k
}
1851
1852
22.5k
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1853
22.5k
  std::vector<std::string> pl = analyze(pattern);
1854
22.5k
  std::vector<std::string> slst = generate(word, pl);
1855
22.5k
  uniqlist(slst);
1856
22.5k
  return slst;
1857
22.5k
}
1858
1859
// minimal XML parser functions
1860
472k
std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
1861
472k
  std::string dest;
1862
472k
  if (pos == std::string::npos)
1863
6.03k
    return dest;
1864
466k
  const char* par = in_par.c_str() + pos;
1865
466k
  char end = *par;
1866
466k
  if (end == '>')
1867
463k
    end = '<';
1868
3.06k
  else if (end != '\'' && end != '"')
1869
1.45k
    return dest;  // bad XML
1870
11.6M
  for (par++; *par != '\0' && *par != end; ++par) {
1871
11.2M
    dest.push_back(*par);
1872
11.2M
  }
1873
464k
  mystrrep(dest, "&lt;", "<");
1874
464k
  mystrrep(dest, "&amp;", "&");
1875
464k
  return dest;
1876
466k
}
1877
1878
0
int HunspellImpl::get_langnum() const {
1879
0
  return langnum;
1880
0
}
1881
1882
0
bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1883
0
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : nullptr;
1884
0
  if (rl) {
1885
0
    return rl->conv(word, dest);
1886
0
  }
1887
0
  dest.assign(word);
1888
0
  return false;
1889
0
}
1890
1891
// return the beginning of the element (attr == NULL) or the attribute
1892
74.3k
// Locates an attribute inside the tag that starts at or after `pos` in `s`.
//
// With `attr == nullptr` the position of the tag's closing '>' is returned.
// Otherwise `attr` is searched for before that '>' and must be at the very
// start of `s` or be preceded by a space or newline; the returned position
// is the one just past the matched `attr` text. Returns npos when `pos` is
// npos or no such occurrence exists.
std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
  if (pos == std::string::npos)
    return std::string::npos;

  const std::string::size_type tag_end = s.find('>', pos);
  if (!attr)
    return tag_end;

  for (;;) {
    pos = s.find(attr, pos);
    if (pos == std::string::npos || pos >= tag_end)
      return std::string::npos;
    const bool at_boundary = (pos == 0) || (s[pos - 1] == ' ') || (s[pos - 1] == '\n');
    if (at_boundary)
      return pos + strlen(attr);
    // matched inside another token: continue after this occurrence
    pos += strlen(attr);
  }
}
1909
1910
int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
1911
                                const char* attr,
1912
74.3k
                                const char* value) {
1913
74.3k
  const std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
1914
74.3k
  return cw == value ? 1 : 0;
1915
74.3k
}
1916
1917
5.89k
std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
1918
5.89k
  std::vector<std::string> slst;
1919
5.89k
  if (pos == std::string::npos)
1920
20
    return slst;
1921
376k
  while (true) {
1922
376k
    pos = list.find(tag, pos);
1923
376k
    if (pos == std::string::npos)
1924
4.35k
        break;
1925
372k
    std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
1926
372k
    if (cw.empty()) {
1927
1.52k
      break;
1928
1.52k
    }
1929
370k
    slst.push_back(std::move(cw));
1930
370k
    ++pos;
1931
370k
  }
1932
5.87k
  return slst;
1933
5.89k
}
1934
1935
23.4k
std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1936
23.4k
  std::vector<std::string> slst;
1937
1938
23.4k
  std::string::size_type qpos = in_word.find("<query");
1939
23.4k
  if (qpos == std::string::npos)
1940
689
    return slst;  // bad XML input
1941
1942
22.7k
  std::string::size_type q2pos = in_word.find('>', qpos);
1943
22.7k
  if (q2pos == std::string::npos)
1944
151
    return slst;  // bad XML input
1945
1946
22.6k
  q2pos = in_word.find("<word", q2pos);
1947
22.6k
  if (q2pos == std::string::npos)
1948
315
    return slst;  // bad XML input
1949
1950
22.3k
  if (check_xml_par(in_word, qpos, "type=", "analyze")) {
1951
631
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1952
631
    if (!cw.empty())
1953
326
      slst = analyze(cw);
1954
631
    if (slst.empty())
1955
560
      return slst;
1956
    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1957
71
    std::string r;
1958
71
    r.append("<code>");
1959
229
    for (auto entry : slst) {
1960
229
      r.append("<a>");
1961
1962
229
      mystrrep(entry, "\t", " ");
1963
229
      mystrrep(entry, "&", "&amp;");
1964
229
      mystrrep(entry, "<", "&lt;");
1965
229
      r.append(entry);
1966
1967
229
      r.append("</a>");
1968
229
    }
1969
71
    r.append("</code>");
1970
71
    slst.clear();
1971
71
    slst.push_back(std::move(r));
1972
71
    return slst;
1973
21.6k
  } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
1974
1.48k
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1975
1.48k
    if (!cw.empty())
1976
1.10k
      return stem(cw);
1977
20.1k
  } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
1978
9.97k
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
1979
9.97k
    if (cw.empty())
1980
212
      return slst;
1981
9.76k
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
1982
9.76k
    if (q3pos != std::string::npos) {
1983
3.72k
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
1984
3.72k
      if (!cw2.empty()) {
1985
3.65k
        return generate(cw, cw2);
1986
3.65k
      }
1987
6.03k
    } else {
1988
6.03k
      q2pos = in_word.find("<code", q2pos + 1);
1989
6.03k
      if (q2pos != std::string::npos) {
1990
5.89k
        std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
1991
5.89k
        if (!slst2.empty()) {
1992
5.30k
          slst = generate(cw, slst2);
1993
5.30k
          uniqlist(slst);
1994
5.30k
          return slst;
1995
5.30k
        }
1996
5.89k
      }
1997
6.03k
    }
1998
10.2k
  } else if (check_xml_par(in_word, qpos, "type=", "add")) {
1999
7.19k
    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
2000
7.19k
    if (cw.empty())
2001
115
      return slst;
2002
7.08k
    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
2003
7.08k
    if (q3pos != std::string::npos) {
2004
2.38k
      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
2005
2.38k
      if (!cw2.empty()) {
2006
2.04k
        add_with_affix(cw, cw2);
2007
2.04k
      } else {
2008
341
        add(cw);
2009
341
      }
2010
4.69k
    } else {
2011
4.69k
        add(cw);
2012
4.69k
    }
2013
7.08k
  }
2014
11.2k
  return slst;
2015
22.3k
}
2016
2017
0
std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
2018
0
  std::vector<std::string> slst;
2019
0
  struct hentry* he = nullptr;
2020
0
  int len;
2021
0
  std::string w2;
2022
0
  const char* word;
2023
0
  const char* ignoredchars = pAMgr->get_ignore();
2024
0
  if (ignoredchars != nullptr) {
2025
0
    w2.assign(root_word);
2026
0
    if (utf8) {
2027
0
      const std::vector<w_char>& ignoredchars_utf16 =
2028
0
          pAMgr->get_ignore_utf16();
2029
0
      remove_ignored_chars_utf(w2, ignoredchars_utf16);
2030
0
    } else {
2031
0
      remove_ignored_chars(w2, ignoredchars);
2032
0
    }
2033
0
    word = w2.c_str();
2034
0
    len = (int)w2.size();
2035
0
  } else {
2036
0
    word = root_word.c_str();
2037
0
    len = (int)root_word.size();
2038
0
  }
2039
2040
0
  if (!len)
2041
0
    return slst;
2042
2043
0
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2044
0
    he = m_HMgrs[i]->lookup(word, len);
2045
0
  }
2046
0
  if (he) {
2047
0
    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word);
2048
0
  }
2049
0
  return slst;
2050
0
}
2051
2052
namespace {
  // Copies `s`, including its terminating NUL, into a malloc'ed buffer.
  // malloc is used because this is for the c-api where the callers
  // expect to be able to use free. Returns nullptr when malloc fails.
  char* stringdup(const std::string& s) {
    const size_t sl = s.size() + 1;
    char* d = static_cast<char*>(malloc(sl));
    if (d)
      memcpy(d, s.c_str(), sl);
    return d;
  }

  // Converts `items` into a C string list stored in *slst and returns the
  // number of entries.
  //
  // The array itself is allocated with new[] and every entry with malloc
  // (see stringdup). For an empty `items`, *slst is set to nullptr and 0 is
  // returned. If duplicating any entry fails, everything allocated so far is
  // released, *slst is set to nullptr and 0 is returned, so callers never
  // receive a list containing null entries.
  int munge_vector(char*** slst, const std::vector<std::string>& items) {
    *slst = nullptr;
    if (items.empty())
      return 0;

    char** list = new char*[items.size()];
    for (size_t i = 0; i < items.size(); ++i) {
      list[i] = stringdup(items[i]);
      if (!list[i]) {
        // out of memory: roll back the entries copied so far
        while (i > 0)
          free(list[--i]);
        delete[] list;
        return 0;
      }
    }
    *slst = list;
    return static_cast<int>(items.size());
  }
}
2075
2076
0
int HunspellImpl::spell(const char* word, int* info, char** root) {
2077
0
  std::string sroot;
2078
0
  std::vector<std::string> candidate_stack;
2079
0
  bool ret = spell(word, candidate_stack, info, root ? &sroot : nullptr);
2080
0
  if (root) {
2081
0
    if (sroot.empty()) {
2082
0
      *root = nullptr;
2083
0
    } else {
2084
0
      *root = stringdup(sroot);
2085
0
    }
2086
0
  }
2087
0
  return ret;
2088
0
}
2089
2090
0
int HunspellImpl::suggest(char*** slst, const char* word) {
2091
0
  std::vector<std::string> suggests = suggest(word);
2092
0
  return munge_vector(slst, suggests);
2093
0
}
2094
2095
0
int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
2096
0
  std::vector<std::string> stems = suffix_suggest(root_word);
2097
0
  return munge_vector(slst, stems);
2098
0
}
2099
2100
0
// Releases a list of `n` strings: each entry is released with free(), the
// array with delete[], and *slst is reset to nullptr. Does nothing when
// `slst` or *slst is null.
void HunspellImpl::free_list(char*** slst, int n) {
  if (!slst || !*slst)
    return;
  char** entries = *slst;
  for (int i = 0; i < n; ++i)
    free(entries[i]);
  delete[] entries;
  *slst = nullptr;
}
2108
2109
0
char* HunspellImpl::get_dic_encoding() {
2110
0
  return &encoding[0];
2111
0
}
2112
2113
0
int HunspellImpl::analyze(char*** slst, const char* word) {
2114
0
  std::vector<std::string> stems = analyze(word);
2115
0
  return munge_vector(slst, stems);
2116
0
}
2117
2118
0
int HunspellImpl::stem(char*** slst, const char* word) {
2119
0
  std::vector<std::string> stems = stem(word);
2120
0
  return munge_vector(slst, stems);
2121
0
}
2122
2123
0
int HunspellImpl::stem(char*** slst, char** desc, int n) {
2124
0
  std::vector<std::string> morph;
2125
0
  morph.reserve(n);
2126
0
  for (int i = 0; i < n; ++i) morph.emplace_back(desc[i]);
2127
2128
0
  std::vector<std::string> stems = stem(morph);
2129
0
  return munge_vector(slst, stems);
2130
0
}
2131
2132
0
int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
2133
0
  std::vector<std::string> stems = generate(word, pattern);
2134
0
  return munge_vector(slst, stems);
2135
0
}
2136
2137
0
int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
2138
0
  std::vector<std::string> morph;
2139
0
  morph.reserve(pln);
2140
0
  for (int i = 0; i < pln; ++i) morph.emplace_back(pl[i]);
2141
2142
0
  std::vector<std::string> stems = generate(word, morph);
2143
0
  return munge_vector(slst, stems);
2144
0
}
2145
2146
0
const char* HunspellImpl::get_wordchars() const {
2147
0
  return get_wordchars_cpp().c_str();
2148
0
}
2149
2150
0
const char* HunspellImpl::get_version() const {
2151
0
  return get_version_cpp().c_str();
2152
0
}
2153
2154
0
int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
2155
0
  std::string d;
2156
0
  bool ret = input_conv(word, d);
2157
0
  if (ret && d.size() < destsize) {
2158
0
    strncpy(dest, d.c_str(), destsize);
2159
0
    return 1;
2160
0
  }
2161
0
  return 0;
2162
0
}
2163
2164
// Creates the implementation object from the affix file path, dictionary
// path and key; it is owned through m_Impl and released in the destructor.
Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
    : m_Impl(new HunspellImpl(affpath, dpath, key)) {}
2167
2168
59.0k
// Destroys the implementation object created by the constructor.
Hunspell::~Hunspell() {
  HunspellImpl* owned = m_Impl;
  delete owned;
}
2171
2172
// load extra dictionaries
2173
0
int Hunspell::add_dic(const char* dpath, const char* key) {
2174
0
  return m_Impl->add_dic(dpath, key);
2175
0
}
2176
2177
222k
bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
2178
222k
  std::vector<std::string> candidate_stack;
2179
222k
  return m_Impl->spell(word, candidate_stack, info, root,
2180
222k
                       std::chrono::steady_clock::now());
2181
222k
}
2182
2183
80.2k
std::vector<std::string> Hunspell::suggest(const std::string& word) {
2184
80.2k
  return m_Impl->suggest(word);
2185
80.2k
}
2186
2187
0
std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
2188
0
  return m_Impl->suffix_suggest(root_word);
2189
0
}
2190
2191
0
const std::string& Hunspell::get_dict_encoding() const {
2192
0
  return m_Impl->get_dict_encoding();
2193
0
}
2194
2195
0
std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
2196
0
  return m_Impl->stem(desc);
2197
0
}
2198
2199
18.8k
std::vector<std::string> Hunspell::stem(const std::string& word) {
2200
18.8k
  return m_Impl->stem(word);
2201
18.8k
}
2202
2203
0
const std::string& Hunspell::get_wordchars_cpp() const {
2204
0
  return m_Impl->get_wordchars_cpp();
2205
0
}
2206
2207
0
const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
2208
0
  return m_Impl->get_wordchars_utf16();
2209
0
}
2210
2211
33.0k
int Hunspell::add(const std::string& word) {
2212
33.0k
  return m_Impl->add(word);
2213
33.0k
}
2214
2215
0
int Hunspell::add_with_flags(const std::string& word, const std::string& flags, const std::string& desc) {
2216
0
  return m_Impl->add_with_flags(word, flags, desc);
2217
0
}
2218
2219
35.0k
int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
2220
35.0k
  return m_Impl->add_with_affix(word, example);
2221
35.0k
}
2222
2223
31.2k
int Hunspell::remove(const std::string& word) {
2224
31.2k
  return m_Impl->remove(word);
2225
31.2k
}
2226
2227
0
const std::string& Hunspell::get_version_cpp() const {
2228
0
  return m_Impl->get_version_cpp();
2229
0
}
2230
2231
0
struct cs_info* Hunspell::get_csconv() {
2232
0
  return m_Impl->get_csconv();
2233
0
}
2234
2235
18.8k
std::vector<std::string> Hunspell::analyze(const std::string& word) {
2236
18.8k
  return m_Impl->analyze(word);
2237
18.8k
}
2238
2239
0
std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
2240
0
  return m_Impl->generate(word, pl);
2241
0
}
2242
2243
18.8k
std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
2244
18.8k
  return m_Impl->generate(word, pattern);
2245
18.8k
}
2246
2247
0
int Hunspell::get_langnum() const {
2248
0
  return m_Impl->get_langnum();
2249
0
}
2250
2251
0
bool Hunspell::input_conv(const std::string& word, std::string& dest) {
2252
0
  return m_Impl->input_conv(word, dest);
2253
0
}
2254
2255
0
int Hunspell::spell(const char* word, int* info, char** root) {
2256
0
  return m_Impl->spell(word, info, root);
2257
0
}
2258
2259
0
int Hunspell::suggest(char*** slst, const char* word) {
2260
0
  return m_Impl->suggest(slst, word);
2261
0
}
2262
2263
0
int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
2264
0
  return m_Impl->suffix_suggest(slst, root_word);
2265
0
}
2266
2267
0
void Hunspell::free_list(char*** slst, int n) {
2268
0
  m_Impl->free_list(slst, n);
2269
0
}
2270
2271
0
char* Hunspell::get_dic_encoding() {
2272
0
  return m_Impl->get_dic_encoding();
2273
0
}
2274
2275
0
int Hunspell::analyze(char*** slst, const char* word) {
2276
0
  return m_Impl->analyze(slst, word);
2277
0
}
2278
2279
0
int Hunspell::stem(char*** slst, const char* word) {
2280
0
  return m_Impl->stem(slst, word);
2281
0
}
2282
2283
0
int Hunspell::stem(char*** slst, char** desc, int n) {
2284
0
  return m_Impl->stem(slst, desc, n);
2285
0
}
2286
2287
0
int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
2288
0
  return m_Impl->generate(slst, word, pattern);
2289
0
}
2290
2291
0
int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
2292
0
  return m_Impl->generate(slst, word, pl, pln);
2293
0
}
2294
2295
0
const char* Hunspell::get_wordchars() const {
2296
0
  return m_Impl->get_wordchars();
2297
0
}
2298
2299
0
const char* Hunspell::get_version() const {
2300
0
  return m_Impl->get_version();
2301
0
}
2302
2303
0
int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
2304
0
  return m_Impl->input_conv(word, dest, destsize);
2305
0
}
2306
2307
0
// C API: allocates a HunspellImpl for the given affix and dictionary paths
// and returns it as an opaque handle (release it with Hunspell_destroy).
Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath);
  return reinterpret_cast<Hunhandle*>(impl);
}
2310
2311
// C API: like Hunspell_create, additionally passing `key` to the
// HunspellImpl constructor.
Hunhandle* Hunspell_create_key(const char* affpath,
                               const char* dpath,
                               const char* key) {
  HunspellImpl* impl = new HunspellImpl(affpath, dpath, key);
  return reinterpret_cast<Hunhandle*>(impl);
}
2316
2317
0
// C API: deletes the HunspellImpl behind the handle.
void Hunspell_destroy(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  delete impl;
}
2320
2321
0
// C API: calls add_dic(dpath) on the HunspellImpl behind the handle.
int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_dic(dpath);
}
2324
2325
0
// C API: calls spell(word) on the HunspellImpl behind the handle.
int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->spell(word);
}
2328
2329
0
// C API: calls get_dic_encoding() on the HunspellImpl behind the handle.
char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->get_dic_encoding();
}
2332
2333
0
// C API: calls suggest(slst, word) on the HunspellImpl behind the handle.
int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suggest(slst, word);
}
2336
2337
0
// C API: calls suffix_suggest(slst, root_word) on the HunspellImpl behind
// the handle.
int Hunspell_suffix_suggest(Hunhandle* pHunspell, char*** slst, const char* root_word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->suffix_suggest(slst, root_word);
}
2340
2341
0
// C API: calls analyze(slst, word) on the HunspellImpl behind the handle.
int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->analyze(slst, word);
}
2344
2345
0
// C API: calls stem(slst, word) on the HunspellImpl behind the handle.
int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, word);
}
2348
2349
0
// C API: calls stem(slst, desc, n) on the HunspellImpl behind the handle.
int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->stem(slst, desc, n);
}
2352
2353
int Hunspell_generate(Hunhandle* pHunspell,
2354
                      char*** slst,
2355
                      const char* word,
2356
                      const char* pattern)
2357
0
{
2358
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
2359
0
}
2360
2361
int Hunspell_generate2(Hunhandle* pHunspell,
2362
                       char*** slst,
2363
                       const char* word,
2364
                       char** desc,
2365
                       int n)
2366
0
{
2367
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
2368
0
}
2369
2370
/* functions for run-time modification of the dictionary */
2371
2372
/* add word to the run-time dictionary */
2373
2374
0
// C API: calls add(word) on the HunspellImpl behind the handle.
int Hunspell_add(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add(word);
}
2377
2378
0
// C API: calls add_with_flags(word, flags, desc) on the HunspellImpl behind
// the handle.
int Hunspell_add_with_flags(Hunhandle* pHunspell, const char* word, const char* flags, const char* desc) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->add_with_flags(word, flags, desc);
}
2381
2382
/* add word to the run-time dictionary with affix flags of
2383
 * the example (a dictionary word): Hunspell will recognize
2384
 * affixed forms of the new word, too.
2385
 */
2386
2387
int Hunspell_add_with_affix(Hunhandle* pHunspell,
2388
                            const char* word,
2389
0
                            const char* example) {
2390
0
  return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
2391
0
}
2392
2393
/* remove word from the run-time dictionary */
2394
2395
0
// C API: calls remove(word) on the HunspellImpl behind the handle.
int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  return impl->remove(word);
}
2398
2399
0
// C API: calls free_list(list, n) on the HunspellImpl behind the handle.
void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
  HunspellImpl* impl = reinterpret_cast<HunspellImpl*>(pHunspell);
  impl->free_list(list, n);
}