Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/extensions/spellcheck/hunspell/src/hunspell.cxx
Line
Count
Source (jump to first uncovered line)
1
/* ***** BEGIN LICENSE BLOCK *****
2
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3
 *
4
 * Copyright (C) 2002-2017 Németh László
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version
7
 * 1.1 (the "License"); you may not use this file except in compliance with
8
 * the License. You may obtain a copy of the License at
9
 * http://www.mozilla.org/MPL/
10
 *
11
 * Software distributed under the License is distributed on an "AS IS" basis,
12
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
 * for the specific language governing rights and limitations under the
14
 * License.
15
 *
16
 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17
 *
18
 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19
 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20
 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21
 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22
 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23
 *
24
 * Alternatively, the contents of this file may be used under the terms of
25
 * either the GNU General Public License Version 2 or later (the "GPL"), or
26
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
 * in which case the provisions of the GPL or the LGPL are applicable instead
28
 * of those above. If you wish to allow use of your version of this file only
29
 * under the terms of either the GPL or the LGPL, and not to allow others to
30
 * use your version of this file under the terms of the MPL, indicate your
31
 * decision by deleting the provisions above and replace them with the notice
32
 * and other provisions required by the GPL or the LGPL. If you do not delete
33
 * the provisions above, a recipient may use your version of this file under
34
 * the terms of any one of the MPL, the GPL or the LGPL.
35
 *
36
 * ***** END LICENSE BLOCK ***** */
37
/*
38
 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39
 * And Contributors.  All rights reserved.
40
 *
41
 * Redistribution and use in source and binary forms, with or without
42
 * modification, are permitted provided that the following conditions
43
 * are met:
44
 *
45
 * 1. Redistributions of source code must retain the above copyright
46
 *    notice, this list of conditions and the following disclaimer.
47
 *
48
 * 2. Redistributions in binary form must reproduce the above copyright
49
 *    notice, this list of conditions and the following disclaimer in the
50
 *    documentation and/or other materials provided with the distribution.
51
 *
52
 * 3. All modifications to the source code must be clearly marked as
53
 *    such.  Binary redistributions based on modified source code
54
 *    must be clearly marked as modified versions in the documentation
55
 *    and/or other materials provided with the distribution.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68
 * SUCH DAMAGE.
69
 */
70
71
#include <stdlib.h>
72
#include <string.h>
73
#include <stdio.h>
74
75
#include "affixmgr.hxx"
76
#include "hunspell.hxx"
77
#include "suggestmgr.hxx"
78
#include "hunspell.h"
79
#include "csutil.hxx"
80
81
#include <limits>
82
#include <string>
83
84
0
#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
85
86
class HunspellImpl
87
{
88
public:
89
  HunspellImpl(const char* affpath, const char* dpath, const char* key);
90
  ~HunspellImpl();
91
  int add_dic(const char* dpath, const char* key);
92
  std::vector<std::string> suffix_suggest(const std::string& root_word);
93
  std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
94
  std::vector<std::string> generate(const std::string& word, const std::string& pattern);
95
  std::vector<std::string> stem(const std::string& word);
96
  std::vector<std::string> stem(const std::vector<std::string>& morph);
97
  std::vector<std::string> analyze(const std::string& word);
98
  int get_langnum() const;
99
  bool input_conv(const std::string& word, std::string& dest);
100
  bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
101
  std::vector<std::string> suggest(const std::string& word);
102
  const std::string& get_wordchars() const;
103
  const std::vector<w_char>& get_wordchars_utf16() const;
104
  const std::string& get_dict_encoding() const;
105
  int add(const std::string& word);
106
  int add_with_affix(const std::string& word, const std::string& example);
107
  int remove(const std::string& word);
108
  const std::string& get_version() const;
109
  struct cs_info* get_csconv();
110
  std::vector<char> dic_encoding_vec;
111
112
private:
113
  AffixMgr* pAMgr;
114
  std::vector<HashMgr*> m_HMgrs;
115
  SuggestMgr* pSMgr;
116
  char* affixpath;
117
  std::string encoding;
118
  struct cs_info* csconv;
119
  int langnum;
120
  int utf8;
121
  int complexprefixes;
122
  std::vector<std::string> wordbreak;
123
124
private:
125
  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
126
  size_t cleanword2(std::string& dest,
127
                    std::vector<w_char>& dest_u,
128
                    const std::string& src,
129
                    int* pcaptype,
130
                    size_t* pabbrev);
131
  void mkinitcap(std::string& u8);
132
  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
133
  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
134
  void mkallcap(std::string& u8);
135
  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
136
  struct hentry* checkword(const std::string& source, int* info, std::string* root);
137
  std::string sharps_u8_l1(const std::string& source);
138
  hentry*
139
  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
140
  int is_keepcase(const hentry* rv);
141
  void insert_sug(std::vector<std::string>& slst, const std::string& word);
142
  void cat_result(std::string& result, const std::string& st);
143
  std::vector<std::string> spellml(const std::string& word);
144
  std::string get_xml_par(const char* par);
145
  const char* get_xml_pos(const char* s, const char* attr);
146
  std::vector<std::string> get_xml_list(const char* list, const char* tag);
147
  int check_xml_par(const char* q, const char* attr, const char* value);
148
private:
149
  HunspellImpl(const HunspellImpl&);
150
  HunspellImpl& operator=(const HunspellImpl&);
151
};
152
153
// Public constructor: all work is done by a freshly allocated HunspellImpl,
// which receives the three arguments unchanged.
Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
    : m_Impl(new HunspellImpl(affpath, dpath, key)) {}
156
157
0
// Build the spell checker from an affix file (`affpath`), a dictionary file
// (`dpath`) and a `key` that is handed through to the hash and affix
// managers.
HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key)
    : affixpath(mystrdup(affpath)),  // kept so add_dic() can load further dictionaries
      csconv(NULL),
      utf8(0),
      complexprefixes(0) {
  // The hash manager has to exist first: the affix manager is constructed
  // with the hash manager list because it needs the lookup methods.
  m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
  pAMgr = new AffixMgr(affpath, m_HMgrs, key);

  // Fetch the preferred try string and the dictionary-wide settings from
  // the affix manager.
  char* try_string = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  langnum = pAMgr->get_langnum();
  utf8 = pAMgr->get_utf8();
  // The 8-bit case conversion table is only looked up for non-UTF-8
  // dictionaries.
  if (!utf8)
    csconv = get_current_cs(encoding);
  complexprefixes = pAMgr->get_complexprefixes();
  wordbreak = pAMgr->get_breaktable();

  // Publish the encoding name as a NUL-terminated character vector.
  dic_encoding_vec.resize(encoding.size() + 1);
  strcpy(&dic_encoding_vec[0], encoding.c_str());

  // Finally set up the suggestion manager; the try string is released here
  // once the manager has been constructed from it (free(NULL) is a no-op).
  pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
  free(try_string);
}
189
190
0
// Releases the implementation object allocated by the constructor.
Hunspell::~Hunspell()
{
  delete m_Impl;
}
193
194
0
// Tear down everything the constructor (and add_dic) allocated: first the
// suggestion manager, then the affix manager, then every hash manager.
HunspellImpl::~HunspellImpl() {
  delete pSMgr;
  pSMgr = NULL;

  delete pAMgr;
  pAMgr = NULL;

  for (std::vector<HashMgr*>::iterator mgr = m_HMgrs.begin(); mgr != m_HMgrs.end(); ++mgr)
    delete *mgr;

  // The case conversion table is only released in MOZILLA_CLIENT builds;
  // in other builds the pointer is merely reset.
#ifdef MOZILLA_CLIENT
  delete[] csconv;
#endif
  csconv = NULL;

  // free(NULL) is a no-op, so no guard is needed.
  free(affixpath);
  affixpath = NULL;
}
209
210
// load extra dictionaries
211
0
int Hunspell::add_dic(const char* dpath, const char* key) {
212
0
  return m_Impl->add_dic(dpath, key);
213
0
}
214
215
// load extra dictionaries
216
0
int HunspellImpl::add_dic(const char* dpath, const char* key) {
217
0
  if (!affixpath)
218
0
    return 1;
219
0
  m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
220
0
  return 0;
221
0
}
222
223
// make a copy of src at destination while removing all leading
224
// blanks and removing any trailing periods after recording
225
// their presence with the abbreviation flag
226
// also since already going through character by character,
227
// set the capitalization type
228
// return the length of the "cleaned" (and UTF-8 encoded) word
229
230
size_t HunspellImpl::cleanword2(std::string& dest,
231
                         std::vector<w_char>& dest_utf,
232
                         const std::string& src,
233
                         int* pcaptype,
234
0
                         size_t* pabbrev) {
235
0
  dest.clear();
236
0
  dest_utf.clear();
237
0
238
0
  const char* q = src.c_str();
239
0
240
0
  // first skip over any leading blanks
241
0
  while (*q == ' ')
242
0
    ++q;
243
0
244
0
  // now strip off any trailing periods (recording their presence)
245
0
  *pabbrev = 0;
246
0
  int nl = strlen(q);
247
0
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
248
0
    nl--;
249
0
    (*pabbrev)++;
250
0
  }
251
0
252
0
  // if no characters are left it can't be capitalized
253
0
  if (nl <= 0) {
254
0
    *pcaptype = NOCAP;
255
0
    return 0;
256
0
  }
257
0
258
0
  dest.append(q, nl);
259
0
  nl = dest.size();
260
0
  if (utf8) {
261
0
    u8_u16(dest_utf, dest);
262
0
    *pcaptype = get_captype_utf8(dest_utf, langnum);
263
0
  } else {
264
0
    *pcaptype = get_captype(dest, csconv);
265
0
  }
266
0
  return nl;
267
0
}
268
269
void HunspellImpl::cleanword(std::string& dest,
270
                        const std::string& src,
271
                        int* pcaptype,
272
0
                        int* pabbrev) {
273
0
  dest.clear();
274
0
  const unsigned char* q = (const unsigned char*)src.c_str();
275
0
  int firstcap = 0;
276
0
277
0
  // first skip over any leading blanks
278
0
  while (*q == ' ')
279
0
    ++q;
280
0
281
0
  // now strip off any trailing periods (recording their presence)
282
0
  *pabbrev = 0;
283
0
  int nl = strlen((const char*)q);
284
0
  while ((nl > 0) && (*(q + nl - 1) == '.')) {
285
0
    nl--;
286
0
    (*pabbrev)++;
287
0
  }
288
0
289
0
  // if no characters are left it can't be capitalized
290
0
  if (nl <= 0) {
291
0
    *pcaptype = NOCAP;
292
0
    return;
293
0
  }
294
0
295
0
  // now determine the capitalization type of the first nl letters
296
0
  int ncap = 0;
297
0
  int nneutral = 0;
298
0
  int nc = 0;
299
0
300
0
  if (!utf8) {
301
0
    while (nl > 0) {
302
0
      nc++;
303
0
      if (csconv[(*q)].ccase)
304
0
        ncap++;
305
0
      if (csconv[(*q)].cupper == csconv[(*q)].clower)
306
0
        nneutral++;
307
0
      dest.push_back(*q++);
308
0
      nl--;
309
0
    }
310
0
    // remember to terminate the destination string
311
0
    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
312
0
  } else {
313
0
    std::vector<w_char> t;
314
0
    u8_u16(t, src);
315
0
    for (size_t i = 0; i < t.size(); ++i) {
316
0
      unsigned short idx = (t[i].h << 8) + t[i].l;
317
0
      unsigned short low = unicodetolower(idx, langnum);
318
0
      if (idx != low)
319
0
        ncap++;
320
0
      if (unicodetoupper(idx, langnum) == low)
321
0
        nneutral++;
322
0
    }
323
0
    u16_u8(dest, t);
324
0
    if (ncap) {
325
0
      unsigned short idx = (t[0].h << 8) + t[0].l;
326
0
      firstcap = (idx != unicodetolower(idx, langnum));
327
0
    }
328
0
  }
329
0
330
0
  // now finally set the captype
331
0
  if (ncap == 0) {
332
0
    *pcaptype = NOCAP;
333
0
  } else if ((ncap == 1) && firstcap) {
334
0
    *pcaptype = INITCAP;
335
0
  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
336
0
    *pcaptype = ALLCAP;
337
0
  } else if ((ncap > 1) && firstcap) {
338
0
    *pcaptype = HUHINITCAP;
339
0
  } else {
340
0
    *pcaptype = HUHCAP;
341
0
  }
342
0
}
343
344
0
void HunspellImpl::mkallcap(std::string& u8) {
345
0
  if (utf8) {
346
0
    std::vector<w_char> u16;
347
0
    u8_u16(u16, u8);
348
0
    ::mkallcap_utf(u16, langnum);
349
0
    u16_u8(u8, u16);
350
0
  } else {
351
0
    ::mkallcap(u8, csconv);
352
0
  }
353
0
}
354
355
0
int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
356
0
  if (utf8) {
357
0
    ::mkallsmall_utf(u16, langnum);
358
0
    u16_u8(u8, u16);
359
0
  } else {
360
0
    ::mkallsmall(u8, csconv);
361
0
  }
362
0
  return u8.size();
363
0
}
364
365
// convert UTF-8 sharp S codes to latin 1
366
0
std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
367
0
  std::string dest(source);
368
0
  mystrrep(dest, "\xC3\x9F", "\xDF");
369
0
  return dest;
370
0
}
371
372
// recursive search for right ss - sharp s permutations
373
hentry* HunspellImpl::spellsharps(std::string& base,
374
                              size_t n_pos,
375
                              int n,
376
                              int repnum,
377
                              int* info,
378
0
                              std::string* root) {
379
0
  size_t pos = base.find("ss", n_pos);
380
0
  if (pos != std::string::npos && (n < MAXSHARPS)) {
381
0
    base[pos] = '\xC3';
382
0
    base[pos + 1] = '\x9F';
383
0
    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
384
0
    if (h)
385
0
      return h;
386
0
    base[pos] = 's';
387
0
    base[pos + 1] = 's';
388
0
    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
389
0
    if (h)
390
0
      return h;
391
0
  } else if (repnum > 0) {
392
0
    if (utf8)
393
0
      return checkword(base, info, root);
394
0
    std::string tmp(sharps_u8_l1(base));
395
0
    return checkword(tmp, info, root);
396
0
  }
397
0
  return NULL;
398
0
}
399
400
0
int HunspellImpl::is_keepcase(const hentry* rv) {
401
0
  return pAMgr && rv->astr && pAMgr->get_keepcase() &&
402
0
         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
403
0
}
404
405
/* insert a word to the beginning of the suggestion array */
406
0
void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
407
0
  slst.insert(slst.begin(), word);
408
0
}
409
410
0
bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
411
0
  return m_Impl->spell(word, info, root);
412
0
}
413
414
0
bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
415
0
  struct hentry* rv = NULL;
416
0
417
0
  int info2 = 0;
418
0
  if (!info)
419
0
    info = &info2;
420
0
  else
421
0
    *info = 0;
422
0
423
0
  // Hunspell supports XML input of the simplified API (see manual)
424
0
  if (word == SPELL_XML)
425
0
    return true;
426
0
  if (utf8) {
427
0
    if (word.size() >= MAXWORDUTF8LEN)
428
0
      return false;
429
0
  } else {
430
0
    if (word.size() >= MAXWORDLEN)
431
0
      return false;
432
0
  }
433
0
  int captype = NOCAP;
434
0
  size_t abbv = 0;
435
0
  size_t wl = 0;
436
0
437
0
  std::string scw;
438
0
  std::vector<w_char> sunicw;
439
0
440
0
  // input conversion
441
0
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
442
0
  {
443
0
    std::string wspace;
444
0
445
0
    bool convstatus = rl ? rl->conv(word, wspace) : false;
446
0
    if (convstatus)
447
0
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
448
0
    else
449
0
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
450
0
  }
451
0
452
0
#ifdef MOZILLA_CLIENT
453
0
  // accept the abbreviated words without dots
454
0
  // workaround for the incomplete tokenization of Mozilla
455
0
  abbv = 1;
456
0
#endif
457
0
458
0
  if (wl == 0 || m_HMgrs.empty())
459
0
    return true;
460
0
  if (root)
461
0
    root->clear();
462
0
463
0
  // allow numbers with dots, dashes and commas (but forbid double separators:
464
0
  // "..", "--" etc.)
465
0
  enum { NBEGIN, NNUM, NSEP };
466
0
  int nstate = NBEGIN;
467
0
  size_t i;
468
0
469
0
  for (i = 0; (i < wl); i++) {
470
0
    if ((scw[i] <= '9') && (scw[i] >= '0')) {
471
0
      nstate = NNUM;
472
0
    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
473
0
      if ((nstate == NSEP) || (i == 0))
474
0
        break;
475
0
      nstate = NSEP;
476
0
    } else
477
0
      break;
478
0
  }
479
0
  if ((i == wl) && (nstate == NNUM))
480
0
    return true;
481
0
482
0
  switch (captype) {
483
0
    case HUHCAP:
484
0
    /* FALLTHROUGH */
485
0
    case HUHINITCAP:
486
0
      *info += SPELL_ORIGCAP;
487
0
    /* FALLTHROUGH */
488
0
    case NOCAP:
489
0
      rv = checkword(scw, info, root);
490
0
      if ((abbv) && !(rv)) {
491
0
        std::string u8buffer(scw);
492
0
        u8buffer.push_back('.');
493
0
        rv = checkword(u8buffer, info, root);
494
0
      }
495
0
      break;
496
0
    case ALLCAP: {
497
0
      *info += SPELL_ORIGCAP;
498
0
      rv = checkword(scw, info, root);
499
0
      if (rv)
500
0
        break;
501
0
      if (abbv) {
502
0
        std::string u8buffer(scw);
503
0
        u8buffer.push_back('.');
504
0
        rv = checkword(u8buffer, info, root);
505
0
        if (rv)
506
0
          break;
507
0
      }
508
0
      // Spec. prefix handling for Catalan, French, Italian:
509
0
      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
510
0
      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
511
0
      if (apos != std::string::npos) {
512
0
        mkallsmall2(scw, sunicw);
513
0
        //conversion may result in string with different len to pre-mkallsmall2
514
0
        //so re-scan
515
0
        if (apos != std::string::npos && apos < scw.size() - 1) {
516
0
          std::string part1 = scw.substr(0, apos+1);
517
0
          std::string part2 = scw.substr(apos+1);
518
0
          if (utf8) {
519
0
            std::vector<w_char> part1u, part2u;
520
0
            u8_u16(part1u, part1);
521
0
            u8_u16(part2u, part2);
522
0
            mkinitcap2(part2, part2u);
523
0
            scw = part1 + part2;
524
0
            sunicw = part1u;
525
0
            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
526
0
            rv = checkword(scw, info, root);
527
0
            if (rv)
528
0
              break;
529
0
          } else {
530
0
            mkinitcap2(part2, sunicw);
531
0
            scw = part1 + part2;
532
0
            rv = checkword(scw, info, root);
533
0
            if (rv)
534
0
              break;
535
0
          }
536
0
          mkinitcap2(scw, sunicw);
537
0
          rv = checkword(scw, info, root);
538
0
          if (rv)
539
0
            break;
540
0
        }
541
0
      }
542
0
      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
543
0
544
0
        mkallsmall2(scw, sunicw);
545
0
        std::string u8buffer(scw);
546
0
        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
547
0
        if (!rv) {
548
0
          mkinitcap2(scw, sunicw);
549
0
          rv = spellsharps(scw, 0, 0, 0, info, root);
550
0
        }
551
0
        if ((abbv) && !(rv)) {
552
0
          u8buffer.push_back('.');
553
0
          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
554
0
          if (!rv) {
555
0
            u8buffer = std::string(scw);
556
0
            u8buffer.push_back('.');
557
0
            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
558
0
          }
559
0
        }
560
0
        if (rv)
561
0
          break;
562
0
      }
563
0
    }
564
0
    case INITCAP: {
565
0
566
0
      *info += SPELL_ORIGCAP;
567
0
      mkallsmall2(scw, sunicw);
568
0
      std::string u8buffer(scw);
569
0
      mkinitcap2(scw, sunicw);
570
0
      if (captype == INITCAP)
571
0
        *info += SPELL_INITCAP;
572
0
      rv = checkword(scw, info, root);
573
0
      if (captype == INITCAP)
574
0
        *info -= SPELL_INITCAP;
575
0
      // forbid bad capitalization
576
0
      // (for example, ijs -> Ijs instead of IJs in Dutch)
577
0
      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
578
0
      if (*info & SPELL_FORBIDDEN) {
579
0
        rv = NULL;
580
0
        break;
581
0
      }
582
0
      if (rv && is_keepcase(rv) && (captype == ALLCAP))
583
0
        rv = NULL;
584
0
      if (rv)
585
0
        break;
586
0
587
0
      rv = checkword(u8buffer, info, root);
588
0
      if (abbv && !rv) {
589
0
        u8buffer.push_back('.');
590
0
        rv = checkword(u8buffer, info, root);
591
0
        if (!rv) {
592
0
          u8buffer = scw;
593
0
          u8buffer.push_back('.');
594
0
          if (captype == INITCAP)
595
0
            *info += SPELL_INITCAP;
596
0
          rv = checkword(u8buffer, info, root);
597
0
          if (captype == INITCAP)
598
0
            *info -= SPELL_INITCAP;
599
0
          if (rv && is_keepcase(rv) && (captype == ALLCAP))
600
0
            rv = NULL;
601
0
          break;
602
0
        }
603
0
      }
604
0
      if (rv && is_keepcase(rv) &&
605
0
          ((captype == ALLCAP) ||
606
0
           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
607
0
           // in INITCAP form, too.
608
0
           !(pAMgr->get_checksharps() &&
609
0
             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
610
0
              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
611
0
        rv = NULL;
612
0
      break;
613
0
    }
614
0
  }
615
0
616
0
  if (rv) {
617
0
    if (pAMgr && pAMgr->get_warn() && rv->astr &&
618
0
        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
619
0
      *info += SPELL_WARN;
620
0
      if (pAMgr->get_forbidwarn())
621
0
        return false;
622
0
      return true;
623
0
    }
624
0
    return true;
625
0
  }
626
0
627
0
  // recursive breaking at break points
628
0
  if (!wordbreak.empty()) {
629
0
630
0
    int nbr = 0;
631
0
    wl = scw.size();
632
0
633
0
    // calculate break points for recursion limit
634
0
    for (size_t j = 0; j < wordbreak.size(); ++j) {
635
0
      size_t pos = 0;
636
0
      while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
637
0
        ++nbr;
638
0
        pos += wordbreak[j].size();
639
0
      }
640
0
    }
641
0
    if (nbr >= 10)
642
0
      return false;
643
0
644
0
    // check boundary patterns (^begin and end$)
645
0
    for (size_t j = 0; j < wordbreak.size(); ++j) {
646
0
      size_t plen = wordbreak[j].size();
647
0
      if (plen == 1 || plen > wl)
648
0
        continue;
649
0
650
0
      if (wordbreak[j][0] == '^' &&
651
0
          scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
652
0
        return true;
653
0
654
0
      if (wordbreak[j][plen - 1] == '$' &&
655
0
          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
656
0
        std::string suffix(scw.substr(wl - plen + 1));
657
0
        scw.resize(wl - plen + 1);
658
0
        if (spell(scw))
659
0
          return true;
660
0
        scw.append(suffix);
661
0
      }
662
0
    }
663
0
664
0
    // other patterns
665
0
    for (size_t j = 0; j < wordbreak.size(); ++j) {
666
0
      size_t plen = wordbreak[j].size();
667
0
      size_t found = scw.find(wordbreak[j]);
668
0
      if ((found > 0) && (found < wl - plen)) {
669
0
        if (!spell(scw.substr(found + plen)))
670
0
          continue;
671
0
        std::string suffix(scw.substr(found));
672
0
        scw.resize(found);
673
0
        // examine 2 sides of the break point
674
0
        if (spell(scw))
675
0
          return true;
676
0
        scw.append(suffix);
677
0
678
0
        // LANG_hu: spec. dash rule
679
0
        if (langnum == LANG_hu && wordbreak[j] == "-") {
680
0
          suffix = scw.substr(found + 1);
681
0
          scw.resize(found + 1);
682
0
          if (spell(scw))
683
0
            return true;  // check the first part with dash
684
0
          scw.append(suffix);
685
0
        }
686
0
        // end of LANG specific region
687
0
      }
688
0
    }
689
0
  }
690
0
691
0
  return false;
692
0
}
693
694
0
struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
695
0
  bool usebuffer = false;
696
0
  std::string w2;
697
0
  const char* word;
698
0
  int len;
699
0
700
0
  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
701
0
  if (ignoredchars != NULL) {
702
0
    w2.assign(w);
703
0
    if (utf8) {
704
0
      const std::vector<w_char>& ignoredchars_utf16 =
705
0
          pAMgr->get_ignore_utf16();
706
0
      remove_ignored_chars_utf(w2, ignoredchars_utf16);
707
0
    } else {
708
0
      remove_ignored_chars(w2, ignoredchars);
709
0
    }
710
0
    word = w2.c_str();
711
0
    len = w2.size();
712
0
    usebuffer = true;
713
0
  } else {
714
0
    word = w.c_str();
715
0
    len = w.size();
716
0
  }
717
0
718
0
  if (!len)
719
0
    return NULL;
720
0
721
0
  // word reversing wrapper for complex prefixes
722
0
  if (complexprefixes) {
723
0
    if (!usebuffer) {
724
0
      w2.assign(word);
725
0
      usebuffer = true;
726
0
    }
727
0
    if (utf8)
728
0
      reverseword_utf(w2);
729
0
    else
730
0
      reverseword(w2);
731
0
  }
732
0
733
0
  if (usebuffer) {
734
0
    word = w2.c_str();
735
0
  }
736
0
737
0
  // look word in hash table
738
0
  struct hentry* he = NULL;
739
0
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
740
0
    he = m_HMgrs[i]->lookup(word);
741
0
742
0
    // check forbidden and onlyincompound words
743
0
    if ((he) && (he->astr) && (pAMgr) &&
744
0
        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
745
0
      if (info)
746
0
        *info += SPELL_FORBIDDEN;
747
0
      // LANG_hu section: set dash information for suggestions
748
0
      if (langnum == LANG_hu) {
749
0
        if (pAMgr->get_compoundflag() &&
750
0
            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
751
0
          if (info)
752
0
            *info += SPELL_COMPOUND;
753
0
        }
754
0
      }
755
0
      return NULL;
756
0
    }
757
0
758
0
    // he = next not needaffix, onlyincompound homonym or onlyupcase word
759
0
    while (he && (he->astr) && pAMgr &&
760
0
           ((pAMgr->get_needaffix() &&
761
0
             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
762
0
            (pAMgr->get_onlyincompound() &&
763
0
             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
764
0
            (info && (*info & SPELL_INITCAP) &&
765
0
             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
766
0
      he = he->next_homonym;
767
0
  }
768
0
769
0
  // check with affixes
770
0
  if (!he && pAMgr) {
771
0
    // try stripping off affixes */
772
0
    he = pAMgr->affix_check(word, len, 0);
773
0
774
0
    // check compound restriction and onlyupcase
775
0
    if (he && he->astr &&
776
0
        ((pAMgr->get_onlyincompound() &&
777
0
          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
778
0
         (info && (*info & SPELL_INITCAP) &&
779
0
          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
780
0
      he = NULL;
781
0
    }
782
0
783
0
    if (he) {
784
0
      if ((he->astr) && (pAMgr) &&
785
0
          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
786
0
        if (info)
787
0
          *info += SPELL_FORBIDDEN;
788
0
        return NULL;
789
0
      }
790
0
      if (root) {
791
0
        root->assign(he->word);
792
0
        if (complexprefixes) {
793
0
          if (utf8)
794
0
            reverseword_utf(*root);
795
0
          else
796
0
            reverseword(*root);
797
0
        }
798
0
      }
799
0
      // try check compound word
800
0
    } else if (pAMgr->get_compound()) {
801
0
      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
802
0
      he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
803
0
      // LANG_hu section: `moving rule' with last dash
804
0
      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
805
0
        std::string dup(word, len - 1);
806
0
        he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
807
0
      }
808
0
      // end of LANG specific region
809
0
      if (he) {
810
0
        if (root) {
811
0
          root->assign(he->word);
812
0
          if (complexprefixes) {
813
0
            if (utf8)
814
0
              reverseword_utf(*root);
815
0
            else
816
0
              reverseword(*root);
817
0
          }
818
0
        }
819
0
        if (info)
820
0
          *info += SPELL_COMPOUND;
821
0
      }
822
0
    }
823
0
  }
824
0
825
0
  return he;
826
0
}
827
828
0
std::vector<std::string> Hunspell::suggest(const std::string& word) {
829
0
  return m_Impl->suggest(word);
830
0
}
831
832
0
std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
833
0
  std::vector<std::string> slst;
834
0
835
0
  int onlycmpdsug = 0;
836
0
  if (!pSMgr || m_HMgrs.empty())
837
0
    return slst;
838
0
839
0
  // process XML input of the simplified API (see manual)
840
0
  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
841
0
    return spellml(word);
842
0
  }
843
0
  if (utf8) {
844
0
    if (word.size() >= MAXWORDUTF8LEN)
845
0
      return slst;
846
0
  } else {
847
0
    if (word.size() >= MAXWORDLEN)
848
0
      return slst;
849
0
  }
850
0
  int captype = NOCAP;
851
0
  size_t abbv = 0;
852
0
  size_t wl = 0;
853
0
854
0
  std::string scw;
855
0
  std::vector<w_char> sunicw;
856
0
857
0
  // input conversion
858
0
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
859
0
  {
860
0
    std::string wspace;
861
0
862
0
    bool convstatus = rl ? rl->conv(word, wspace) : false;
863
0
    if (convstatus)
864
0
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
865
0
    else
866
0
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
867
0
868
0
    if (wl == 0)
869
0
      return slst;
870
0
  }
871
0
872
0
  int capwords = 0;
873
0
874
0
  // check capitalized form for FORCEUCASE
875
0
  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
876
0
    int info = SPELL_ORIGCAP;
877
0
    if (checkword(scw, &info, NULL)) {
878
0
      std::string form(scw);
879
0
      mkinitcap(form);
880
0
      slst.push_back(form);
881
0
      return slst;
882
0
    }
883
0
  }
884
0
885
0
  switch (captype) {
886
0
    case NOCAP: {
887
0
      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
888
0
      break;
889
0
    }
890
0
891
0
    case INITCAP: {
892
0
      capwords = 1;
893
0
      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
894
0
      std::string wspace(scw);
895
0
      mkallsmall2(wspace, sunicw);
896
0
      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
897
0
      break;
898
0
    }
899
0
    case HUHINITCAP:
900
0
      capwords = 1;
901
0
    case HUHCAP: {
902
0
      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
903
0
      // something.The -> something. The
904
0
      size_t dot_pos = scw.find('.');
905
0
      if (dot_pos != std::string::npos) {
906
0
        std::string postdot = scw.substr(dot_pos + 1);
907
0
        int captype_;
908
0
        if (utf8) {
909
0
          std::vector<w_char> postdotu;
910
0
          u8_u16(postdotu, postdot);
911
0
          captype_ = get_captype_utf8(postdotu, langnum);
912
0
        } else {
913
0
          captype_ = get_captype(postdot, csconv);
914
0
        }
915
0
        if (captype_ == INITCAP) {
916
0
          std::string str(scw);
917
0
          str.insert(dot_pos + 1, 1, ' ');
918
0
          insert_sug(slst, str);
919
0
        }
920
0
      }
921
0
922
0
      std::string wspace;
923
0
924
0
      if (captype == HUHINITCAP) {
925
0
        // TheOpenOffice.org -> The OpenOffice.org
926
0
        wspace = scw;
927
0
        mkinitsmall2(wspace, sunicw);
928
0
        pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
929
0
      }
930
0
      wspace = scw;
931
0
      mkallsmall2(wspace, sunicw);
932
0
      if (spell(wspace.c_str()))
933
0
        insert_sug(slst, wspace);
934
0
      size_t prevns = slst.size();
935
0
      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
936
0
      if (captype == HUHINITCAP) {
937
0
        mkinitcap2(wspace, sunicw);
938
0
        if (spell(wspace.c_str()))
939
0
          insert_sug(slst, wspace);
940
0
        pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
941
0
      }
942
0
      // aNew -> "a New" (instead of "a new")
943
0
      for (size_t j = prevns; j < slst.size(); ++j) {
944
0
        const char* space = strchr(slst[j].c_str(), ' ');
945
0
        if (space) {
946
0
          size_t slen = strlen(space + 1);
947
0
          // different case after space (need capitalisation)
948
0
          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
949
0
            std::string first(slst[j].c_str(), space + 1);
950
0
            std::string second(space + 1);
951
0
            std::vector<w_char> w;
952
0
            if (utf8)
953
0
              u8_u16(w, second);
954
0
            mkinitcap2(second, w);
955
0
            // set as first suggestion
956
0
            slst.erase(slst.begin() + j);
957
0
            slst.insert(slst.begin(), first + second);
958
0
          }
959
0
        }
960
0
      }
961
0
      break;
962
0
    }
963
0
964
0
    case ALLCAP: {
965
0
      std::string wspace(scw);
966
0
      mkallsmall2(wspace, sunicw);
967
0
      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
968
0
      if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
969
0
        insert_sug(slst, wspace);
970
0
      mkinitcap2(wspace, sunicw);
971
0
      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
972
0
      for (size_t j = 0; j < slst.size(); ++j) {
973
0
        mkallcap(slst[j]);
974
0
        if (pAMgr && pAMgr->get_checksharps()) {
975
0
          if (utf8) {
976
0
            mystrrep(slst[j], "\xC3\x9F", "SS");
977
0
          } else {
978
0
            mystrrep(slst[j], "\xDF", "SS");
979
0
          }
980
0
        }
981
0
      }
982
0
      break;
983
0
    }
984
0
  }
985
0
986
0
  // LANG_hu section: replace '-' with ' ' in Hungarian
987
0
  if (langnum == LANG_hu) {
988
0
    for (size_t j = 0; j < slst.size(); ++j) {
989
0
      size_t pos = slst[j].find('-');
990
0
      if (pos != std::string::npos) {
991
0
        int info;
992
0
        std::string w(slst[j].substr(0, pos));
993
0
        w.append(slst[j].substr(pos + 1));
994
0
        (void)spell(w, &info, NULL);
995
0
        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
996
0
          slst[j][pos] = ' ';
997
0
        } else
998
0
          slst[j][pos] = '-';
999
0
      }
1000
0
    }
1001
0
  }
1002
0
  // END OF LANG_hu section
1003
0
1004
0
  // try ngram approach since found nothing or only compound words
1005
0
  if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1006
0
    switch (captype) {
1007
0
      case NOCAP: {
1008
0
        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
1009
0
        break;
1010
0
      }
1011
0
      case HUHINITCAP:
1012
0
        capwords = 1;
1013
0
      case HUHCAP: {
1014
0
        std::string wspace(scw);
1015
0
        mkallsmall2(wspace, sunicw);
1016
0
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1017
0
        break;
1018
0
      }
1019
0
      case INITCAP: {
1020
0
        capwords = 1;
1021
0
        std::string wspace(scw);
1022
0
        mkallsmall2(wspace, sunicw);
1023
0
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1024
0
        break;
1025
0
      }
1026
0
      case ALLCAP: {
1027
0
        std::string wspace(scw);
1028
0
        mkallsmall2(wspace, sunicw);
1029
0
        size_t oldns = slst.size();
1030
0
        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1031
0
        for (size_t j = oldns; j < slst.size(); ++j) {
1032
0
          mkallcap(slst[j]);
1033
0
        }
1034
0
        break;
1035
0
      }
1036
0
    }
1037
0
  }
1038
0
1039
0
  // try dash suggestion (Afo-American -> Afro-American)
1040
0
  size_t dash_pos = scw.find('-');
1041
0
  if (dash_pos != std::string::npos) {
1042
0
    int nodashsug = 1;
1043
0
    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1044
0
      if (slst[j].find('-') != std::string::npos)
1045
0
        nodashsug = 0;
1046
0
    }
1047
0
1048
0
    size_t prev_pos = 0;
1049
0
    bool last = false;
1050
0
1051
0
    while (nodashsug && !last) {
1052
0
      if (dash_pos == scw.size())
1053
0
        last = 1;
1054
0
      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1055
0
      if (!spell(chunk.c_str())) {
1056
0
        std::vector<std::string> nlst = suggest(chunk.c_str());
1057
0
        for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
1058
0
          std::string wspace = scw.substr(0, prev_pos);
1059
0
          wspace.append(*j);
1060
0
          if (!last) {
1061
0
            wspace.append("-");
1062
0
            wspace.append(scw.substr(dash_pos + 1));
1063
0
          }
1064
0
          insert_sug(slst, wspace);
1065
0
        }
1066
0
        nodashsug = 0;
1067
0
      }
1068
0
      if (!last) {
1069
0
        prev_pos = dash_pos + 1;
1070
0
        dash_pos = scw.find('-', prev_pos);
1071
0
      }
1072
0
      if (dash_pos == std::string::npos)
1073
0
        dash_pos = scw.size();
1074
0
    }
1075
0
  }
1076
0
1077
0
  // word reversing wrapper for complex prefixes
1078
0
  if (complexprefixes) {
1079
0
    for (size_t j = 0; j < slst.size(); ++j) {
1080
0
      if (utf8)
1081
0
        reverseword_utf(slst[j]);
1082
0
      else
1083
0
        reverseword(slst[j]);
1084
0
    }
1085
0
  }
1086
0
1087
0
  // capitalize
1088
0
  if (capwords)
1089
0
    for (size_t j = 0; j < slst.size(); ++j) {
1090
0
      mkinitcap(slst[j]);
1091
0
    }
1092
0
1093
0
  // expand suggestions with dot(s)
1094
0
  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1095
0
    for (size_t j = 0; j < slst.size(); ++j) {
1096
0
      slst[j].append(word.substr(word.size() - abbv));
1097
0
    }
1098
0
  }
1099
0
1100
0
  // remove bad capitalized and forbidden forms
1101
0
  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
1102
0
    switch (captype) {
1103
0
      case INITCAP:
1104
0
      case ALLCAP: {
1105
0
        size_t l = 0;
1106
0
        for (size_t j = 0; j < slst.size(); ++j) {
1107
0
          if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
1108
0
            std::string s;
1109
0
            std::vector<w_char> w;
1110
0
            if (utf8) {
1111
0
              u8_u16(w, slst[j]);
1112
0
            } else {
1113
0
              s = slst[j];
1114
0
            }
1115
0
            mkallsmall2(s, w);
1116
0
            if (spell(s)) {
1117
0
              slst[l] = s;
1118
0
              ++l;
1119
0
            } else {
1120
0
              mkinitcap2(s, w);
1121
0
              if (spell(s)) {
1122
0
                slst[l] = s;
1123
0
                ++l;
1124
0
              }
1125
0
            }
1126
0
          } else {
1127
0
            slst[l] = slst[j];
1128
0
            ++l;
1129
0
          }
1130
0
        }
1131
0
        slst.resize(l);
1132
0
      }
1133
0
    }
1134
0
  }
1135
0
1136
0
  // remove duplications
1137
0
  size_t l = 0;
1138
0
  for (size_t j = 0; j < slst.size(); ++j) {
1139
0
    slst[l] = slst[j];
1140
0
    for (size_t k = 0; k < l; ++k) {
1141
0
      if (slst[k] == slst[j]) {
1142
0
        --l;
1143
0
        break;
1144
0
      }
1145
0
    }
1146
0
    ++l;
1147
0
  }
1148
0
  slst.resize(l);
1149
0
1150
0
  // output conversion
1151
0
  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1152
0
  for (size_t j = 0; rl && j < slst.size(); ++j) {
1153
0
    std::string wspace;
1154
0
    if (rl->conv(slst[j], wspace)) {
1155
0
      slst[j] = wspace;
1156
0
    }
1157
0
  }
1158
0
1159
0
  return slst;
1160
0
}
1161
1162
0
const std::string& Hunspell::get_dict_encoding() const {
1163
0
  return m_Impl->get_dict_encoding();
1164
0
}
1165
1166
0
const std::string& HunspellImpl::get_dict_encoding() const {
1167
0
  return encoding;
1168
0
}
1169
1170
0
std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
1171
0
  return m_Impl->stem(desc);
1172
0
}
1173
1174
0
std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1175
0
  std::vector<std::string> slst;
1176
0
1177
0
  std::string result2;
1178
0
  if (desc.empty())
1179
0
    return slst;
1180
0
  for (size_t i = 0; i < desc.size(); ++i) {
1181
0
1182
0
    std::string result;
1183
0
1184
0
    // add compound word parts (except the last one)
1185
0
    const char* s = desc[i].c_str();
1186
0
    const char* part = strstr(s, MORPH_PART);
1187
0
    if (part) {
1188
0
      const char* nextpart = strstr(part + 1, MORPH_PART);
1189
0
      while (nextpart) {
1190
0
        std::string field;
1191
0
        copy_field(field, part, MORPH_PART);
1192
0
        result.append(field);
1193
0
        part = nextpart;
1194
0
        nextpart = strstr(part + 1, MORPH_PART);
1195
0
      }
1196
0
      s = part;
1197
0
    }
1198
0
1199
0
    std::string tok(s);
1200
0
    size_t alt = 0;
1201
0
    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1202
0
      tok[alt + 1] = MSEP_ALT;
1203
0
    }
1204
0
    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1205
0
    for (size_t k = 0; k < pl.size(); ++k) {
1206
0
      // add derivational suffixes
1207
0
      if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
1208
0
        // remove inflectional suffixes
1209
0
        const size_t is = pl[k].find(MORPH_INFL_SFX);
1210
0
        if (is != std::string::npos)
1211
0
          pl[k].resize(is);
1212
0
        std::vector<std::string> singlepl;
1213
0
        singlepl.push_back(pl[k]);
1214
0
        std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
1215
0
        if (!sg.empty()) {
1216
0
          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1217
0
          for (size_t j = 0; j < gen.size(); ++j) {
1218
0
            result2.push_back(MSEP_REC);
1219
0
            result2.append(result);
1220
0
            result2.append(gen[j]);
1221
0
          }
1222
0
        }
1223
0
      } else {
1224
0
        result2.push_back(MSEP_REC);
1225
0
        result2.append(result);
1226
0
        if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
1227
0
          std::string field;
1228
0
          copy_field(field, pl[k], MORPH_SURF_PFX);
1229
0
          result2.append(field);
1230
0
        }
1231
0
        std::string field;
1232
0
        copy_field(field, pl[k], MORPH_STEM);
1233
0
        result2.append(field);
1234
0
      }
1235
0
    }
1236
0
  }
1237
0
  slst = line_tok(result2, MSEP_REC);
1238
0
  uniqlist(slst);
1239
0
  return slst;
1240
0
}
1241
1242
0
std::vector<std::string> Hunspell::stem(const std::string& word) {
1243
0
  return m_Impl->stem(word);
1244
0
}
1245
1246
0
std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1247
0
  return stem(analyze(word));
1248
0
}
1249
1250
0
const char* Hunspell::get_wordchars() const {
1251
0
  return m_Impl->get_wordchars().c_str();
1252
0
}
1253
1254
0
const std::string& Hunspell::get_wordchars_cpp() const {
1255
0
  return m_Impl->get_wordchars();
1256
0
}
1257
1258
0
const std::string& HunspellImpl::get_wordchars() const {
1259
0
  return pAMgr->get_wordchars();
1260
0
}
1261
1262
0
const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
1263
0
  return m_Impl->get_wordchars_utf16();
1264
0
}
1265
1266
0
const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1267
0
  return pAMgr->get_wordchars_utf16();
1268
0
}
1269
1270
0
void HunspellImpl::mkinitcap(std::string& u8) {
1271
0
  if (utf8) {
1272
0
    std::vector<w_char> u16;
1273
0
    u8_u16(u16, u8);
1274
0
    ::mkinitcap_utf(u16, langnum);
1275
0
    u16_u8(u8, u16);
1276
0
  } else {
1277
0
    ::mkinitcap(u8, csconv);
1278
0
  }
1279
0
}
1280
1281
0
int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1282
0
  if (utf8) {
1283
0
    ::mkinitcap_utf(u16, langnum);
1284
0
    u16_u8(u8, u16);
1285
0
  } else {
1286
0
    ::mkinitcap(u8, csconv);
1287
0
  }
1288
0
  return u8.size();
1289
0
}
1290
1291
0
int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1292
0
  if (utf8) {
1293
0
    ::mkinitsmall_utf(u16, langnum);
1294
0
    u16_u8(u8, u16);
1295
0
  } else {
1296
0
    ::mkinitsmall(u8, csconv);
1297
0
  }
1298
0
  return u8.size();
1299
0
}
1300
1301
0
int Hunspell::add(const std::string& word) {
1302
0
  return m_Impl->add(word);
1303
0
}
1304
1305
0
int HunspellImpl::add(const std::string& word) {
1306
0
  if (!m_HMgrs.empty())
1307
0
    return m_HMgrs[0]->add(word);
1308
0
  return 0;
1309
0
}
1310
1311
0
int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
1312
0
  return m_Impl->add_with_affix(word, example);
1313
0
}
1314
1315
0
int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1316
0
  if (!m_HMgrs.empty())
1317
0
    return m_HMgrs[0]->add_with_affix(word, example);
1318
0
  return 0;
1319
0
}
1320
1321
0
int Hunspell::remove(const std::string& word) {
1322
0
  return m_Impl->remove(word);
1323
0
}
1324
1325
0
int HunspellImpl::remove(const std::string& word) {
1326
0
  if (!m_HMgrs.empty())
1327
0
    return m_HMgrs[0]->remove(word);
1328
0
  return 0;
1329
0
}
1330
1331
0
const char* Hunspell::get_version() const {
1332
0
  return m_Impl->get_version().c_str();
1333
0
}
1334
1335
0
const std::string& Hunspell::get_version_cpp() const {
1336
0
  return m_Impl->get_version();
1337
0
}
1338
1339
0
const std::string& HunspellImpl::get_version() const {
1340
0
  return pAMgr->get_version();
1341
0
}
1342
1343
0
struct cs_info* HunspellImpl::get_csconv() {
1344
0
  return csconv;
1345
0
}
1346
1347
0
struct cs_info* Hunspell::get_csconv() {
1348
0
  return m_Impl->get_csconv();
1349
0
}
1350
1351
0
// Appends `st` to `result`, separated from existing content by a newline.
// An empty `st` leaves `result` untouched.
void HunspellImpl::cat_result(std::string& result, const std::string& st) {
  if (st.empty())
    return;
  if (!result.empty())
    result.push_back('\n');
  result += st;
}
1358
1359
0
std::vector<std::string> Hunspell::analyze(const std::string& word) {
1360
0
  return m_Impl->analyze(word);
1361
0
}
1362
1363
0
std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1364
0
  std::vector<std::string> slst;
1365
0
  if (!pSMgr || m_HMgrs.empty())
1366
0
    return slst;
1367
0
  if (utf8) {
1368
0
    if (word.size() >= MAXWORDUTF8LEN)
1369
0
      return slst;
1370
0
  } else {
1371
0
    if (word.size() >= MAXWORDLEN)
1372
0
      return slst;
1373
0
  }
1374
0
  int captype = NOCAP;
1375
0
  size_t abbv = 0;
1376
0
  size_t wl = 0;
1377
0
1378
0
  std::string scw;
1379
0
  std::vector<w_char> sunicw;
1380
0
1381
0
  // input conversion
1382
0
  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1383
0
  {
1384
0
    std::string wspace;
1385
0
1386
0
    bool convstatus = rl ? rl->conv(word, wspace) : false;
1387
0
    if (convstatus)
1388
0
      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1389
0
    else
1390
0
      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1391
0
  }
1392
0
1393
0
  if (wl == 0) {
1394
0
    if (abbv) {
1395
0
      scw.clear();
1396
0
      for (wl = 0; wl < abbv; wl++)
1397
0
        scw.push_back('.');
1398
0
      abbv = 0;
1399
0
    } else
1400
0
      return slst;
1401
0
  }
1402
0
1403
0
  std::string result;
1404
0
1405
0
  size_t n = 0;
1406
0
  // test numbers
1407
0
  // LANG_hu section: set dash information for suggestions
1408
0
  if (langnum == LANG_hu) {
1409
0
    size_t n2 = 0;
1410
0
    size_t n3 = 0;
1411
0
1412
0
    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1413
0
                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1414
0
      n++;
1415
0
      if ((scw[n] == '.') || (scw[n] == ',')) {
1416
0
        if (((n2 == 0) && (n > 3)) ||
1417
0
            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1418
0
          break;
1419
0
        n2++;
1420
0
        n3 = n;
1421
0
      }
1422
0
    }
1423
0
1424
0
    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1425
0
      return slst;
1426
0
    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1427
0
                      checkword(scw.substr(n), NULL, NULL))) {
1428
0
      result.append(scw);
1429
0
      result.resize(n - 1);
1430
0
      if (n == wl)
1431
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1432
0
      else {
1433
0
        std::string chunk = scw.substr(n - 1, 1);
1434
0
        cat_result(result, pSMgr->suggest_morph(chunk));
1435
0
        result.push_back('+');  // XXX SPEC. MORPHCODE
1436
0
        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1437
0
      }
1438
0
      return line_tok(result, MSEP_REC);
1439
0
    }
1440
0
  }
1441
0
  // END OF LANG_hu section
1442
0
1443
0
  switch (captype) {
1444
0
    case HUHCAP:
1445
0
    case HUHINITCAP:
1446
0
    case NOCAP: {
1447
0
      cat_result(result, pSMgr->suggest_morph(scw));
1448
0
      if (abbv) {
1449
0
        std::string u8buffer(scw);
1450
0
        u8buffer.push_back('.');
1451
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1452
0
      }
1453
0
      break;
1454
0
    }
1455
0
    case INITCAP: {
1456
0
      mkallsmall2(scw, sunicw);
1457
0
      std::string u8buffer(scw);
1458
0
      mkinitcap2(scw, sunicw);
1459
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1460
0
      cat_result(result, pSMgr->suggest_morph(scw));
1461
0
      if (abbv) {
1462
0
        u8buffer.push_back('.');
1463
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1464
0
1465
0
        u8buffer = scw;
1466
0
        u8buffer.push_back('.');
1467
0
1468
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1469
0
      }
1470
0
      break;
1471
0
    }
1472
0
    case ALLCAP: {
1473
0
      cat_result(result, pSMgr->suggest_morph(scw));
1474
0
      if (abbv) {
1475
0
        std::string u8buffer(scw);
1476
0
        u8buffer.push_back('.');
1477
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1478
0
      }
1479
0
      mkallsmall2(scw, sunicw);
1480
0
      std::string u8buffer(scw);
1481
0
      mkinitcap2(scw, sunicw);
1482
0
1483
0
      cat_result(result, pSMgr->suggest_morph(u8buffer));
1484
0
      cat_result(result, pSMgr->suggest_morph(scw));
1485
0
      if (abbv) {
1486
0
        u8buffer.push_back('.');
1487
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1488
0
1489
0
        u8buffer = scw;
1490
0
        u8buffer.push_back('.');
1491
0
1492
0
        cat_result(result, pSMgr->suggest_morph(u8buffer));
1493
0
      }
1494
0
      break;
1495
0
    }
1496
0
  }
1497
0
1498
0
  if (!result.empty()) {
1499
0
    // word reversing wrapper for complex prefixes
1500
0
    if (complexprefixes) {
1501
0
      if (utf8)
1502
0
        reverseword_utf(result);
1503
0
      else
1504
0
        reverseword(result);
1505
0
    }
1506
0
    return line_tok(result, MSEP_REC);
1507
0
  }
1508
0
1509
0
  // compound word with dash (HU) I18n
1510
0
  // LANG_hu section: set dash information for suggestions
1511
0
1512
0
  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1513
0
  if (dash_pos != std::string::npos) {
1514
0
    int nresult = 0;
1515
0
1516
0
    std::string part1 = scw.substr(0, dash_pos);
1517
0
    std::string part2 = scw.substr(dash_pos+1);
1518
0
1519
0
    // examine 2 sides of the dash
1520
0
    if (part2.empty()) {  // base word ending with dash
1521
0
      if (spell(part1)) {
1522
0
        std::string p = pSMgr->suggest_morph(part1);
1523
0
        if (!p.empty()) {
1524
0
          slst = line_tok(p, MSEP_REC);
1525
0
          return slst;
1526
0
        }
1527
0
      }
1528
0
    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1529
0
      if (spell(part1) && (spell("-e"))) {
1530
0
        std::string st = pSMgr->suggest_morph(part1);
1531
0
        if (!st.empty()) {
1532
0
          result.append(st);
1533
0
        }
1534
0
        result.push_back('+');  // XXX spec. separator in MORPHCODE
1535
0
        st = pSMgr->suggest_morph("-e");
1536
0
        if (!st.empty()) {
1537
0
          result.append(st);
1538
0
        }
1539
0
        return line_tok(result, MSEP_REC);
1540
0
      }
1541
0
    } else {
1542
0
      // first word ending with dash: word- XXX ???
1543
0
      part1.push_back(' ');
1544
0
      nresult = spell(part1);
1545
0
      part1.erase(part1.size() - 1);
1546
0
      if (nresult && spell(part2) &&
1547
0
          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1548
0
        std::string st = pSMgr->suggest_morph(part1);
1549
0
        if (!st.empty()) {
1550
0
          result.append(st);
1551
0
          result.push_back('+');  // XXX spec. separator in MORPHCODE
1552
0
        }
1553
0
        st = pSMgr->suggest_morph(part2);
1554
0
        if (!st.empty()) {
1555
0
          result.append(st);
1556
0
        }
1557
0
        return line_tok(result, MSEP_REC);
1558
0
      }
1559
0
    }
1560
0
    // affixed number in correct word
1561
0
    if (nresult && (dash_pos > 0) &&
1562
0
        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1563
0
         (scw[dash_pos - 1] == '.'))) {
1564
0
      n = 1;
1565
0
      if (scw[dash_pos - n] == '.')
1566
0
        n++;
1567
0
      // search first not a number character to left from dash
1568
0
      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1569
0
             (n < 6)) {
1570
0
        n++;
1571
0
      }
1572
0
      if (dash_pos < n)
1573
0
        n--;
1574
0
      // numbers: valami1000000-hoz
1575
0
      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1576
0
      // 56-hoz, 6-hoz
1577
0
      for (; n >= 1; n--) {
1578
0
        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1579
0
            continue;
1580
0
        }
1581
0
        std::string chunk = scw.substr(dash_pos - n);
1582
0
        if (checkword(chunk, NULL, NULL)) {
1583
0
          result.append(chunk);
1584
0
          std::string st = pSMgr->suggest_morph(chunk);
1585
0
          if (!st.empty()) {
1586
0
            result.append(st);
1587
0
          }
1588
0
          return line_tok(result, MSEP_REC);
1589
0
        }
1590
0
      }
1591
0
    }
1592
0
  }
1593
0
  return slst;
1594
0
}
1595
1596
0
std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
1597
0
  return m_Impl->generate(word, pl);
1598
0
}
1599
1600
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1601
0
  std::vector<std::string> slst;
1602
0
  if (!pSMgr || pl.empty())
1603
0
    return slst;
1604
0
  std::vector<std::string> pl2 = analyze(word);
1605
0
  int captype = NOCAP;
1606
0
  int abbv = 0;
1607
0
  std::string cw;
1608
0
  cleanword(cw, word, &captype, &abbv);
1609
0
  std::string result;
1610
0
1611
0
  for (size_t i = 0; i < pl.size(); ++i) {
1612
0
    cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
1613
0
  }
1614
0
1615
0
  if (!result.empty()) {
1616
0
    // allcap
1617
0
    if (captype == ALLCAP)
1618
0
      mkallcap(result);
1619
0
1620
0
    // line split
1621
0
    slst = line_tok(result, MSEP_REC);
1622
0
1623
0
    // capitalize
1624
0
    if (captype == INITCAP || captype == HUHINITCAP) {
1625
0
      for (size_t j = 0; j < slst.size(); ++j) {
1626
0
        mkinitcap(slst[j]);
1627
0
      }
1628
0
    }
1629
0
1630
0
    // temporary filtering of prefix related errors (eg.
1631
0
    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1632
0
    std::vector<std::string>::iterator it = slst.begin();
1633
0
    while (it != slst.end()) {
1634
0
      if (!spell(*it)) {
1635
0
        it = slst.erase(it);
1636
0
      } else  {
1637
0
        ++it;
1638
0
      }
1639
0
    }
1640
0
  }
1641
0
  return slst;
1642
0
}
1643
1644
0
std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
1645
0
  return m_Impl->generate(word, pattern);
1646
0
}
1647
1648
0
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1649
0
  std::vector<std::string> pl = analyze(pattern);
1650
0
  std::vector<std::string> slst = generate(word, pl);
1651
0
  uniqlist(slst);
1652
0
  return slst;
1653
0
}
1654
1655
// minimal XML parser functions
1656
0
std::string HunspellImpl::get_xml_par(const char* par) {
1657
0
  std::string dest;
1658
0
  if (!par)
1659
0
    return dest;
1660
0
  char end = *par;
1661
0
  if (end == '>')
1662
0
    end = '<';
1663
0
  else if (end != '\'' && end != '"')
1664
0
    return dest;  // bad XML
1665
0
  for (par++; *par != '\0' && *par != end; ++par) {
1666
0
    dest.push_back(*par);
1667
0
  }
1668
0
  mystrrep(dest, "&lt;", "<");
1669
0
  mystrrep(dest, "&amp;", "&");
1670
0
  return dest;
1671
0
}
1672
1673
0
int Hunspell::get_langnum() const {
1674
0
  return m_Impl->get_langnum();
1675
0
}
1676
1677
0
int HunspellImpl::get_langnum() const {
1678
0
  return langnum;
1679
0
}
1680
1681
0
bool Hunspell::input_conv(const std::string& word, std::string& dest) {
1682
0
  return m_Impl->input_conv(word, dest);
1683
0
}
1684
1685
0
int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
1686
0
  std::string d;
1687
0
  bool ret = input_conv(word, d);
1688
0
  if (ret && d.size() < destsize) {
1689
0
    strncpy(dest, d.c_str(), destsize);
1690
0
    return 1;
1691
0
  }
1692
0
  return 0;
1693
0
}
1694
1695
0
bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1696
0
  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1697
0
  if (rl) {
1698
0
    return rl->conv(word, dest);
1699
0
  }
1700
0
  dest.assign(word);
1701
0
  return false;
1702
0
}
1703
1704
// return the beginning of the element (attr == NULL) or the attribute
1705
0
const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
1706
0
  const char* end = strchr(s, '>');
1707
0
  if (attr == NULL)
1708
0
    return end;
1709
0
  const char* p = s;
1710
0
  while (1) {
1711
0
    p = strstr(p, attr);
1712
0
    if (!p || p >= end)
1713
0
      return 0;
1714
0
    if (*(p - 1) == ' ' || *(p - 1) == '\n')
1715
0
      break;
1716
0
    p += strlen(attr);
1717
0
  }
1718
0
  return p + strlen(attr);
1719
0
}
1720
1721
int HunspellImpl::check_xml_par(const char* q,
1722
                            const char* attr,
1723
0
                            const char* value) {
1724
0
  std::string cw = get_xml_par(get_xml_pos(q, attr));
1725
0
  if (cw == value)
1726
0
    return 1;
1727
0
  return 0;
1728
0
}
1729
1730
0
std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
1731
0
  std::vector<std::string> slst;
1732
0
  if (!list)
1733
0
    return slst;
1734
0
  const char* p = list;
1735
0
  for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
1736
0
    std::string cw = get_xml_par(p + strlen(tag) - 1);
1737
0
    if (cw.empty()) {
1738
0
      break;
1739
0
    }
1740
0
    slst.push_back(cw);
1741
0
  }
1742
0
  return slst;
1743
0
}
1744
1745
0
std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1746
0
  std::vector<std::string> slst;
1747
0
1748
0
  const char* word = in_word.c_str();
1749
0
1750
0
  const char* q = strstr(word, "<query");
1751
0
  if (!q)
1752
0
    return slst;  // bad XML input
1753
0
  const char* q2 = strchr(q, '>');
1754
0
  if (!q2)
1755
0
    return slst;  // bad XML input
1756
0
  q2 = strstr(q2, "<word");
1757
0
  if (!q2)
1758
0
    return slst;  // bad XML input
1759
0
  if (check_xml_par(q, "type=", "analyze")) {
1760
0
    std::string cw = get_xml_par(strchr(q2, '>'));
1761
0
    if (!cw.empty())
1762
0
      slst = analyze(cw);
1763
0
    if (slst.empty())
1764
0
      return slst;
1765
0
    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1766
0
    std::string r;
1767
0
    r.append("<code>");
1768
0
    for (size_t i = 0; i < slst.size(); ++i) {
1769
0
      r.append("<a>");
1770
0
1771
0
      std::string entry(slst[i]);
1772
0
      mystrrep(entry, "\t", " ");
1773
0
      mystrrep(entry, "&", "&amp;");
1774
0
      mystrrep(entry, "<", "&lt;");
1775
0
      r.append(entry);
1776
0
1777
0
      r.append("</a>");
1778
0
    }
1779
0
    r.append("</code>");
1780
0
    slst.clear();
1781
0
    slst.push_back(r);
1782
0
    return slst;
1783
0
  } else if (check_xml_par(q, "type=", "stem")) {
1784
0
    std::string cw = get_xml_par(strchr(q2, '>'));
1785
0
    if (!cw.empty())
1786
0
      return stem(cw);
1787
0
  } else if (check_xml_par(q, "type=", "generate")) {
1788
0
    std::string cw = get_xml_par(strchr(q2, '>'));
1789
0
    if (cw.empty())
1790
0
      return slst;
1791
0
    const char* q3 = strstr(q2 + 1, "<word");
1792
0
    if (q3) {
1793
0
      std::string cw2 = get_xml_par(strchr(q3, '>'));
1794
0
      if (!cw2.empty()) {
1795
0
        return generate(cw, cw2);
1796
0
      }
1797
0
    } else {
1798
0
      if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1799
0
        std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
1800
0
        if (!slst2.empty()) {
1801
0
          slst = generate(cw, slst2);
1802
0
          uniqlist(slst);
1803
0
          return slst;
1804
0
        }
1805
0
      }
1806
0
    }
1807
0
  }
1808
0
  return slst;
1809
0
}
1810
1811
0
int Hunspell::spell(const char* word, int* info, char** root) {
1812
0
  std::string sroot;
1813
0
  bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
1814
0
  if (root) {
1815
0
    if (sroot.empty()) {
1816
0
      *root = NULL;
1817
0
    } else {
1818
0
      *root = mystrdup(sroot.c_str());
1819
0
    }
1820
0
  }
1821
0
  return ret;
1822
0
}
1823
1824
namespace {
1825
0
  int munge_vector(char*** slst, const std::vector<std::string>& items) {
1826
0
    if (items.empty()) {
1827
0
      *slst = NULL;
1828
0
      return 0;
1829
0
    } else {
1830
0
      *slst = (char**)malloc(sizeof(char*) * items.size());
1831
0
      if (!*slst)
1832
0
        return 0;
1833
0
      for (size_t i = 0; i < items.size(); ++i)
1834
0
        (*slst)[i] = mystrdup(items[i].c_str());
1835
0
    }
1836
0
    return items.size();
1837
0
  }
1838
}
1839
1840
0
void Hunspell::free_list(char*** slst, int n) {
1841
0
  Hunspell_free_list((Hunhandle*)(this), slst, n);
1842
0
}
1843
1844
0
int Hunspell::suggest(char*** slst, const char* word) {
1845
0
  return Hunspell_suggest((Hunhandle*)(this), slst, word);
1846
0
}
1847
1848
0
int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
1849
0
  std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
1850
0
  return munge_vector(slst, stems);
1851
0
}
1852
1853
0
char* Hunspell::get_dic_encoding() {
1854
0
  return &(m_Impl->dic_encoding_vec[0]);
1855
0
}
1856
1857
0
int Hunspell::stem(char*** slst, char** desc, int n) {
1858
0
  return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
1859
0
}
1860
1861
0
int Hunspell::stem(char*** slst, const char* word) {
1862
0
  return Hunspell_stem((Hunhandle*)(this), slst, word);
1863
0
}
1864
1865
0
int Hunspell::analyze(char*** slst, const char* word) {
1866
0
  return Hunspell_analyze((Hunhandle*)(this), slst, word);
1867
0
}
1868
1869
0
int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
1870
0
  return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
1871
0
}
1872
1873
0
int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
1874
0
  return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
1875
0
}
1876
1877
0
/* Allocates a Hunspell object from the given affix and dictionary paths
 * and returns it as an opaque handle. Release it with Hunspell_destroy(). */
Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
  Hunspell* const created = new Hunspell(affpath, dpath);
  return reinterpret_cast<Hunhandle*>(created);
}
1880
1881
/* Same as Hunspell_create(), but additionally passes `key` to the
 * Hunspell constructor. Release the handle with Hunspell_destroy(). */
Hunhandle* Hunspell_create_key(const char* affpath,
                               const char* dpath,
                               const char* key) {
  Hunspell* const created = new Hunspell(affpath, dpath, key);
  return reinterpret_cast<Hunhandle*>(created);
}
1886
1887
0
/* Deletes the Hunspell object that the handle refers to. */
void Hunspell_destroy(Hunhandle* pHunspell) {
  Hunspell* const target = reinterpret_cast<Hunspell*>(pHunspell);
  delete target;
}
1890
1891
0
/* Forwards `dpath` to Hunspell::add_dic() on the object behind the handle
 * and returns that call's result. */
int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return speller->add_dic(dpath);
}
1894
1895
0
/* Spell-checks `word` through the object behind the handle. The word is
 * copied into a std::string first so the std::string overload of
 * Hunspell::spell() is the one invoked. Returns that call's result. */
int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  const std::string text(word);
  return speller->spell(text);
}
1898
1899
0
/* Returns whatever Hunspell::get_dic_encoding() yields for the object
 * behind the handle. */
char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return speller->get_dic_encoding();
}
1902
1903
0
/* Collects suggestions for `word` from the object behind the handle and
 * converts them to a C string array via munge_vector().
 * Returns munge_vector()'s result; release the array with
 * Hunspell_free_list(). */
int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return munge_vector(slst, speller->suggest(word));
}
1907
1908
0
/* Runs Hunspell::analyze() on `word` for the object behind the handle and
 * converts the resulting strings to a C string array via munge_vector().
 * Returns munge_vector()'s result. */
int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return munge_vector(slst, speller->analyze(word));
}
1912
1913
0
/* Runs Hunspell::stem() on `word` for the object behind the handle and
 * converts the resulting strings to a C string array via munge_vector().
 * Returns munge_vector()'s result. */
int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return munge_vector(slst, speller->stem(word));
}
1918
1919
0
/* Stems from a C array of description strings.
 *
 * desc - array of `n` C strings, each copied into a std::string
 * n    - number of entries in `desc`; values <= 0 produce an empty input
 *
 * The copied strings are passed to the vector overload of Hunspell::stem()
 * and the result is converted with munge_vector(), whose return value is
 * returned.
 */
int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
  std::vector<std::string> morph;
  if (n > 0)
    morph.assign(desc, desc + n);

  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return munge_vector(slst, speller->stem(morph));
}
1927
1928
int Hunspell_generate(Hunhandle* pHunspell,
1929
                      char*** slst,
1930
                      const char* word,
1931
0
                      const char* pattern) {
1932
0
  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
1933
0
  return munge_vector(slst, stems);
1934
0
}
1935
1936
int Hunspell_generate2(Hunhandle* pHunspell,
1937
                       char*** slst,
1938
                       const char* word,
1939
                       char** desc,
1940
0
                       int n) {
1941
0
  std::vector<std::string> morph;
1942
0
  for (int i = 0; i < n; ++i)
1943
0
    morph.push_back(desc[i]);
1944
0
1945
0
  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
1946
0
  return munge_vector(slst, stems);
1947
0
}
1948
1949
/* functions for run-time modification of the dictionary */
1950
1951
/* add word to the run-time dictionary */
1952
1953
0
/* Forwards `word` to Hunspell::add() on the object behind the handle and
 * returns that call's result. */
int Hunspell_add(Hunhandle* pHunspell, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return speller->add(word);
}
1956
1957
/* add word to the run-time dictionary with affix flags of
1958
 * the example (a dictionary word): Hunspell will recognize
1959
 * affixed forms of the new word, too.
1960
 */
1961
1962
int Hunspell_add_with_affix(Hunhandle* pHunspell,
1963
                            const char* word,
1964
0
                            const char* example) {
1965
0
  return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
1966
0
}
1967
1968
/* remove word from the run-time dictionary */
1969
1970
0
/* Forwards `word` to Hunspell::remove() on the object behind the handle
 * and returns that call's result. */
int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
  Hunspell* const speller = reinterpret_cast<Hunspell*>(pHunspell);
  return speller->remove(word);
}
1973
1974
0
/* Frees a string array and its entries.
 *
 * The handle argument is unused.
 * list - address of the array pointer; nothing happens when it, or the
 *        pointer it refers to, is NULL
 * n    - number of entries in the array to free()
 *
 * After the entries and the array itself have been free()'d, *list is
 * reset to NULL.
 */
void Hunspell_free_list(Hunhandle*, char*** list, int n) {
  if (!list || !*list)
    return;

  char** entries = *list;
  for (int idx = 0; idx < n; ++idx)
    free(entries[idx]);
  free(entries);
  *list = NULL;
}
1982
1983
0
std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
1984
0
  return m_Impl->suffix_suggest(root_word);
1985
0
}
1986
1987
0
std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1988
0
  std::vector<std::string> slst;
1989
0
  struct hentry* he = NULL;
1990
0
  int len;
1991
0
  std::string w2;
1992
0
  const char* word;
1993
0
  const char* ignoredchars = pAMgr->get_ignore();
1994
0
  if (ignoredchars != NULL) {
1995
0
    w2.assign(root_word);
1996
0
    if (utf8) {
1997
0
      const std::vector<w_char>& ignoredchars_utf16 =
1998
0
          pAMgr->get_ignore_utf16();
1999
0
      remove_ignored_chars_utf(w2, ignoredchars_utf16);
2000
0
    } else {
2001
0
      remove_ignored_chars(w2, ignoredchars);
2002
0
    }
2003
0
    word = w2.c_str();
2004
0
  } else
2005
0
    word = root_word.c_str();
2006
0
2007
0
  len = strlen(word);
2008
0
2009
0
  if (!len)
2010
0
    return slst;
2011
0
2012
0
  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2013
0
    he = m_HMgrs[i]->lookup(word);
2014
0
  }
2015
0
  if (he) {
2016
0
    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
2017
0
  }
2018
0
  return slst;
2019
0
}