Coverage Report

Created: 2023-12-08 06:59

/src/aspell/modules/speller/default/writable.cpp
Line
Count
Source (jump to first uncovered line)
1
// This file is part of The New Aspell
2
// Copyright (C) 2000,2011 by Kevin Atkinson under the GNU LGPL
3
// license version 2.0 or 2.1.  You should have received a copy of the
4
// LGPL license along with this library if you did not you can find it
5
// at http://www.gnu.org/.
6
7
#include <time.h>
8
9
#include "hash-t.hpp"
10
#include "data.hpp"
11
#include "data_util.hpp"
12
#include "enumeration.hpp"
13
#include "errors.hpp"
14
#include "file_util.hpp"
15
#include "fstream.hpp"
16
#include "language.hpp"
17
#include "getdata.hpp"
18
19
namespace {
20
21
//////////////////////////////////////////////////////////////////////
22
//
23
// WritableBase
24
//
25
26
using namespace std;
27
using namespace aspeller;
28
using namespace acommon;
29
30
typedef const char * Str;
31
typedef unsigned char byte;
32
33
struct Hash {
34
  InsensitiveHash<> f;
35
2.13k
  Hash(const Language * l) : f(l) {}
36
17.2M
  size_t operator() (Str s) const {
37
17.2M
    return f(s);
38
17.2M
  }
39
};
40
41
struct Equal {
42
  InsensitiveEqual f;
43
2.13k
  Equal(const Language * l) : f(l) {}
44
0
  bool operator() (Str a, Str b) const {
45
0
    return f(a, b);
46
0
  }
47
};
48
49
0
// Writes the NUL-terminated string `str` to `o`, emitting a newline as the
// two characters \n, a carriage return as \r and a backslash as \\ so that
// every entry stays on one physical line.  All other characters are
// written unchanged.
void write_n_escape(FStream & o, const char * str) {
  for (const char * p = str; *p != '\0'; ++p) {
    switch (*p) {
    case '\n': o << "\\n";  break;
    case '\r': o << "\\r";  break;
    case '\\': o << "\\\\"; break;
    default:   o << *p;     break;
    }
  }
}
58
59
0
// Reads one character from `in`.  End of file is reported as '\0', so a
// caller cannot tell EOF apart from a literal NUL byte in the stream.
static inline char f_getc(FStream & in) {
  const int ch = in.get();
  if (ch == EOF) return '\0';
  return (char)ch;
}
63
  
64
0
// Reads characters from `in` into `str` up to (and consuming) the first
// unescaped `delem`, or up to end of input, undoing the escapes produced
// by write_n_escape: \n, \r and \\ become newline, carriage return and
// backslash.  A backslash followed by anything else is kept literally and
// the following character is then processed as ordinary input.
//
// Returns false, with `str` empty, when the very first read already hits
// end of input; true otherwise.
bool getline_n_unescape(FStream & in, String & str, char delem) {
  str.clear();
  char cur = f_getc(in);
  if (cur == '\0') return false;
  while (cur != '\0' && cur != delem) {
    // Cleared when `cur` already holds a character that still has to go
    // through the loop condition and body.
    bool fetch_next = true;
    if (cur != '\\') {
      str.append(cur);
    } else {
      cur = f_getc(in);
      switch (cur) {
      case 'n':  str.append('\n'); break;
      case 'r':  str.append('\r'); break;
      case '\\': str.append('\\'); break;
      default:
        // Unknown escape: keep the backslash, re-examine `cur`.
        str.append('\\');
        fetch_next = false;
        break;
      }
    }
    if (fetch_next) cur = f_getc(in);
  }
  return true;
}
82
83
// Reads one newline-terminated, escaped line into `buf` and points
// `d.value` at the unescaped text.  `d.value` refers to storage owned by
// `buf`.  Returns false and leaves `d` untouched when no line was read.
bool getline_n_unescape(FStream & in, DataPair & d, String & buf)
{
  const bool got_line = getline_n_unescape(in, buf, '\n');
  if (got_line) {
    d.value.str  = buf.mstr();
    d.value.size = buf.size();
  }
  return got_line;
}
90
91
typedef Vector<Str> StrVector;
92
93
typedef hash_multiset<Str,Hash,Equal> WordLookup;
94
typedef hash_map<Str,StrVector>  SoundslikeLookup;
95
96
class WritableBase : public Dictionary {
97
protected:
98
  String suffix;
99
  String compatibility_suffix;
100
    
101
  time_t cur_file_date;
102
  
103
  String compatibility_file_name;
104
    
105
  WritableBase(BasicType t, const char * n, const char * s, const char * cs, const Config & cfg)
106
    : Dictionary(t,n),
107
      suffix(s), compatibility_suffix(cs),
108
2.13k
      use_soundslike(true) {
109
2.13k
    fast_lookup = true;
110
2.13k
    validate_words = cfg.retrieve_bool("validate-words");
111
2.13k
  }
112
2.13k
  virtual ~WritableBase() {}
113
  
114
  virtual PosibErr<void> save(FStream &, ParmString) = 0;
115
  virtual PosibErr<void> merge(FStream &, ParmString, Config * = 0) = 0;
116
    
117
  PosibErr<void> save2(FStream &, ParmString);
118
  PosibErr<void> update(FStream &, ParmString);
119
  PosibErr<void> save(bool do_update);
120
  PosibErr<void> update_file_date_info(FStream &);
121
  PosibErr<void> load(ParmString, Config &, DictList *, SpellerImpl *);
122
  PosibErr<void> merge(ParmString);
123
  PosibErr<void> save_as(ParmString);
124
  PosibErr<void> clear();
125
126
  String file_encoding;
127
  ConvObj iconv;
128
  ConvObj oconv;
129
  PosibErr<void> set_file_encoding(ParmString, Config & c);
130
131
0
  PosibErr<void> synchronize() {return save(true);}
132
0
  PosibErr<void> save_noupdate() {return save(false);}
133
134
  bool use_soundslike;
135
  StackPtr<WordLookup> word_lookup;
136
  SoundslikeLookup     soundslike_lookup_;
137
  ObjStack             buffer;
138
 
139
2.13k
  void set_lang_hook(Config & c) {
140
2.13k
    set_file_encoding(lang()->data_encoding(), c);
141
2.13k
    word_lookup.reset(new WordLookup(10, Hash(lang()), Equal(lang())));
142
2.13k
    use_soundslike = lang()->have_soundslike();
143
2.13k
  }
144
};
145
146
0
PosibErr<void> WritableBase::update_file_date_info(FStream & f) {
147
0
  RET_ON_ERR(update_file_info(f));
148
0
  cur_file_date = get_modification_time(f);
149
0
  return no_err;
150
0
}
151
  
152
PosibErr<void> WritableBase::load(ParmString f0, Config & config,
153
                                  DictList *, SpellerImpl *)
154
1.42k
{
155
1.42k
  set_file_name(f0);
156
1.42k
  const String f = file_name();
157
1.42k
  FStream in;
158
159
1.42k
  if (file_exists(f)) {
160
      
161
0
    RET_ON_ERR(open_file_readlock(in, f));
162
0
    if (in.peek() == EOF) return make_err(cant_read_file,f); 
163
    // ^^ FIXME 
164
0
    RET_ON_ERR(merge(in, f, &config));
165
      
166
1.42k
  } else if (f.size() >= suffix.size() &&
167
1.42k
             f.substr(f.size()-suffix.size(),suffix.size())
168
1.42k
             == suffix) {
169
      
170
1.42k
    compatibility_file_name = f.substr(0,f.size() - suffix.size());
171
1.42k
    compatibility_file_name += compatibility_suffix;
172
      
173
1.42k
    {
174
1.42k
      PosibErr<void> pe = open_file_readlock(in, compatibility_file_name);
175
1.42k
      if (pe.has_err()) {compatibility_file_name = ""; return pe;}
176
1.42k
    } {
177
0
      PosibErr<void> pe = merge(in, compatibility_file_name, &config);
178
0
      if (pe.has_err()) {compatibility_file_name = ""; return pe;}
179
0
    }
180
      
181
2
  } else {
182
      
183
2
    return make_err(cant_read_file,f);
184
      
185
2
  }
186
187
0
  return update_file_date_info(in);
188
1.42k
}
189
190
0
PosibErr<void> WritableBase::merge(ParmString f0) {
191
0
  FStream in;
192
0
  Dict::FileName fn(f0);
193
0
  RET_ON_ERR(open_file_readlock(in, fn.path()));
194
0
  RET_ON_ERR(merge(in, fn.path()));
195
0
  return no_err;
196
0
}
197
198
0
PosibErr<void> WritableBase::update(FStream & in, ParmString fn) {
199
0
  typedef PosibErr<void> Ret;
200
0
  {
201
0
    Ret pe = merge(in, fn);
202
0
    if (pe.has_err() && compatibility_file_name.empty()) return pe;
203
0
  } {
204
0
    Ret pe = update_file_date_info(in);
205
0
    if (pe.has_err() && compatibility_file_name.empty()) return pe;
206
0
  }
207
0
  return no_err;
208
0
}
209
    
210
0
PosibErr<void> WritableBase::save2(FStream & out, ParmString fn) {
211
0
  truncate_file(out, fn);
212
      
213
0
  RET_ON_ERR(save(out,fn));
214
215
0
  out.flush();
216
217
0
  return no_err;
218
0
}
219
220
0
PosibErr<void> WritableBase::save_as(ParmString fn) {
221
0
  compatibility_file_name = "";
222
0
  set_file_name(fn);
223
0
  FStream inout;
224
0
  RET_ON_ERR(open_file_writelock(inout, file_name()));
225
0
  RET_ON_ERR(save2(inout, file_name()));
226
0
  RET_ON_ERR(update_file_date_info(inout));
227
0
  return no_err;
228
0
}
229
230
0
PosibErr<void> WritableBase::save(bool do_update) {
231
0
  FStream inout;
232
0
  RET_ON_ERR_SET(open_file_writelock(inout, file_name()),
233
0
                 bool, prev_existed);
234
235
0
  if (do_update
236
0
      && prev_existed 
237
0
      && get_modification_time(inout) > cur_file_date)
238
0
    RET_ON_ERR(update(inout, file_name()));
239
240
0
  RET_ON_ERR(save2(inout, file_name()));
241
0
  RET_ON_ERR(update_file_date_info(inout));
242
    
243
0
  if (compatibility_file_name.size() != 0) {
244
0
    remove_file(compatibility_file_name.c_str());
245
0
    compatibility_file_name = "";
246
0
  }
247
248
0
  return no_err;
249
0
}
250
251
0
PosibErr<void> WritableBase::clear() {
252
0
  word_lookup->clear();
253
0
  soundslike_lookup_.clear();
254
0
  buffer.reset();
255
0
  return no_err;
256
0
}
257
258
PosibErr<void> WritableBase::set_file_encoding(ParmString enc, Config & c)
259
2.13k
{
260
2.13k
  if (enc == file_encoding) return no_err;
261
2.13k
  if (enc == "") enc = lang()->charmap();
262
2.13k
  RET_ON_ERR(iconv.setup(c, enc, lang()->charmap(), NormFrom));
263
2.13k
  RET_ON_ERR(oconv.setup(c, lang()->charmap(), enc, NormTo));
264
2.13k
  if (iconv || oconv) 
265
0
    file_encoding = enc;
266
2.13k
  else
267
2.13k
    file_encoding = "";
268
2.13k
  return no_err;
269
2.13k
}
270
271
272
/////////////////////////////////////////////////////////////////////
273
// 
274
//  Common Stuff
275
//
276
277
// a word is stored in memory as follows
278
//   <word info><size><word...><null>
279
// the hash table points to the word and not the start of the block
280
281
static inline void set_word(WordEntry & res, Str w)
282
0
{
283
0
  res.word      = w;
284
0
  res.word_size = (byte)w[-1];
285
0
  res.word_info = (byte)w[-2];
286
0
  res.aff       = "";
287
0
}
288
289
// a soundslike is stored in memory as follows
290
//   <word info><size><sl...><null>
291
// the hash table points to the sl and not the start of the block
292
293
static inline void set_sl(WordEntry & res, Str w)
294
0
{
295
0
  res.word      = w;
296
0
  res.word_size = (byte)w[-1];
297
0
}
298
299
static void soundslike_next(WordEntry * w)
300
0
{
301
0
  const Str * & i   = (const Str * &)(w->intr[0]);
302
0
  const Str *   end = (const Str *  )(w->intr[1]);
303
0
  set_word(*w, *i);
304
0
  ++i;
305
0
  if (i == end) w->adv_ = 0;
306
0
}
307
308
static void sl_init(const StrVector * tmp, WordEntry & o)
309
0
{
310
0
  const Str * i   = tmp->pbegin();
311
0
  const Str * end = tmp->pend();
312
0
  set_word(o, *i);
313
0
  ++i;
314
0
  if (i != end) {
315
0
    o.intr[0] = (void *)i;
316
0
    o.intr[1] = (void *)end;
317
0
    o.adv_ = soundslike_next;
318
0
  } else {
319
0
    o.intr[0] = 0;
320
0
  }
321
0
}
322
323
struct SoundslikeElements : public SoundslikeEnumeration {
324
325
  typedef SoundslikeLookup::const_iterator Itr;
326
327
  Itr i;
328
  Itr end;
329
330
  WordEntry d;
331
332
38.4k
  SoundslikeElements(Itr i0, Itr end0) : i(i0), end(end0) {
333
38.4k
    d.what = WordEntry::Soundslike;
334
38.4k
  }
335
336
38.4k
  WordEntry * next(int) {
337
38.4k
    if (i == end) return 0;
338
0
    set_sl(d, i->first);
339
0
    d.intr[0] = (void *)(&i->second);
340
0
    ++i;
341
0
    return &d;
342
38.4k
  }
343
};
344
345
struct CleanElements : public SoundslikeEnumeration {
346
347
  typedef WordLookup::const_iterator Itr;
348
349
  Itr i;
350
  Itr end;
351
352
  WordEntry d;
353
354
0
  CleanElements(Itr i0, Itr end0) : i(i0), end(end0) {
355
0
    d.what = WordEntry::Word;
356
0
  }
357
358
0
  WordEntry * next(int) {
359
0
    if (i == end) return 0;
360
0
    set_word(d, *i);
361
0
    ++i;
362
0
    return &d;
363
0
  }
364
};
365
366
struct ElementsParms {
367
  typedef WordEntry *                Value;
368
  typedef WordLookup::const_iterator Iterator;
369
  Iterator end_;
370
  WordEntry data;
371
0
  ElementsParms(Iterator e) : end_(e) {}
372
0
  bool endf(Iterator i) const {return i==end_;}
373
0
  Value deref(Iterator i) {set_word(data, *i); return &data;}
374
0
  static Value end_state() {return 0;}
375
};
376
377
/////////////////////////////////////////////////////////////////////
378
// 
379
//  WritableDict
380
//
381
382
class WritableDict : public WritableBase
383
{
384
public: //but don't use
385
  PosibErr<void> save(FStream &, ParmString);
386
  PosibErr<void> merge(FStream &, ParmString, Config * config);
387
388
public:
389
390
  WritableDict(const Config & cfg)
391
1.42k
    : WritableBase(basic_dict, "WritableDict", ".pws", ".per", cfg) {}
392
393
  Size   size()     const;
394
  bool   empty()    const;
395
  
396
0
  PosibErr<void> add(ParmString w) {return Dictionary::add(w);}
397
  PosibErr<void> add(ParmString w, ParmString s);
398
399
  bool lookup(ParmString word, const SensitiveCompare *, WordEntry &) const;
400
401
  bool clean_lookup(ParmString sondslike, WordEntry &) const;
402
403
  bool soundslike_lookup(const WordEntry & soundslike, WordEntry &) const;
404
  bool soundslike_lookup(ParmString soundslike, WordEntry &) const;
405
406
  WordEntryEnumeration * detailed_elements() const;
407
408
  SoundslikeEnumeration * soundslike_elements() const;
409
};
410
411
WritableDict::Size WritableDict::size() const 
412
6.33k
{
413
6.33k
  return word_lookup->size();
414
6.33k
}
415
416
bool WritableDict::empty() const 
417
0
{
418
0
  return word_lookup->empty();
419
0
}
420
421
bool WritableDict::lookup(ParmString word, const SensitiveCompare * c,
422
                          WordEntry & o) const
423
970k
{
424
970k
  o.clear();
425
970k
  pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(word));
426
970k
  while (p.first != p.second) {
427
0
    if ((*c)(word,*p.first)) {
428
0
      o.what = WordEntry::Word;
429
0
      set_word(o, *p.first);
430
0
      return true;
431
0
    }
432
0
    ++p.first;
433
0
  }
434
970k
  return false;
435
970k
}
436
437
bool WritableDict::clean_lookup(ParmString sl, WordEntry & o) const
438
10.8M
{
439
10.8M
  o.clear();
440
10.8M
  pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(sl));
441
10.8M
  if (p.first == p.second) return false;
442
0
  o.what = WordEntry::Word;
443
0
  set_word(o, *p.first);
444
0
  return true;
445
  // FIXME: Deal with multiple entries
446
10.8M
}  
447
448
bool WritableDict::soundslike_lookup(const WordEntry & word, WordEntry & o) const 
449
0
{
450
0
  if (use_soundslike) {
451
452
0
    const StrVector * tmp 
453
0
      = (const StrVector *)(word.intr[0]);
454
0
    o.clear();
455
456
0
    o.what = WordEntry::Word;
457
0
    sl_init(tmp, o);
458
459
0
  } else {
460
      
461
0
    o.what = WordEntry::Word;
462
0
    o.word = word.word;
463
0
    o.word_size = word.word_size;
464
0
    o.word_info = word.word_info;
465
0
    o.aff  = "";
466
    
467
0
  }
468
0
  return true;
469
0
}
470
471
bool WritableDict::soundslike_lookup(ParmString word, WordEntry & o) const 
472
0
{
473
0
  if (use_soundslike) {
474
475
0
    o.clear();
476
0
    SoundslikeLookup::const_iterator i = soundslike_lookup_.find(word);
477
0
    if (i == soundslike_lookup_.end()) {
478
0
      return false;
479
0
    } else {
480
0
      o.what = WordEntry::Word;
481
0
      sl_init(&i->second, o);
482
0
      return true;
483
0
    }
484
  
485
0
  } else {
486
487
0
    return WritableDict::clean_lookup(word, o);
488
489
0
  }
490
0
}
491
492
25.6k
// Returns a newly allocated enumeration: over the soundslike index when
// soundslike data is in use, otherwise directly over the word table.
SoundslikeEnumeration * WritableDict::soundslike_elements() const {
  if (!use_soundslike)
    return new CleanElements(word_lookup->begin(), word_lookup->end());
  return new SoundslikeElements(soundslike_lookup_.begin(),
                                soundslike_lookup_.end());
}
500
501
0
WritableDict::Enum * WritableDict::detailed_elements() const {
502
0
  return new MakeEnumeration<ElementsParms>
503
0
    (word_lookup->begin(),ElementsParms(word_lookup->end()));
504
0
}
505
506
0
PosibErr<void> WritableDict::add(ParmString w, ParmString s) {
507
0
  if (validate_words)
508
0
    RET_ON_ERR(check_if_valid(*lang(),w));
509
0
  else
510
0
    RET_ON_ERR(check_if_sane(*lang(),w));
511
0
  SensitiveCompare c(lang());
512
0
  WordEntry we;
513
0
  if (WritableDict::lookup(w,&c,we)) return no_err;
514
0
  byte * w2;
515
0
  w2 = (byte *)buffer.alloc(w.size() + 3);
516
0
  *w2++ = lang()->get_word_info(w);
517
0
  *w2++ = w.size();
518
0
  memcpy(w2, w.str(), w.size() + 1);
519
0
  word_lookup->insert((char *)w2);
520
0
  if (use_soundslike) {
521
0
    byte * s2;
522
0
    s2 = (byte *)buffer.alloc(s.size() + 2);
523
0
    *s2++ = s.size();
524
0
    memcpy(s2, s.str(), s.size() + 1);
525
0
    soundslike_lookup_[(char *)s2].push_back((char *)w2);
526
0
  }
527
0
  return no_err;
528
0
}
529
530
// Reads a personal word list from `in` and adds every word in it to this
// dictionary.
//
// The first line is a header whose fields are taken off one at a time with
// split(): a format tag ("personal_wl", handled as version 10, or
// "personal_ws-1.1", version 11), the language name, a word count that is
// not used, and an optional file encoding.  Every following line holds one
// escaped word; in version 10 only the first field of the line is used.
//
// file_name - only used to label errors.
// config    - passed to set_check_lang() and set_file_encoding().
//             NOTE(review): it is dereferenced unconditionally although
//             the base class declares it with a default of 0 -- confirm
//             that every caller reaching this code passes a Config.
//
// Returns bad_file_format for a missing or unrecognised header, the error
// from set_check_lang(), or the error from add().  When add() fails the
// dictionary is cleared before returning.
PosibErr<void> WritableDict::merge(FStream & in, 
                                   ParmString file_name, 
                                   Config * config)
{
  typedef PosibErr<void> Ret;
  unsigned int ver;

  String buf;
  DataPair dp;

  // The error has to be returned.  Merely constructing it would let the
  // code below go on to parse a DataPair that was never filled in.
  if (!getline(in, dp, buf))
    return make_err(bad_file_format, file_name);

  split(dp);
  if (dp.key == "personal_wl")
    ver = 10;
  else if (dp.key == "personal_ws-1.1")
    ver = 11;
  else 
    return make_err(bad_file_format, file_name);

  split(dp);
  {
    Ret pe = set_check_lang(dp.key, *config);
    if (pe.has_err())
      return pe.with_file(file_name);
  }

  split(dp); // count not used at the moment

  split(dp);
  if (dp.key.size > 0)
    set_file_encoding(dp.key, *config);
  else
    set_file_encoding("", *config);
  
  // Words are converted with iconv before being added.
  ConvP conv(iconv);
  while (getline_n_unescape(in, dp, buf)) {
    if (ver == 10)
      split(dp);
    else
      dp.key = dp.value;
    Ret pe = add(conv(dp.key));
    if (pe.has_err()) {
      // Do not keep a partially merged list.
      clear();
      return pe.with_file(file_name);
    }
  }
  return no_err;
}
580
581
// Strict weak ordering on NUL-terminated strings using strcmp(), i.e. by
// byte value.  Used to write the word lists in sorted order.
struct CStrLess {
  bool operator() (const char * x, const char * y) const {
    const int order = strcmp(x, y);
    return order < 0;
  }
};
586
587
PosibErr<void> WritableDict::save(FStream & out, ParmString file_name) 
588
0
{
589
0
  out.printf("personal_ws-1.1 %s %i %s\n", 
590
0
             lang_name(), word_lookup->size(), file_encoding.c_str());
591
592
0
  Vector<const char *> words;
593
0
  words.reserve(word_lookup->size());
594
0
  for (WordLookup::const_iterator i = word_lookup->begin(), e = word_lookup->end();
595
0
       i != e; ++i)
596
0
    words.push_back(*i);
597
0
  std::sort(words.begin(), words.end(), CStrLess());
598
    
599
0
  ConvP conv(oconv);
600
0
  for (Vector<const char *>::const_iterator i = words.begin(), e = words.end();
601
0
       i != e; ++i) {
602
0
    write_n_escape(out, conv(*i));
603
0
    out << '\n';
604
0
  }
605
0
  return no_err;
606
0
}
607
608
/////////////////////////////////////////////////////////////////////
609
// 
610
//  WritableReplList
611
//
612
613
static inline StrVector * get_vector(Str s) 
614
0
{
615
0
  return (StrVector *)(s - sizeof(StrVector) - 2);
616
0
}
617
618
class WritableReplDict : public WritableBase
619
{
620
  WritableReplDict(const WritableReplDict&);
621
  WritableReplDict& operator=(const WritableReplDict&);
622
623
public:
624
  WritableReplDict(const Config & cfg)
625
    : WritableBase(replacement_dict, "WritableReplDict", ".prepl",".rpl", cfg) 
626
711
  {
627
711
    fast_lookup = true;
628
711
  }
629
  ~WritableReplDict();
630
631
  Size   size()     const;
632
  bool   empty()    const;
633
634
  bool lookup(ParmString, const SensitiveCompare *, WordEntry &) const;
635
636
  bool clean_lookup(ParmString sondslike, WordEntry &) const;
637
638
  bool soundslike_lookup(const WordEntry &, WordEntry &) const;
639
  bool soundslike_lookup(ParmString, WordEntry &) const;
640
641
  bool repl_lookup(const WordEntry &, WordEntry &) const;
642
  bool repl_lookup(ParmString, WordEntry &) const;
643
      
644
  WordEntryEnumeration * detailed_elements() const;
645
  SoundslikeEnumeration * soundslike_elements() const;
646
      
647
0
  PosibErr<void> add_repl(ParmString mis, ParmString cor) {
648
0
    return Dictionary::add_repl(mis,cor);}
649
  PosibErr<void> add_repl(ParmString mis, ParmString cor, ParmString s);
650
651
private:
652
  PosibErr<void> save(FStream &, ParmString );
653
  PosibErr<void> merge(FStream &, ParmString , Config * config);
654
};
655
656
WritableReplDict::Size WritableReplDict::size() const 
657
2.81k
{
658
2.81k
  return word_lookup->size();
659
2.81k
}
660
661
bool WritableReplDict::empty() const 
662
0
{
663
0
  return word_lookup->empty();
664
0
}
665
    
666
bool WritableReplDict::lookup(ParmString word, const SensitiveCompare * c,
667
                              WordEntry & o) const
668
0
{
669
0
  o.clear();
670
0
  pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(word));
671
0
  while (p.first != p.second) {
672
0
    if ((*c)(word,*p.first)) {
673
0
      o.what = WordEntry::Misspelled;
674
0
      set_word(o, *p.first);
675
0
      o.intr[0] = (void *)*p.first;
676
0
      return true;
677
0
    }
678
0
    ++p.first;
679
0
  }
680
0
  return false;
681
0
}
682
683
bool WritableReplDict::clean_lookup(ParmString sl, WordEntry & o) const
684
5.41M
{
685
5.41M
  o.clear();
686
5.41M
  pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(sl));
687
5.41M
  if (p.first == p.second) return false;
688
0
  o.what = WordEntry::Misspelled;
689
0
  set_word(o, *p.first);
690
0
  o.intr[0] = (void *)*p.first;
691
0
  return true;
692
  // FIXME: Deal with multiple entries
693
5.41M
}  
694
695
bool WritableReplDict::soundslike_lookup(const WordEntry & word, WordEntry & o) const 
696
0
{
697
0
  if (use_soundslike) {
698
0
    const StrVector * tmp = (const StrVector *)(word.intr[0]);
699
0
    o.clear();
700
0
    o.what = WordEntry::Misspelled;
701
0
    sl_init(tmp, o);
702
0
  } else {
703
0
    o.what = WordEntry::Misspelled;
704
0
    o.word = word.word;
705
0
    o.word_size = word.word_size;
706
0
    o.aff = "";
707
0
  }
708
0
  return true;
709
0
}
710
711
bool WritableReplDict::soundslike_lookup(ParmString soundslike, WordEntry & o) const
712
0
{
713
0
  if (use_soundslike) {
714
0
    o.clear();
715
0
    SoundslikeLookup::const_iterator i = soundslike_lookup_.find(soundslike);
716
0
    if (i == soundslike_lookup_.end()) {
717
0
      return false;
718
0
    } else {
719
0
      o.what = WordEntry::Misspelled;
720
0
      sl_init(&(i->second), o);
721
0
      return true;
722
0
    }
723
0
  } else {
724
0
    return WritableReplDict::clean_lookup(soundslike, o);
725
0
  }
726
0
}
727
728
12.8k
SoundslikeEnumeration * WritableReplDict::soundslike_elements() const {
729
12.8k
  if (use_soundslike)
730
12.8k
    return new SoundslikeElements(soundslike_lookup_.begin(), 
731
12.8k
                                  soundslike_lookup_.end());
732
0
  else
733
0
    return new CleanElements(word_lookup->begin(),
734
0
                             word_lookup->end());
735
12.8k
}
736
737
0
WritableReplDict::Enum * WritableReplDict::detailed_elements() const {
738
0
  return new MakeEnumeration<ElementsParms>
739
0
    (word_lookup->begin(),ElementsParms(word_lookup->end()));
740
0
}
741
742
static void repl_next(WordEntry * w)
743
0
{
744
0
  const Str * & i   = (const Str * &)(w->intr[0]);
745
0
  const Str *   end = (const Str *  )(w->intr[1]);
746
0
  set_word(*w, *i);
747
0
  ++i;
748
0
  if (i == end) w->adv_ = 0;
749
0
}
750
751
static void repl_init(const StrVector * tmp, WordEntry & o)
752
0
{
753
0
  o.what = WordEntry::Word;
754
0
  const Str * i   = tmp->pbegin();
755
0
  const Str * end = tmp->pend();
756
0
  set_word(o, *i);
757
0
  ++i;
758
0
  if (i != end) {
759
0
    o.intr[0] = (void *)i;
760
0
    o.intr[1] = (void *)end;
761
0
    o.adv_ = repl_next;
762
0
  } else {
763
0
    o.intr[0] = 0;
764
0
  }
765
0
}
766
  
767
bool WritableReplDict::repl_lookup(const WordEntry & w, WordEntry & o) const 
768
0
{
769
0
  const StrVector * repls;
770
0
  if (w.intr[0] && !w.intr[1]) { // the intr are not for the sl iter
771
0
    repls = get_vector(w.word);
772
0
  } else {
773
0
    SensitiveCompare c(lang()); // FIXME: This is not exactly right
774
0
    WordEntry tmp;
775
0
    WritableReplDict::lookup(w.word, &c, tmp);
776
0
    repls = get_vector(tmp.word);
777
0
    if (!repls) return false;
778
0
  }
779
0
  o.clear();
780
0
  repl_init(repls, o);
781
0
  return true;
782
0
}
783
784
bool WritableReplDict::repl_lookup(ParmString word, WordEntry & o) const 
785
0
{
786
0
  WordEntry w;
787
0
  w.word = word;
788
0
  return WritableReplDict::repl_lookup(w, o);
789
0
}
790
791
PosibErr<void> WritableReplDict::add_repl(ParmString mis, ParmString cor, ParmString sl) 
792
0
{
793
0
  Str m;
794
0
  SensitiveCompare cmp(lang()); // FIXME: I don't think this is completely correct
795
0
  WordEntry we;
796
797
0
  pair<WordLookup::iterator, WordLookup::iterator> p0(word_lookup->equal_range(mis));
798
0
  WordLookup::iterator p = p0.first;
799
800
0
  for (; p != p0.second && !cmp(mis,*p); ++p);
801
802
0
  if (p == p0.second) {
803
0
    byte * m0  = (byte *)buffer.alloc(sizeof(StrVector) + mis.size() + 3, sizeof(void *));
804
0
    new (m0) StrVector;
805
0
    m0 += sizeof(StrVector);
806
0
    *m0++ = lang()->get_word_info(mis);
807
0
    *m0++ = mis.size();
808
0
    memcpy(m0, mis.str(), mis.size() + 1);
809
0
    m = (char *)m0;
810
0
    p = word_lookup->insert(m).first;
811
0
  } else {
812
0
    m = *p;
813
0
  }
814
815
0
  StrVector * v = get_vector(m);
816
817
0
  for (StrVector::iterator i = v->begin(); i != v->end(); ++i)
818
0
    if (cmp(cor, *i)) return no_err;
819
    
820
0
  byte * c0 = (byte *)buffer.alloc(cor.size() + 3);
821
0
  *c0++ = lang()->get_word_info(cor);
822
0
  *c0++ = cor.size();
823
0
  memcpy(c0, cor.str(), cor.size() + 1);
824
0
  v->push_back((char *)c0);
825
826
0
  if (use_soundslike) {
827
0
    byte * s0 = (byte *)buffer.alloc(sl.size() + 2);
828
0
    *s0++ = sl.size();
829
0
    memcpy(s0, sl.str(), sl.size() + 1);
830
0
    soundslike_lookup_[(char *)s0].push_back(m);
831
0
  }
832
833
0
  return no_err;
834
0
}
835
836
PosibErr<void> WritableReplDict::save (FStream & out, ParmString file_name) 
837
0
{
838
0
  out.printf("personal_repl-1.1 %s 0 %s\n", lang_name(), file_encoding.c_str());
839
  
840
0
  Vector<const char *> words;
841
0
  words.reserve(word_lookup->size());
842
0
  for (WordLookup::const_iterator i = word_lookup->begin(), e = word_lookup->end();
843
0
       i != e; ++i)
844
0
    words.push_back(*i);
845
0
  std::sort(words.begin(), words.end(), CStrLess());
846
847
0
  ConvP conv1(oconv);
848
0
  ConvP conv2(oconv);
849
850
0
  Vector<const char *> v;
851
0
  for (Vector<const char *>::const_iterator i = words.begin(), e = words.end();
852
0
       i != e; ++i) 
853
0
  {
854
0
    v = *get_vector(*i); // make a copy
855
0
    std::sort(v.begin(), v.end(), CStrLess());
856
0
    for (StrVector::iterator j = v.begin(); j != v.end(); ++j)
857
0
    {
858
0
      write_n_escape(out, conv1(*i));
859
0
      out << ' ';
860
0
      write_n_escape(out, conv2(*j));
861
0
      out << '\n';
862
0
    }
863
0
  }
864
0
  return no_err;
865
0
}
866
867
// Reads a replacement list from `in` and adds every misspelling /
// correction pair in it to this dictionary.
//
// The first line is a header whose fields are taken off one at a time with
// split(): a format tag ("personal_repl", handled as version 10, or
// "personal_repl-1.1", version 11), the language name, for version 10 the
// number of soundslike groups, one field that is not used, and an optional
// file encoding.
//
// Version 11 bodies hold one escaped "<misspelling> <correction>" pair per
// line; lines without a space or with an empty correction are skipped.
// Version 10 bodies are read with stream extraction as nested counted
// groups (soundslike, words, comma terminated replacements).
//
// file_name - only used to label errors.
// config    - passed to set_check_lang() and set_file_encoding().
//             NOTE(review): it is dereferenced unconditionally although
//             the base class declares it with a default of 0 -- confirm
//             that every caller reaching this code passes a Config.
//
// Returns bad_file_format for a missing or unrecognised header or the
// error from set_check_lang().  Errors from add_repl() are not propagated.
PosibErr<void> WritableReplDict::merge(FStream & in,
                                       ParmString file_name, 
                                       Config * config)
{
  typedef PosibErr<void> Ret;
  unsigned int version;
  // Zero-initialised so the loop bounds further down are never
  // indeterminate, even if a stream extraction leaves them untouched.
  unsigned int num_words = 0, num_repls = 0;

  String buf;
  DataPair dp;

  // The error has to be returned.  Merely constructing it would let the
  // code below go on to parse a DataPair that was never filled in.
  if (!getline(in, dp, buf))
    return make_err(bad_file_format, file_name);

  split(dp);
  if (dp.key == "personal_repl")
    version = 10;
  else if (dp.key == "personal_repl-1.1") 
    version = 11;
  else
    return make_err(bad_file_format, file_name);

  split(dp);
  {
    Ret pe = set_check_lang(dp.key, *config);
    if (pe.has_err())
      return pe.with_file(file_name);
  }

  unsigned int num_soundslikes = 0;
  if (version == 10) {
    split(dp);
    num_soundslikes = atoi(dp.key);
  }

  split(dp); // not used at the moment

  split(dp);
  if (dp.key.size > 0)
    set_file_encoding(dp.key, *config);
  else
    set_file_encoding("", *config);

  if (version == 11) {

    // Two converters, so that both converted strings can be passed to
    // add_repl() in the same call.
    ConvP conv1(iconv);
    ConvP conv2(iconv);
    for (;;) {
      bool res = getline_n_unescape(in, buf, '\n');
      if (!res) break;
      char * mis = buf.mstr();
      char * repl = strchr(mis, ' ');
      if (!repl) continue; // bad line, ignore
      *repl = '\0'; // split string
      ++repl;
      if (!repl[0]) continue; // empty repl, ignore
      WritableReplDict::add_repl(conv1(mis), conv2(repl));
    }
    
  } else {
    
    String mis, sound, repl;
    unsigned int h,i,j;
    for (h=0; h != num_soundslikes; ++h) {
      in >> sound >> num_words;
      for (i = 0; i != num_words; ++i) {
        in >> mis >> num_repls;
        in.ignore(); // ignore space
        for (j = 0; j != num_repls; ++j) {
          in.getline(repl, ',');
          WritableReplDict::add_repl(mis, repl);
        }
      }
    }

  }
  return no_err;
}
945
946
// add_repl() placement-constructs a StrVector in front of every stored
// misspelling, so each of them has to be destroyed by hand here.
WritableReplDict::~WritableReplDict()
{
  WordLookup::iterator last = word_lookup->end();
  for (WordLookup::iterator cur = word_lookup->begin(); cur != last; ++cur) {
    StrVector * repls = get_vector(*cur);
    repls->~StrVector();
  }
}
954
955
}
956
957
namespace aspeller {
958
959
1.42k
  Dictionary * new_default_writable_dict(const Config & cfg) {
960
1.42k
    return new WritableDict(cfg);
961
1.42k
  }
962
963
711
  Dictionary * new_default_replacement_dict(const Config & cfg) {
964
711
    return new WritableReplDict(cfg);
965
711
  }
966
967
}