/src/aspell/modules/speller/default/writable.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of The New Aspell |
2 | | // Copyright (C) 2000,2011 by Kevin Atkinson under the GNU LGPL |
3 | | // license version 2.0 or 2.1. You should have received a copy of the |
4 | | // LGPL license along with this library if you did not you can find it |
5 | | // at http://www.gnu.org/. |
6 | | |
7 | | #include <time.h> |
8 | | |
9 | | #include "hash-t.hpp" |
10 | | #include "data.hpp" |
11 | | #include "data_util.hpp" |
12 | | #include "enumeration.hpp" |
13 | | #include "errors.hpp" |
14 | | #include "file_util.hpp" |
15 | | #include "fstream.hpp" |
16 | | #include "language.hpp" |
17 | | #include "getdata.hpp" |
18 | | |
19 | | namespace { |
20 | | |
21 | | ////////////////////////////////////////////////////////////////////// |
22 | | // |
23 | | // WritableBase |
24 | | // |
25 | | |
26 | | using namespace std; |
27 | | using namespace aspeller; |
28 | | using namespace acommon; |
29 | | |
30 | | typedef const char * Str; |
31 | | typedef unsigned char byte; |
32 | | |
33 | | struct Hash { |
34 | | InsensitiveHash<> f; |
35 | 2.13k | Hash(const Language * l) : f(l) {} |
36 | 17.2M | size_t operator() (Str s) const { |
37 | 17.2M | return f(s); |
38 | 17.2M | } |
39 | | }; |
40 | | |
41 | | struct Equal { |
42 | | InsensitiveEqual f; |
43 | 2.13k | Equal(const Language * l) : f(l) {} |
44 | 0 | bool operator() (Str a, Str b) const { |
45 | 0 | return f(a, b); |
46 | 0 | } |
47 | | }; |
48 | | |
49 | 0 | void write_n_escape(FStream & o, const char * str) { |
50 | 0 | while (*str != '\0') { |
51 | 0 | if (*str == '\n') o << "\\n"; |
52 | 0 | else if (*str == '\r') o << "\\r"; |
53 | 0 | else if (*str == '\\') o << "\\\\"; |
54 | 0 | else o << *str; |
55 | 0 | ++str; |
56 | 0 | } |
57 | 0 | } |
58 | | |
59 | 0 | static inline char f_getc(FStream & in) { |
60 | 0 | int c = in.get(); |
61 | 0 | return c == EOF ? '\0' : (char)c; |
62 | 0 | } |
63 | | |
64 | 0 | bool getline_n_unescape(FStream & in, String & str, char delem) { |
65 | 0 | str.clear(); |
66 | 0 | char c = f_getc(in); |
67 | 0 | if (!c) return false; |
68 | 0 | while (c && c != delem) { |
69 | 0 | if (c == '\\') { |
70 | 0 | c = f_getc(in); |
71 | 0 | if (c == 'n') str.append('\n'); |
72 | 0 | else if (c == 'r') str.append('\r'); |
73 | 0 | else if (c == '\\') str.append('\\'); |
74 | 0 | else {str.append('\\'); continue;} |
75 | 0 | } else { |
76 | 0 | str.append(c); |
77 | 0 | } |
78 | 0 | c = f_getc(in); |
79 | 0 | } |
80 | 0 | return true; |
81 | 0 | } |
82 | | |
83 | | bool getline_n_unescape(FStream & in, DataPair & d, String & buf) |
84 | 0 | { |
85 | 0 | if (!getline_n_unescape(in, buf, '\n')) return false; |
86 | 0 | d.value.str = buf.mstr(); |
87 | 0 | d.value.size = buf.size(); |
88 | 0 | return true; |
89 | 0 | } |
90 | | |
91 | | typedef Vector<Str> StrVector; |
92 | | |
93 | | typedef hash_multiset<Str,Hash,Equal> WordLookup; |
94 | | typedef hash_map<Str,StrVector> SoundslikeLookup; |
95 | | |
96 | | class WritableBase : public Dictionary { |
97 | | protected: |
98 | | String suffix; |
99 | | String compatibility_suffix; |
100 | | |
101 | | time_t cur_file_date; |
102 | | |
103 | | String compatibility_file_name; |
104 | | |
105 | | WritableBase(BasicType t, const char * n, const char * s, const char * cs, const Config & cfg) |
106 | | : Dictionary(t,n), |
107 | | suffix(s), compatibility_suffix(cs), |
108 | 2.13k | use_soundslike(true) { |
109 | 2.13k | fast_lookup = true; |
110 | 2.13k | validate_words = cfg.retrieve_bool("validate-words"); |
111 | 2.13k | } |
112 | 2.13k | virtual ~WritableBase() {} |
113 | | |
114 | | virtual PosibErr<void> save(FStream &, ParmString) = 0; |
115 | | virtual PosibErr<void> merge(FStream &, ParmString, Config * = 0) = 0; |
116 | | |
117 | | PosibErr<void> save2(FStream &, ParmString); |
118 | | PosibErr<void> update(FStream &, ParmString); |
119 | | PosibErr<void> save(bool do_update); |
120 | | PosibErr<void> update_file_date_info(FStream &); |
121 | | PosibErr<void> load(ParmString, Config &, DictList *, SpellerImpl *); |
122 | | PosibErr<void> merge(ParmString); |
123 | | PosibErr<void> save_as(ParmString); |
124 | | PosibErr<void> clear(); |
125 | | |
126 | | String file_encoding; |
127 | | ConvObj iconv; |
128 | | ConvObj oconv; |
129 | | PosibErr<void> set_file_encoding(ParmString, Config & c); |
130 | | |
131 | 0 | PosibErr<void> synchronize() {return save(true);} |
132 | 0 | PosibErr<void> save_noupdate() {return save(false);} |
133 | | |
134 | | bool use_soundslike; |
135 | | StackPtr<WordLookup> word_lookup; |
136 | | SoundslikeLookup soundslike_lookup_; |
137 | | ObjStack buffer; |
138 | | |
139 | 2.13k | void set_lang_hook(Config & c) { |
140 | 2.13k | set_file_encoding(lang()->data_encoding(), c); |
141 | 2.13k | word_lookup.reset(new WordLookup(10, Hash(lang()), Equal(lang()))); |
142 | 2.13k | use_soundslike = lang()->have_soundslike(); |
143 | 2.13k | } |
144 | | }; |
145 | | |
146 | 0 | PosibErr<void> WritableBase::update_file_date_info(FStream & f) { |
147 | 0 | RET_ON_ERR(update_file_info(f)); |
148 | 0 | cur_file_date = get_modification_time(f); |
149 | 0 | return no_err; |
150 | 0 | } |
151 | | |
152 | | PosibErr<void> WritableBase::load(ParmString f0, Config & config, |
153 | | DictList *, SpellerImpl *) |
154 | 1.42k | { |
155 | 1.42k | set_file_name(f0); |
156 | 1.42k | const String f = file_name(); |
157 | 1.42k | FStream in; |
158 | | |
159 | 1.42k | if (file_exists(f)) { |
160 | | |
161 | 0 | RET_ON_ERR(open_file_readlock(in, f)); |
162 | 0 | if (in.peek() == EOF) return make_err(cant_read_file,f); |
163 | | // ^^ FIXME |
164 | 0 | RET_ON_ERR(merge(in, f, &config)); |
165 | | |
166 | 1.42k | } else if (f.size() >= suffix.size() && |
167 | 1.42k | f.substr(f.size()-suffix.size(),suffix.size()) |
168 | 1.42k | == suffix) { |
169 | | |
170 | 1.42k | compatibility_file_name = f.substr(0,f.size() - suffix.size()); |
171 | 1.42k | compatibility_file_name += compatibility_suffix; |
172 | | |
173 | 1.42k | { |
174 | 1.42k | PosibErr<void> pe = open_file_readlock(in, compatibility_file_name); |
175 | 1.42k | if (pe.has_err()) {compatibility_file_name = ""; return pe;} |
176 | 1.42k | } { |
177 | 0 | PosibErr<void> pe = merge(in, compatibility_file_name, &config); |
178 | 0 | if (pe.has_err()) {compatibility_file_name = ""; return pe;} |
179 | 0 | } |
180 | | |
181 | 2 | } else { |
182 | | |
183 | 2 | return make_err(cant_read_file,f); |
184 | | |
185 | 2 | } |
186 | | |
187 | 0 | return update_file_date_info(in); |
188 | 1.42k | } |
189 | | |
190 | 0 | PosibErr<void> WritableBase::merge(ParmString f0) { |
191 | 0 | FStream in; |
192 | 0 | Dict::FileName fn(f0); |
193 | 0 | RET_ON_ERR(open_file_readlock(in, fn.path())); |
194 | 0 | RET_ON_ERR(merge(in, fn.path())); |
195 | 0 | return no_err; |
196 | 0 | } |
197 | | |
198 | 0 | PosibErr<void> WritableBase::update(FStream & in, ParmString fn) { |
199 | 0 | typedef PosibErr<void> Ret; |
200 | 0 | { |
201 | 0 | Ret pe = merge(in, fn); |
202 | 0 | if (pe.has_err() && compatibility_file_name.empty()) return pe; |
203 | 0 | } { |
204 | 0 | Ret pe = update_file_date_info(in); |
205 | 0 | if (pe.has_err() && compatibility_file_name.empty()) return pe; |
206 | 0 | } |
207 | 0 | return no_err; |
208 | 0 | } |
209 | | |
210 | 0 | PosibErr<void> WritableBase::save2(FStream & out, ParmString fn) { |
211 | 0 | truncate_file(out, fn); |
212 | | |
213 | 0 | RET_ON_ERR(save(out,fn)); |
214 | | |
215 | 0 | out.flush(); |
216 | |
|
217 | 0 | return no_err; |
218 | 0 | } |
219 | | |
220 | 0 | PosibErr<void> WritableBase::save_as(ParmString fn) { |
221 | 0 | compatibility_file_name = ""; |
222 | 0 | set_file_name(fn); |
223 | 0 | FStream inout; |
224 | 0 | RET_ON_ERR(open_file_writelock(inout, file_name())); |
225 | 0 | RET_ON_ERR(save2(inout, file_name())); |
226 | 0 | RET_ON_ERR(update_file_date_info(inout)); |
227 | 0 | return no_err; |
228 | 0 | } |
229 | | |
230 | 0 | PosibErr<void> WritableBase::save(bool do_update) { |
231 | 0 | FStream inout; |
232 | 0 | RET_ON_ERR_SET(open_file_writelock(inout, file_name()), |
233 | 0 | bool, prev_existed); |
234 | | |
235 | 0 | if (do_update |
236 | 0 | && prev_existed |
237 | 0 | && get_modification_time(inout) > cur_file_date) |
238 | 0 | RET_ON_ERR(update(inout, file_name())); |
239 | | |
240 | 0 | RET_ON_ERR(save2(inout, file_name())); |
241 | 0 | RET_ON_ERR(update_file_date_info(inout)); |
242 | | |
243 | 0 | if (compatibility_file_name.size() != 0) { |
244 | 0 | remove_file(compatibility_file_name.c_str()); |
245 | 0 | compatibility_file_name = ""; |
246 | 0 | } |
247 | |
|
248 | 0 | return no_err; |
249 | 0 | } |
250 | | |
251 | 0 | PosibErr<void> WritableBase::clear() { |
252 | 0 | word_lookup->clear(); |
253 | 0 | soundslike_lookup_.clear(); |
254 | 0 | buffer.reset(); |
255 | 0 | return no_err; |
256 | 0 | } |
257 | | |
258 | | PosibErr<void> WritableBase::set_file_encoding(ParmString enc, Config & c) |
259 | 2.13k | { |
260 | 2.13k | if (enc == file_encoding) return no_err; |
261 | 2.13k | if (enc == "") enc = lang()->charmap(); |
262 | 2.13k | RET_ON_ERR(iconv.setup(c, enc, lang()->charmap(), NormFrom)); |
263 | 2.13k | RET_ON_ERR(oconv.setup(c, lang()->charmap(), enc, NormTo)); |
264 | 2.13k | if (iconv || oconv) |
265 | 0 | file_encoding = enc; |
266 | 2.13k | else |
267 | 2.13k | file_encoding = ""; |
268 | 2.13k | return no_err; |
269 | 2.13k | } |
270 | | |
271 | | |
272 | | ///////////////////////////////////////////////////////////////////// |
273 | | // |
274 | | // Common Stuff |
275 | | // |
276 | | |
277 | | // a word is stored in memory as follows |
278 | | // <word info><size><word...><null> |
279 | | // the hash table points to the word and not the start of the block |
280 | | |
281 | | static inline void set_word(WordEntry & res, Str w) |
282 | 0 | { |
283 | 0 | res.word = w; |
284 | 0 | res.word_size = (byte)w[-1]; |
285 | 0 | res.word_info = (byte)w[-2]; |
286 | 0 | res.aff = ""; |
287 | 0 | } |
288 | | |
289 | | // a soundslike is stored in memory as follows |
290 | | // <word info><size><sl...><null> |
291 | | // the hash table points to the sl and not the start of the block |
292 | | |
293 | | static inline void set_sl(WordEntry & res, Str w) |
294 | 0 | { |
295 | 0 | res.word = w; |
296 | 0 | res.word_size = (byte)w[-1]; |
297 | 0 | } |
298 | | |
299 | | static void soundslike_next(WordEntry * w) |
300 | 0 | { |
301 | 0 | const Str * & i = (const Str * &)(w->intr[0]); |
302 | 0 | const Str * end = (const Str * )(w->intr[1]); |
303 | 0 | set_word(*w, *i); |
304 | 0 | ++i; |
305 | 0 | if (i == end) w->adv_ = 0; |
306 | 0 | } |
307 | | |
308 | | static void sl_init(const StrVector * tmp, WordEntry & o) |
309 | 0 | { |
310 | 0 | const Str * i = tmp->pbegin(); |
311 | 0 | const Str * end = tmp->pend(); |
312 | 0 | set_word(o, *i); |
313 | 0 | ++i; |
314 | 0 | if (i != end) { |
315 | 0 | o.intr[0] = (void *)i; |
316 | 0 | o.intr[1] = (void *)end; |
317 | 0 | o.adv_ = soundslike_next; |
318 | 0 | } else { |
319 | 0 | o.intr[0] = 0; |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | | struct SoundslikeElements : public SoundslikeEnumeration { |
324 | | |
325 | | typedef SoundslikeLookup::const_iterator Itr; |
326 | | |
327 | | Itr i; |
328 | | Itr end; |
329 | | |
330 | | WordEntry d; |
331 | | |
332 | 38.4k | SoundslikeElements(Itr i0, Itr end0) : i(i0), end(end0) { |
333 | 38.4k | d.what = WordEntry::Soundslike; |
334 | 38.4k | } |
335 | | |
336 | 38.4k | WordEntry * next(int) { |
337 | 38.4k | if (i == end) return 0; |
338 | 0 | set_sl(d, i->first); |
339 | 0 | d.intr[0] = (void *)(&i->second); |
340 | 0 | ++i; |
341 | 0 | return &d; |
342 | 38.4k | } |
343 | | }; |
344 | | |
345 | | struct CleanElements : public SoundslikeEnumeration { |
346 | | |
347 | | typedef WordLookup::const_iterator Itr; |
348 | | |
349 | | Itr i; |
350 | | Itr end; |
351 | | |
352 | | WordEntry d; |
353 | | |
354 | 0 | CleanElements(Itr i0, Itr end0) : i(i0), end(end0) { |
355 | 0 | d.what = WordEntry::Word; |
356 | 0 | } |
357 | | |
358 | 0 | WordEntry * next(int) { |
359 | 0 | if (i == end) return 0; |
360 | 0 | set_word(d, *i); |
361 | 0 | ++i; |
362 | 0 | return &d; |
363 | 0 | } |
364 | | }; |
365 | | |
366 | | struct ElementsParms { |
367 | | typedef WordEntry * Value; |
368 | | typedef WordLookup::const_iterator Iterator; |
369 | | Iterator end_; |
370 | | WordEntry data; |
371 | 0 | ElementsParms(Iterator e) : end_(e) {} |
372 | 0 | bool endf(Iterator i) const {return i==end_;} |
373 | 0 | Value deref(Iterator i) {set_word(data, *i); return &data;} |
374 | 0 | static Value end_state() {return 0;} |
375 | | }; |
376 | | |
377 | | ///////////////////////////////////////////////////////////////////// |
378 | | // |
379 | | // WritableDict |
380 | | // |
381 | | |
382 | | class WritableDict : public WritableBase |
383 | | { |
384 | | public: //but don't use |
385 | | PosibErr<void> save(FStream &, ParmString); |
386 | | PosibErr<void> merge(FStream &, ParmString, Config * config); |
387 | | |
388 | | public: |
389 | | |
390 | | WritableDict(const Config & cfg) |
391 | 1.42k | : WritableBase(basic_dict, "WritableDict", ".pws", ".per", cfg) {} |
392 | | |
393 | | Size size() const; |
394 | | bool empty() const; |
395 | | |
396 | 0 | PosibErr<void> add(ParmString w) {return Dictionary::add(w);} |
397 | | PosibErr<void> add(ParmString w, ParmString s); |
398 | | |
399 | | bool lookup(ParmString word, const SensitiveCompare *, WordEntry &) const; |
400 | | |
401 | | bool clean_lookup(ParmString sondslike, WordEntry &) const; |
402 | | |
403 | | bool soundslike_lookup(const WordEntry & soundslike, WordEntry &) const; |
404 | | bool soundslike_lookup(ParmString soundslike, WordEntry &) const; |
405 | | |
406 | | WordEntryEnumeration * detailed_elements() const; |
407 | | |
408 | | SoundslikeEnumeration * soundslike_elements() const; |
409 | | }; |
410 | | |
411 | | WritableDict::Size WritableDict::size() const |
412 | 6.33k | { |
413 | 6.33k | return word_lookup->size(); |
414 | 6.33k | } |
415 | | |
416 | | bool WritableDict::empty() const |
417 | 0 | { |
418 | 0 | return word_lookup->empty(); |
419 | 0 | } |
420 | | |
421 | | bool WritableDict::lookup(ParmString word, const SensitiveCompare * c, |
422 | | WordEntry & o) const |
423 | 970k | { |
424 | 970k | o.clear(); |
425 | 970k | pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(word)); |
426 | 970k | while (p.first != p.second) { |
427 | 0 | if ((*c)(word,*p.first)) { |
428 | 0 | o.what = WordEntry::Word; |
429 | 0 | set_word(o, *p.first); |
430 | 0 | return true; |
431 | 0 | } |
432 | 0 | ++p.first; |
433 | 0 | } |
434 | 970k | return false; |
435 | 970k | } |
436 | | |
437 | | bool WritableDict::clean_lookup(ParmString sl, WordEntry & o) const |
438 | 10.8M | { |
439 | 10.8M | o.clear(); |
440 | 10.8M | pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(sl)); |
441 | 10.8M | if (p.first == p.second) return false; |
442 | 0 | o.what = WordEntry::Word; |
443 | 0 | set_word(o, *p.first); |
444 | 0 | return true; |
445 | | // FIXME: Deal with multiple entries |
446 | 10.8M | } |
447 | | |
448 | | bool WritableDict::soundslike_lookup(const WordEntry & word, WordEntry & o) const |
449 | 0 | { |
450 | 0 | if (use_soundslike) { |
451 | |
|
452 | 0 | const StrVector * tmp |
453 | 0 | = (const StrVector *)(word.intr[0]); |
454 | 0 | o.clear(); |
455 | |
|
456 | 0 | o.what = WordEntry::Word; |
457 | 0 | sl_init(tmp, o); |
458 | |
|
459 | 0 | } else { |
460 | | |
461 | 0 | o.what = WordEntry::Word; |
462 | 0 | o.word = word.word; |
463 | 0 | o.word_size = word.word_size; |
464 | 0 | o.word_info = word.word_info; |
465 | 0 | o.aff = ""; |
466 | | |
467 | 0 | } |
468 | 0 | return true; |
469 | 0 | } |
470 | | |
471 | | bool WritableDict::soundslike_lookup(ParmString word, WordEntry & o) const |
472 | 0 | { |
473 | 0 | if (use_soundslike) { |
474 | |
|
475 | 0 | o.clear(); |
476 | 0 | SoundslikeLookup::const_iterator i = soundslike_lookup_.find(word); |
477 | 0 | if (i == soundslike_lookup_.end()) { |
478 | 0 | return false; |
479 | 0 | } else { |
480 | 0 | o.what = WordEntry::Word; |
481 | 0 | sl_init(&i->second, o); |
482 | 0 | return true; |
483 | 0 | } |
484 | | |
485 | 0 | } else { |
486 | |
|
487 | 0 | return WritableDict::clean_lookup(word, o); |
488 | |
|
489 | 0 | } |
490 | 0 | } |
491 | | |
492 | 25.6k | SoundslikeEnumeration * WritableDict::soundslike_elements() const { |
493 | 25.6k | if (use_soundslike) |
494 | 25.6k | return new SoundslikeElements(soundslike_lookup_.begin(), |
495 | 25.6k | soundslike_lookup_.end()); |
496 | 0 | else |
497 | 0 | return new CleanElements(word_lookup->begin(), |
498 | 0 | word_lookup->end()); |
499 | 25.6k | } |
500 | | |
501 | 0 | WritableDict::Enum * WritableDict::detailed_elements() const { |
502 | 0 | return new MakeEnumeration<ElementsParms> |
503 | 0 | (word_lookup->begin(),ElementsParms(word_lookup->end())); |
504 | 0 | } |
505 | | |
506 | 0 | PosibErr<void> WritableDict::add(ParmString w, ParmString s) { |
507 | 0 | if (validate_words) |
508 | 0 | RET_ON_ERR(check_if_valid(*lang(),w)); |
509 | 0 | else |
510 | 0 | RET_ON_ERR(check_if_sane(*lang(),w)); |
511 | 0 | SensitiveCompare c(lang()); |
512 | 0 | WordEntry we; |
513 | 0 | if (WritableDict::lookup(w,&c,we)) return no_err; |
514 | 0 | byte * w2; |
515 | 0 | w2 = (byte *)buffer.alloc(w.size() + 3); |
516 | 0 | *w2++ = lang()->get_word_info(w); |
517 | 0 | *w2++ = w.size(); |
518 | 0 | memcpy(w2, w.str(), w.size() + 1); |
519 | 0 | word_lookup->insert((char *)w2); |
520 | 0 | if (use_soundslike) { |
521 | 0 | byte * s2; |
522 | 0 | s2 = (byte *)buffer.alloc(s.size() + 2); |
523 | 0 | *s2++ = s.size(); |
524 | 0 | memcpy(s2, s.str(), s.size() + 1); |
525 | 0 | soundslike_lookup_[(char *)s2].push_back((char *)w2); |
526 | 0 | } |
527 | 0 | return no_err; |
528 | 0 | } |
529 | | |
530 | | PosibErr<void> WritableDict::merge(FStream & in, |
531 | | ParmString file_name, |
532 | | Config * config) |
533 | 0 | { |
534 | 0 | typedef PosibErr<void> Ret; |
535 | 0 | unsigned int ver; |
536 | |
|
537 | 0 | String buf; |
538 | 0 | DataPair dp; |
539 | |
|
540 | 0 | if (!getline(in, dp, buf)) |
541 | 0 | make_err(bad_file_format, file_name); |
542 | |
|
543 | 0 | split(dp); |
544 | 0 | if (dp.key == "personal_wl") |
545 | 0 | ver = 10; |
546 | 0 | else if (dp.key == "personal_ws-1.1") |
547 | 0 | ver = 11; |
548 | 0 | else |
549 | 0 | return make_err(bad_file_format, file_name); |
550 | | |
551 | 0 | split(dp); |
552 | 0 | { |
553 | 0 | Ret pe = set_check_lang(dp.key, *config); |
554 | 0 | if (pe.has_err()) |
555 | 0 | return pe.with_file(file_name); |
556 | 0 | } |
557 | | |
558 | 0 | split(dp); // count not used at the moment |
559 | |
|
560 | 0 | split(dp); |
561 | 0 | if (dp.key.size > 0) |
562 | 0 | set_file_encoding(dp.key, *config); |
563 | 0 | else |
564 | 0 | set_file_encoding("", *config); |
565 | | |
566 | 0 | ConvP conv(iconv); |
567 | 0 | while (getline_n_unescape(in, dp, buf)) { |
568 | 0 | if (ver == 10) |
569 | 0 | split(dp); |
570 | 0 | else |
571 | 0 | dp.key = dp.value; |
572 | 0 | Ret pe = add(conv(dp.key)); |
573 | 0 | if (pe.has_err()) { |
574 | 0 | clear(); |
575 | 0 | return pe.with_file(file_name); |
576 | 0 | } |
577 | 0 | } |
578 | 0 | return no_err; |
579 | 0 | } |
580 | | |
// Strict weak ordering on C strings by strcmp(); used to sort the words
// before they are written out.
struct CStrLess {
  bool operator() (const char * x, const char * y) const {
    const int order = strcmp(x, y);
    return order < 0;
  }
};
586 | | |
587 | | PosibErr<void> WritableDict::save(FStream & out, ParmString file_name) |
588 | 0 | { |
589 | 0 | out.printf("personal_ws-1.1 %s %i %s\n", |
590 | 0 | lang_name(), word_lookup->size(), file_encoding.c_str()); |
591 | |
|
592 | 0 | Vector<const char *> words; |
593 | 0 | words.reserve(word_lookup->size()); |
594 | 0 | for (WordLookup::const_iterator i = word_lookup->begin(), e = word_lookup->end(); |
595 | 0 | i != e; ++i) |
596 | 0 | words.push_back(*i); |
597 | 0 | std::sort(words.begin(), words.end(), CStrLess()); |
598 | | |
599 | 0 | ConvP conv(oconv); |
600 | 0 | for (Vector<const char *>::const_iterator i = words.begin(), e = words.end(); |
601 | 0 | i != e; ++i) { |
602 | 0 | write_n_escape(out, conv(*i)); |
603 | 0 | out << '\n'; |
604 | 0 | } |
605 | 0 | return no_err; |
606 | 0 | } |
607 | | |
608 | | ///////////////////////////////////////////////////////////////////// |
609 | | // |
610 | | // WritableReplList |
611 | | // |
612 | | |
613 | | static inline StrVector * get_vector(Str s) |
614 | 0 | { |
615 | 0 | return (StrVector *)(s - sizeof(StrVector) - 2); |
616 | 0 | } |
617 | | |
618 | | class WritableReplDict : public WritableBase |
619 | | { |
620 | | WritableReplDict(const WritableReplDict&); |
621 | | WritableReplDict& operator=(const WritableReplDict&); |
622 | | |
623 | | public: |
624 | | WritableReplDict(const Config & cfg) |
625 | | : WritableBase(replacement_dict, "WritableReplDict", ".prepl",".rpl", cfg) |
626 | 711 | { |
627 | 711 | fast_lookup = true; |
628 | 711 | } |
629 | | ~WritableReplDict(); |
630 | | |
631 | | Size size() const; |
632 | | bool empty() const; |
633 | | |
634 | | bool lookup(ParmString, const SensitiveCompare *, WordEntry &) const; |
635 | | |
636 | | bool clean_lookup(ParmString sondslike, WordEntry &) const; |
637 | | |
638 | | bool soundslike_lookup(const WordEntry &, WordEntry &) const; |
639 | | bool soundslike_lookup(ParmString, WordEntry &) const; |
640 | | |
641 | | bool repl_lookup(const WordEntry &, WordEntry &) const; |
642 | | bool repl_lookup(ParmString, WordEntry &) const; |
643 | | |
644 | | WordEntryEnumeration * detailed_elements() const; |
645 | | SoundslikeEnumeration * soundslike_elements() const; |
646 | | |
647 | 0 | PosibErr<void> add_repl(ParmString mis, ParmString cor) { |
648 | 0 | return Dictionary::add_repl(mis,cor);} |
649 | | PosibErr<void> add_repl(ParmString mis, ParmString cor, ParmString s); |
650 | | |
651 | | private: |
652 | | PosibErr<void> save(FStream &, ParmString ); |
653 | | PosibErr<void> merge(FStream &, ParmString , Config * config); |
654 | | }; |
655 | | |
656 | | WritableReplDict::Size WritableReplDict::size() const |
657 | 2.81k | { |
658 | 2.81k | return word_lookup->size(); |
659 | 2.81k | } |
660 | | |
661 | | bool WritableReplDict::empty() const |
662 | 0 | { |
663 | 0 | return word_lookup->empty(); |
664 | 0 | } |
665 | | |
666 | | bool WritableReplDict::lookup(ParmString word, const SensitiveCompare * c, |
667 | | WordEntry & o) const |
668 | 0 | { |
669 | 0 | o.clear(); |
670 | 0 | pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(word)); |
671 | 0 | while (p.first != p.second) { |
672 | 0 | if ((*c)(word,*p.first)) { |
673 | 0 | o.what = WordEntry::Misspelled; |
674 | 0 | set_word(o, *p.first); |
675 | 0 | o.intr[0] = (void *)*p.first; |
676 | 0 | return true; |
677 | 0 | } |
678 | 0 | ++p.first; |
679 | 0 | } |
680 | 0 | return false; |
681 | 0 | } |
682 | | |
683 | | bool WritableReplDict::clean_lookup(ParmString sl, WordEntry & o) const |
684 | 5.41M | { |
685 | 5.41M | o.clear(); |
686 | 5.41M | pair<WordLookup::iterator, WordLookup::iterator> p(word_lookup->equal_range(sl)); |
687 | 5.41M | if (p.first == p.second) return false; |
688 | 0 | o.what = WordEntry::Misspelled; |
689 | 0 | set_word(o, *p.first); |
690 | 0 | o.intr[0] = (void *)*p.first; |
691 | 0 | return true; |
692 | | // FIXME: Deal with multiple entries |
693 | 5.41M | } |
694 | | |
695 | | bool WritableReplDict::soundslike_lookup(const WordEntry & word, WordEntry & o) const |
696 | 0 | { |
697 | 0 | if (use_soundslike) { |
698 | 0 | const StrVector * tmp = (const StrVector *)(word.intr[0]); |
699 | 0 | o.clear(); |
700 | 0 | o.what = WordEntry::Misspelled; |
701 | 0 | sl_init(tmp, o); |
702 | 0 | } else { |
703 | 0 | o.what = WordEntry::Misspelled; |
704 | 0 | o.word = word.word; |
705 | 0 | o.word_size = word.word_size; |
706 | 0 | o.aff = ""; |
707 | 0 | } |
708 | 0 | return true; |
709 | 0 | } |
710 | | |
711 | | bool WritableReplDict::soundslike_lookup(ParmString soundslike, WordEntry & o) const |
712 | 0 | { |
713 | 0 | if (use_soundslike) { |
714 | 0 | o.clear(); |
715 | 0 | SoundslikeLookup::const_iterator i = soundslike_lookup_.find(soundslike); |
716 | 0 | if (i == soundslike_lookup_.end()) { |
717 | 0 | return false; |
718 | 0 | } else { |
719 | 0 | o.what = WordEntry::Misspelled; |
720 | 0 | sl_init(&(i->second), o); |
721 | 0 | return true; |
722 | 0 | } |
723 | 0 | } else { |
724 | 0 | return WritableReplDict::clean_lookup(soundslike, o); |
725 | 0 | } |
726 | 0 | } |
727 | | |
728 | 12.8k | SoundslikeEnumeration * WritableReplDict::soundslike_elements() const { |
729 | 12.8k | if (use_soundslike) |
730 | 12.8k | return new SoundslikeElements(soundslike_lookup_.begin(), |
731 | 12.8k | soundslike_lookup_.end()); |
732 | 0 | else |
733 | 0 | return new CleanElements(word_lookup->begin(), |
734 | 0 | word_lookup->end()); |
735 | 12.8k | } |
736 | | |
737 | 0 | WritableReplDict::Enum * WritableReplDict::detailed_elements() const { |
738 | 0 | return new MakeEnumeration<ElementsParms> |
739 | 0 | (word_lookup->begin(),ElementsParms(word_lookup->end())); |
740 | 0 | } |
741 | | |
742 | | static void repl_next(WordEntry * w) |
743 | 0 | { |
744 | 0 | const Str * & i = (const Str * &)(w->intr[0]); |
745 | 0 | const Str * end = (const Str * )(w->intr[1]); |
746 | 0 | set_word(*w, *i); |
747 | 0 | ++i; |
748 | 0 | if (i == end) w->adv_ = 0; |
749 | 0 | } |
750 | | |
751 | | static void repl_init(const StrVector * tmp, WordEntry & o) |
752 | 0 | { |
753 | 0 | o.what = WordEntry::Word; |
754 | 0 | const Str * i = tmp->pbegin(); |
755 | 0 | const Str * end = tmp->pend(); |
756 | 0 | set_word(o, *i); |
757 | 0 | ++i; |
758 | 0 | if (i != end) { |
759 | 0 | o.intr[0] = (void *)i; |
760 | 0 | o.intr[1] = (void *)end; |
761 | 0 | o.adv_ = repl_next; |
762 | 0 | } else { |
763 | 0 | o.intr[0] = 0; |
764 | 0 | } |
765 | 0 | } |
766 | | |
767 | | bool WritableReplDict::repl_lookup(const WordEntry & w, WordEntry & o) const |
768 | 0 | { |
769 | 0 | const StrVector * repls; |
770 | 0 | if (w.intr[0] && !w.intr[1]) { // the intr are not for the sl iter |
771 | 0 | repls = get_vector(w.word); |
772 | 0 | } else { |
773 | 0 | SensitiveCompare c(lang()); // FIXME: This is not exactly right |
774 | 0 | WordEntry tmp; |
775 | 0 | WritableReplDict::lookup(w.word, &c, tmp); |
776 | 0 | repls = get_vector(tmp.word); |
777 | 0 | if (!repls) return false; |
778 | 0 | } |
779 | 0 | o.clear(); |
780 | 0 | repl_init(repls, o); |
781 | 0 | return true; |
782 | 0 | } |
783 | | |
784 | | bool WritableReplDict::repl_lookup(ParmString word, WordEntry & o) const |
785 | 0 | { |
786 | 0 | WordEntry w; |
787 | 0 | w.word = word; |
788 | 0 | return WritableReplDict::repl_lookup(w, o); |
789 | 0 | } |
790 | | |
791 | | PosibErr<void> WritableReplDict::add_repl(ParmString mis, ParmString cor, ParmString sl) |
792 | 0 | { |
793 | 0 | Str m; |
794 | 0 | SensitiveCompare cmp(lang()); // FIXME: I don't think this is completely correct |
795 | 0 | WordEntry we; |
796 | |
|
797 | 0 | pair<WordLookup::iterator, WordLookup::iterator> p0(word_lookup->equal_range(mis)); |
798 | 0 | WordLookup::iterator p = p0.first; |
799 | |
|
800 | 0 | for (; p != p0.second && !cmp(mis,*p); ++p); |
801 | |
|
802 | 0 | if (p == p0.second) { |
803 | 0 | byte * m0 = (byte *)buffer.alloc(sizeof(StrVector) + mis.size() + 3, sizeof(void *)); |
804 | 0 | new (m0) StrVector; |
805 | 0 | m0 += sizeof(StrVector); |
806 | 0 | *m0++ = lang()->get_word_info(mis); |
807 | 0 | *m0++ = mis.size(); |
808 | 0 | memcpy(m0, mis.str(), mis.size() + 1); |
809 | 0 | m = (char *)m0; |
810 | 0 | p = word_lookup->insert(m).first; |
811 | 0 | } else { |
812 | 0 | m = *p; |
813 | 0 | } |
814 | |
|
815 | 0 | StrVector * v = get_vector(m); |
816 | |
|
817 | 0 | for (StrVector::iterator i = v->begin(); i != v->end(); ++i) |
818 | 0 | if (cmp(cor, *i)) return no_err; |
819 | | |
820 | 0 | byte * c0 = (byte *)buffer.alloc(cor.size() + 3); |
821 | 0 | *c0++ = lang()->get_word_info(cor); |
822 | 0 | *c0++ = cor.size(); |
823 | 0 | memcpy(c0, cor.str(), cor.size() + 1); |
824 | 0 | v->push_back((char *)c0); |
825 | |
|
826 | 0 | if (use_soundslike) { |
827 | 0 | byte * s0 = (byte *)buffer.alloc(sl.size() + 2); |
828 | 0 | *s0++ = sl.size(); |
829 | 0 | memcpy(s0, sl.str(), sl.size() + 1); |
830 | 0 | soundslike_lookup_[(char *)s0].push_back(m); |
831 | 0 | } |
832 | |
|
833 | 0 | return no_err; |
834 | 0 | } |
835 | | |
836 | | PosibErr<void> WritableReplDict::save (FStream & out, ParmString file_name) |
837 | 0 | { |
838 | 0 | out.printf("personal_repl-1.1 %s 0 %s\n", lang_name(), file_encoding.c_str()); |
839 | | |
840 | 0 | Vector<const char *> words; |
841 | 0 | words.reserve(word_lookup->size()); |
842 | 0 | for (WordLookup::const_iterator i = word_lookup->begin(), e = word_lookup->end(); |
843 | 0 | i != e; ++i) |
844 | 0 | words.push_back(*i); |
845 | 0 | std::sort(words.begin(), words.end(), CStrLess()); |
846 | |
|
847 | 0 | ConvP conv1(oconv); |
848 | 0 | ConvP conv2(oconv); |
849 | |
|
850 | 0 | Vector<const char *> v; |
851 | 0 | for (Vector<const char *>::const_iterator i = words.begin(), e = words.end(); |
852 | 0 | i != e; ++i) |
853 | 0 | { |
854 | 0 | v = *get_vector(*i); // make a copy |
855 | 0 | std::sort(v.begin(), v.end(), CStrLess()); |
856 | 0 | for (StrVector::iterator j = v.begin(); j != v.end(); ++j) |
857 | 0 | { |
858 | 0 | write_n_escape(out, conv1(*i)); |
859 | 0 | out << ' '; |
860 | 0 | write_n_escape(out, conv2(*j)); |
861 | 0 | out << '\n'; |
862 | 0 | } |
863 | 0 | } |
864 | 0 | return no_err; |
865 | 0 | } |
866 | | |
867 | | PosibErr<void> WritableReplDict::merge(FStream & in, |
868 | | ParmString file_name, |
869 | | Config * config) |
870 | 0 | { |
871 | 0 | typedef PosibErr<void> Ret; |
872 | 0 | unsigned int version; |
873 | 0 | unsigned int num_words, num_repls; |
874 | |
|
875 | 0 | String buf; |
876 | 0 | DataPair dp; |
877 | |
|
878 | 0 | if (!getline(in, dp, buf)) |
879 | 0 | make_err(bad_file_format, file_name); |
880 | |
|
881 | 0 | split(dp); |
882 | 0 | if (dp.key == "personal_repl") |
883 | 0 | version = 10; |
884 | 0 | else if (dp.key == "personal_repl-1.1") |
885 | 0 | version = 11; |
886 | 0 | else |
887 | 0 | return make_err(bad_file_format, file_name); |
888 | | |
889 | 0 | split(dp); |
890 | 0 | { |
891 | 0 | Ret pe = set_check_lang(dp.key, *config); |
892 | 0 | if (pe.has_err()) |
893 | 0 | return pe.with_file(file_name); |
894 | 0 | } |
895 | | |
896 | 0 | unsigned int num_soundslikes = 0; |
897 | 0 | if (version == 10) { |
898 | 0 | split(dp); |
899 | 0 | num_soundslikes = atoi(dp.key); |
900 | 0 | } |
901 | |
|
902 | 0 | split(dp); // not used at the moment |
903 | |
|
904 | 0 | split(dp); |
905 | 0 | if (dp.key.size > 0) |
906 | 0 | set_file_encoding(dp.key, *config); |
907 | 0 | else |
908 | 0 | set_file_encoding("", *config); |
909 | |
|
910 | 0 | if (version == 11) { |
911 | |
|
912 | 0 | ConvP conv1(iconv); |
913 | 0 | ConvP conv2(iconv); |
914 | 0 | for (;;) { |
915 | 0 | bool res = getline_n_unescape(in, buf, '\n'); |
916 | 0 | if (!res) break; |
917 | 0 | char * mis = buf.mstr(); |
918 | 0 | char * repl = strchr(mis, ' '); |
919 | 0 | if (!repl) continue; // bad line, ignore |
920 | 0 | *repl = '\0'; // split string |
921 | 0 | ++repl; |
922 | 0 | if (!repl[0]) continue; // empty repl, ignore |
923 | 0 | WritableReplDict::add_repl(conv1(mis), conv2(repl)); |
924 | 0 | } |
925 | | |
926 | 0 | } else { |
927 | | |
928 | 0 | String mis, sound, repl; |
929 | 0 | unsigned int h,i,j; |
930 | 0 | for (h=0; h != num_soundslikes; ++h) { |
931 | 0 | in >> sound >> num_words; |
932 | 0 | for (i = 0; i != num_words; ++i) { |
933 | 0 | in >> mis >> num_repls; |
934 | 0 | in.ignore(); // ignore space |
935 | 0 | for (j = 0; j != num_repls; ++j) { |
936 | 0 | in.getline(repl, ','); |
937 | 0 | WritableReplDict::add_repl(mis, repl); |
938 | 0 | } |
939 | 0 | } |
940 | 0 | } |
941 | |
|
942 | 0 | } |
943 | 0 | return no_err; |
944 | 0 | } |
945 | | |
946 | | WritableReplDict::~WritableReplDict() |
947 | 711 | { |
948 | 711 | WordLookup::iterator i = word_lookup->begin(); |
949 | 711 | WordLookup::iterator e = word_lookup->end(); |
950 | | |
951 | 711 | for (;i != e; ++i) |
952 | 0 | get_vector(*i)->~StrVector(); |
953 | 711 | } |
954 | | |
955 | | } |
956 | | |
957 | | namespace aspeller { |
958 | | |
959 | 1.42k | Dictionary * new_default_writable_dict(const Config & cfg) { |
960 | 1.42k | return new WritableDict(cfg); |
961 | 1.42k | } |
962 | | |
963 | 711 | Dictionary * new_default_replacement_dict(const Config & cfg) { |
964 | 711 | return new WritableReplDict(cfg); |
965 | 711 | } |
966 | | |
967 | | } |