/src/aspell/modules/speller/default/speller_impl.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Aspell main C++ include file |
2 | | // Copyright 1998-2000 by Kevin Atkinson under the terms of the LGPL. |
3 | | |
4 | | #ifndef __aspeller_speller__ |
5 | | #define __aspeller_speller__ |
6 | | |
7 | | #include <vector> |
8 | | |
9 | | #include "clone_ptr.hpp" |
10 | | #include "copy_ptr.hpp" |
11 | | #include "data.hpp" |
12 | | #include "enumeration.hpp" |
13 | | #include "speller.hpp" |
14 | | #include "check_list.hpp" |
15 | | |
16 | | using namespace acommon; |
17 | | |
18 | | namespace acommon { /* forward declarations of acommon types; no definitions are provided here */ |
19 | | class StringMap; |
20 | | class Config; |
21 | | class WordList; |
22 | | } |
23 | | // The speller class is responsible for keeping track of the |
24 | | // dictionaries, coming up with suggestions, and the like. Its methods |
25 | | // are NOT meant to be used by multiple threads and/or documents. |
26 | | |
27 | | namespace aspeller { |
28 | | |
29 | | class Language; /* forward declarations of aspeller types named by SpellerImpl's members below */ |
30 | | struct SensitiveCompare; |
31 | | class Suggest; |
32 | | |
33 | | enum SpecialId {main_id, personal_id, session_id, /* role tag stored in SpellerDict::special_id */ |
34 | | personal_repl_id, none_id}; /* none_id is the default argument of SpellerDict's three-argument constructor */ |
35 | | |
36 | | struct SpellerDict /* A Dict pointer plus per-dictionary usage flags; entries are presumably chained into a list through next. */ |
37 | | { |
38 | | Dict * dict; /* released by the destructor when non-null */ |
39 | | bool use_to_check; |
40 | | bool use_to_suggest; |
41 | | bool save_on_saveall; |
42 | | SpecialId special_id; |
43 | | SpellerDict * next; /* following entry; the destructor does not touch it */ |
44 | | SpellerDict(Dict *); |
45 | | SpellerDict(Dict *, const Config &, SpecialId id = none_id); |
46 | 4.96k | ~SpellerDict() {if (dict) dict->release();} /* NOTE(review): no copy operations are declared, so a copied SpellerDict would call release() on the same dict again -- confirm nothing copies these. */ |
47 | | }; |
48 | | |
49 | | class SpellerImpl : public Speller |
50 | | { |
51 | | public: |
52 | | SpellerImpl(); // does not set anything up. |
53 | | ~SpellerImpl(); |
54 | | // Construction only creates the object; setup() is presumably where a Config gets applied -- confirm in the .cpp. |
55 | | PosibErr<void> setup(Config *); |
56 | | |
57 | | void setup_tokenizer(Tokenizer *); |
58 | | |
59 | | // |
60 | | // Low level Word List Management methods |
61 | | // |
62 | | |
63 | | public: |
64 | | |
65 | | typedef Enumeration<Dict *> * WordLists; |
66 | | |
67 | | WordLists wordlists() const; |
68 | | int num_wordlists() const; |
69 | | |
70 | | const SpellerDict * locate (const Dict::Id &) const; |
71 | | |
72 | | // |
73 | | // Add a single dictionary that has not been previously added |
74 | | // |
75 | | PosibErr<void> add_dict(SpellerDict *); |
76 | | |
77 | | PosibErr<const WordList *> personal_word_list () const; |
78 | | PosibErr<const WordList *> session_word_list () const; |
79 | | PosibErr<const WordList *> main_word_list () const; |
80 | | |
81 | | // |
82 | | // Language methods |
83 | | // |
84 | | |
85 | | char * to_lower(char *); |
86 | | |
87 | | const char * lang_name() const; |
88 | | |
89 | 12.3k | const Language & lang() const {return *lang_;} |
90 | | |
91 | | // |
92 | | // Spelling methods |
93 | | // |
94 | | // CompoundInfo is the optional last argument of the full check() below (it defaults to NULL); its fields start at zero/null. |
95 | | struct CompoundInfo { |
96 | | short count; |
97 | | short incorrect_count; |
98 | | CheckInfo * first_incorrect; |
99 | 10.5k | CompoundInfo() : count(0), incorrect_count(0), first_incorrect() {} |
100 | | }; |
101 | | // Full check entry point; the convenience overloads below all end up calling it. |
102 | | PosibErr<bool> check(char * word, char * word_end, /* it WILL modify word */ |
103 | | bool try_uppercase, |
104 | | unsigned run_together_limit, |
105 | | CheckInfo *, CheckInfo *, GuessInfo *, CompoundInfo * = NULL); |
106 | | // Checks word in place, using this object's check_inf array and guess_info as working storage. |
107 | 110k | PosibErr<bool> check(MutableString word) { |
108 | 110k | guess_info.reset(); |
109 | 110k | return check(word.begin(), word.end(), false, |
110 | 110k | run_together_limit(), // 0 unless unconditional_run_together_ is set |
111 | 110k | check_inf, check_inf + sizeof(check_inf) / sizeof(check_inf[0]), &guess_info); // end pointer follows the array's declared size |
112 | 110k | } |
113 | | PosibErr<bool> check(ParmString word) // works on a private copy (terminator included), so the caller's string is left untouched |
114 | 82.4k | { |
115 | 82.4k | size_t sz = word.size(); |
116 | 82.4k | std::vector<char> w(sz+1); |
117 | 82.4k | memcpy(&*w.begin(), word.str(), sz+1); |
118 | 82.4k | return check(MutableString(&w.front(), sz)); |
119 | 82.4k | } |
120 | 82.4k | PosibErr<bool> check(const char * word) {return check(ParmString(word));} |
121 | | PosibErr<bool> check(const char * word, size_t sz) // copies exactly sz chars and appends the terminator itself |
122 | 0 | { |
123 | 0 | std::vector<char> w(sz+1); |
124 | 0 | memcpy(&*w.begin(), word, sz); |
125 | 0 | w[sz] = '\0'; |
126 | 0 | return check(MutableString(&w.front(), sz)); |
127 | 0 | } |
128 | | |
129 | | CheckInfo * check_runtogether(char * word, char * word_end, /* it WILL modify word */ |
130 | | bool try_uppercase, |
131 | | unsigned run_together_limit, |
132 | | CheckInfo *, CheckInfo *, |
133 | | GuessInfo *); |
134 | | |
135 | | bool check_single(char * word, /* it WILL modify word */ |
136 | | bool try_uppercase, |
137 | | CheckInfo & ci, GuessInfo * gi); |
138 | | |
139 | | bool check_affix(ParmString word, CheckInfo & ci, GuessInfo * gi); |
140 | | |
141 | | bool check_simple(ParmString, WordEntry &); |
142 | | // Returns check_inf when its first entry holds a word, otherwise guess_info.head when set, otherwise null. |
143 | 0 | const CheckInfo * check_info() { |
144 | 0 | if (check_inf[0].word.str) |
145 | 0 | return check_inf; |
146 | 0 | else if (guess_info.head) |
147 | 0 | return guess_info.head; |
148 | 0 | else |
149 | 0 | return 0; |
150 | 0 | } |
151 | | |
152 | | // |
153 | | // High level Word List management methods |
154 | | // |
155 | | |
156 | | PosibErr<void> add_to_personal(MutableString word); |
157 | | PosibErr<void> add_to_session(MutableString word); |
158 | | |
159 | | PosibErr<void> save_all_word_lists(); |
160 | | |
161 | | PosibErr<void> clear_session(); |
162 | | |
163 | | PosibErr<const WordList *> suggest(MutableString word); |
164 | | // the suggestion list and the elements in it are only |
165 | | // valid until the next call to suggest. |
166 | | |
167 | | PosibErr<void> store_replacement(MutableString mis, |
168 | | MutableString cor); |
169 | | |
170 | | PosibErr<void> store_replacement(const String & mis, const String & cor, |
171 | | bool memory); |
172 | | |
173 | | // |
174 | | // Private Stuff (from here to the end of the class) |
175 | | // |
176 | | |
177 | | class DictCollection; |
178 | | class ConfigNotifier; |
179 | | |
180 | | private: |
181 | | friend class ConfigNotifier; |
182 | | |
183 | | CachePtr<const Language> lang_; |
184 | | CopyPtr<SensitiveCompare> sensitive_compare_; |
185 | | //CopyPtr<DictCollection> wls_; |
186 | | ClonePtr<Suggest> suggest_; |
187 | | ClonePtr<Suggest> intr_suggest_; |
188 | | unsigned int ignore_count; |
189 | | bool ignore_repl; |
190 | | String prev_mis_repl_; |
191 | | String prev_cor_repl_; |
192 | | // Copy operations are declared private (presumably left undefined so the class cannot be copied). |
193 | | void operator= (const SpellerImpl &other); |
194 | | SpellerImpl(const SpellerImpl &other); |
195 | | |
196 | | SpellerDict * dicts_; |
197 | | |
198 | | Dictionary * personal_; |
199 | | Dictionary * session_; |
200 | | ReplacementDict * repl_; |
201 | | Dictionary * main_; |
202 | | |
203 | | public: |
204 | | // these are public so that other classes and functions can use them, |
205 | | // DO NOT USE |
206 | | |
207 | 0 | const SensitiveCompare & sensitive_compare() const {return *sensitive_compare_;} |
208 | | |
209 | | //const DictCollection & data_set_collection() const {return *wls_;} |
210 | | |
211 | | PosibErr<void> set_check_lang(ParmString lang, ParmString lang_dir); |
212 | | |
213 | | double distance (const char *, const char *, |
214 | | const char *, const char *) const; |
215 | | |
216 | | CheckInfo check_inf[8]; // working array handed to check() by check(MutableString) |
217 | | GuessInfo guess_info; |
218 | | |
219 | | SensitiveCompare s_cmp; |
220 | | SensitiveCompare s_cmp_begin; // These (s_cmp_begin,middle,end) |
221 | | SensitiveCompare s_cmp_middle; // are used by the affix code. |
222 | | SensitiveCompare s_cmp_end; |
223 | | |
224 | | typedef Vector<const Dict *> WS; |
225 | | WS check_ws, affix_ws, suggest_ws, suggest_affix_ws; |
226 | | |
227 | | bool unconditional_run_together_; |
228 | | unsigned int run_together_limit_; |
229 | | unsigned int run_together_min_; |
230 | | // Effective limit: run_together_limit_ only while unconditional_run_together_ is set, otherwise 0. |
231 | 43.5k | unsigned run_together_limit() const { |
232 | 43.5k | return unconditional_run_together_ ? run_together_limit_ : 0; |
233 | 43.5k | } |
234 | | |
235 | | bool camel_case_; |
236 | | |
237 | | bool affix_info, affix_compress; |
238 | | |
239 | | bool have_repl; |
240 | | |
241 | | bool have_soundslike; |
242 | | |
243 | | bool invisible_soundslike, soundslike_root_only; |
244 | | |
245 | | bool fast_scan, fast_lookup; |
246 | | |
247 | | bool run_together; |
248 | | |
249 | | }; |
250 | | |
251 | | struct LookupInfo { /* Bundles a speller, a lookup mode, and an iterator range over one of the speller's WS lists. */ |
252 | | SpellerImpl * sp; |
253 | | enum Mode {Word, Guess, Clean, Soundslike, AlwaysTrue} mode; /* the constructor stores Guess as Word */ |
254 | | SpellerImpl::WS::const_iterator begin; |
255 | | SpellerImpl::WS::const_iterator end; |
256 | | inline LookupInfo(SpellerImpl * s, Mode m); |
257 | | // returns 0 if nothing found |
258 | | // 1 if a match is found |
259 | | // -1 if a word is found but its affix doesn't match (the original note breaks off at: and "gi"; presumably this case depends on gi being non-null -- TODO confirm) |
260 | | int lookup (ParmString word, const SensitiveCompare * c, char aff, |
261 | | WordEntry & o, GuessInfo * gi) const; |
262 | | }; |
263 | | // Selects the [begin, end) word-list range for mode m. begin/end are value-initialized first so that AlwaysTrue (or an unexpected mode value) never leaves them indeterminate. |
264 | | inline LookupInfo::LookupInfo(SpellerImpl * s, Mode m) |
265 | | : sp(s), mode(m), begin(), end() |
266 | 498k | { |
267 | 498k | switch (m) { |
268 | 20.8k | case Word: |
269 | 20.8k | begin = sp->affix_ws.begin(); |
270 | 20.8k | end = sp->affix_ws.end(); |
271 | 20.8k | return; |
272 | 241k | case Guess: |
273 | 241k | begin = sp->check_ws.begin(); |
274 | 241k | end = sp->check_ws.end(); |
275 | 241k | mode = Word; // Guess only picks check_ws; the stored mode is Word from here on |
276 | 241k | return; |
277 | 235k | case Clean: |
278 | 235k | case Soundslike: // Clean and Soundslike share the same list |
279 | 235k | begin = sp->suggest_affix_ws.begin(); |
280 | 235k | end = sp->suggest_affix_ws.end(); |
281 | 235k | return; |
282 | 868 | case AlwaysTrue: // no list is selected; begin/end keep their value-initialized state |
283 | 868 | return; |
284 | 498k | } |
285 | 498k | } |
286 | | } |
287 | | |
288 | | #endif |