/src/aspell-fuzz/aspell_fuzzer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include <stdint.h> |
2 | | #include <stdio.h> |
3 | | #include <stdlib.h> |
4 | | #include <string.h> |
5 | | #include <sys/types.h> |
6 | | #include <libgen.h> |
7 | | #include <aspell.h> |
8 | | #include <algorithm> |
9 | | |
// Non-zero when diagnostic output is enabled; consulted by FUZZ_DEBUG().
static int enable_diags;
// Dictionary directory passed to aspell as its "data-dir" option.
static char data_dir[1024];

// Writes a printf-style diagnostic followed by a newline to stderr, but only
// when enable_diags is non-zero.
//
// The body is wrapped in do { ... } while (0) so that `FUZZ_DEBUG(...);`
// always expands to exactly one statement. A bare `if (...) { ... }` expansion
// breaks (or silently re-binds the `else`) when the macro is used as the sole
// statement of an unbraced if/else.
#define FUZZ_DEBUG(FMT, ...)                      \
  do {                                            \
    if (enable_diags) {                           \
      fprintf(stderr, FMT, ##__VA_ARGS__);        \
      fprintf(stderr, "\n");                      \
    }                                             \
  } while (0)

// Maximum number of leading input bytes that are treated as configuration.
static const size_t MAX_CONFIG_LEN = 10*1024;
19 | | |
20 | | int parse_config(AspellConfig *spell_config, |
21 | | uint8_t *config, |
22 | | size_t config_len); |
23 | | |
24 | | // On startup, this function is called once. Use it to access argv. |
25 | 2 | extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { |
26 | 2 | char *argv0_copy = strdup((*argv)[0]); |
27 | | |
28 | | // Create the data dir. |
29 | 2 | snprintf(data_dir, sizeof(data_dir), "%s/dict", dirname(argv0_copy)); |
30 | | |
31 | | // Free off the temporary variable. |
32 | 2 | free(argv0_copy); |
33 | | |
34 | 2 | printf("Init: Running with data-dir: %s\n", data_dir); |
35 | | |
36 | 2 | return 0; |
37 | 2 | } |
38 | | |
39 | | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) |
40 | 991 | { |
41 | 991 | AspellCanHaveError *possible_err = NULL; |
42 | 991 | AspellSpeller *spell_checker = NULL; |
43 | 991 | AspellConfig *spell_config = NULL; |
44 | 991 | AspellDocumentChecker *doc_checker = NULL; |
45 | 991 | AspellCanHaveError *doc_err = NULL; |
46 | 991 | AspellToken token; |
47 | 991 | const char *data_str = reinterpret_cast<const char *>(data); |
48 | 991 | uint8_t config[MAX_CONFIG_LEN]; |
49 | 991 | size_t config_len; |
50 | 991 | int rc; |
51 | | |
52 | | // Enable or disable diagnostics based on the FUZZ_VERBOSE environment flag. |
53 | 991 | enable_diags = (getenv("FUZZ_VERBOSE") != NULL); |
54 | | |
55 | | // Copy up to MAX_CONFIG_LEN bytes from the data. |
56 | 991 | config_len = std::min(size, MAX_CONFIG_LEN); |
57 | 991 | memcpy(config, data, config_len); |
58 | | |
59 | | // Create a new configuration class. |
60 | 991 | spell_config = new_aspell_config(); |
61 | | |
62 | | // Parse configuration. Exit if the configuration was bad. |
63 | 991 | rc = parse_config(spell_config, config, config_len); |
64 | 991 | if (rc == -1) |
65 | 21 | { |
66 | 21 | FUZZ_DEBUG("Configuration parsing failed"); |
67 | 21 | goto EXIT_LABEL; |
68 | 21 | } |
69 | | |
70 | | // Move the data pointer past the config. |
71 | 970 | data_str += rc; |
72 | 970 | size -= rc; |
73 | | |
74 | 970 | FUZZ_DEBUG("Document: %.*s", (int)size, data_str); |
75 | | |
76 | | // Replace the data dir with the relative directory so that it works wherever |
77 | | // it is run from, so long as dictionary files are installed relative to it. |
78 | 970 | FUZZ_DEBUG("Overriding data-dir to %s", data_dir); |
79 | 970 | aspell_config_replace(spell_config, "data-dir", data_dir); |
80 | | |
81 | | // Convert the configuration to a spell checker. |
82 | 970 | possible_err = new_aspell_speller(spell_config); |
83 | 970 | if (aspell_error_number(possible_err) != 0) { |
84 | | // Failed on configuration. |
85 | 138 | FUZZ_DEBUG("Failed to create speller: %s", |
86 | 138 | aspell_error_message(possible_err)); |
87 | 138 | delete_aspell_can_have_error(possible_err); |
88 | 138 | goto EXIT_LABEL; |
89 | 138 | } |
90 | | |
91 | | // Create a spell checker. |
92 | 832 | spell_checker = to_aspell_speller(possible_err); |
93 | | |
94 | | // Convert the spell checker to a document checker. |
95 | 832 | doc_err = new_aspell_document_checker(spell_checker); |
96 | 832 | if (aspell_error(doc_err) != 0) { |
97 | | // Failed to convert to a document checker. |
98 | 8 | FUZZ_DEBUG("Failed to create document checker: %s", |
99 | 8 | aspell_error_message(doc_err)); |
100 | 8 | delete_aspell_can_have_error(doc_err); |
101 | 8 | goto EXIT_LABEL; |
102 | 8 | } |
103 | | |
104 | 824 | doc_checker = to_aspell_document_checker(doc_err); |
105 | | |
106 | | // Process the remainder of the document. |
107 | 824 | aspell_document_checker_process(doc_checker, data_str, size); |
108 | | |
109 | | // Iterate over all misspellings. |
110 | 824 | token = aspell_document_checker_next_misspelling(doc_checker); |
111 | | |
112 | 824 | FUZZ_DEBUG("Token len %d", token.len); |
113 | | |
114 | 824 | for (; |
115 | 11.7k | token.len != 0; |
116 | 10.9k | token = aspell_document_checker_next_misspelling(doc_checker)) |
117 | 10.9k | { |
118 | | // Get spelling suggestions for the misspelling. |
119 | 10.9k | auto word_list = aspell_speller_suggest(spell_checker, |
120 | 10.9k | data_str + token.offset, |
121 | 10.9k | token.len); |
122 | | |
123 | | // Iterate over the suggested replacement words in the word list. |
124 | 10.9k | AspellStringEnumeration *els = aspell_word_list_elements(word_list); |
125 | | |
126 | 10.9k | for (const char *word = aspell_string_enumeration_next(els); |
127 | 616k | word != 0; |
128 | 606k | word = aspell_string_enumeration_next(els)) |
129 | 606k | { |
130 | | // Conditionally print out the suggested replacement words. |
131 | 606k | FUZZ_DEBUG("Suggesting replacement for word at offset %d len %d: %s", |
132 | 606k | token.offset, |
133 | 606k | token.len, |
134 | 606k | word); |
135 | 606k | } |
136 | 10.9k | delete_aspell_string_enumeration(els); |
137 | 10.9k | } |
138 | | |
139 | 991 | EXIT_LABEL: |
140 | | |
141 | 991 | if (doc_checker != NULL) { |
142 | 824 | delete_aspell_document_checker(doc_checker); |
143 | 824 | } |
144 | | |
145 | 991 | if (spell_checker != NULL) { |
146 | 832 | delete_aspell_speller(spell_checker); |
147 | 832 | } |
148 | | |
149 | 991 | if (spell_config != NULL) { |
150 | 991 | delete_aspell_config(spell_config); |
151 | 991 | } |
152 | | |
153 | 991 | return 0; |
154 | 824 | } |
155 | | |
156 | | // Returns -1 on error, or the number of bytes consumed from the config string |
157 | | // otherwise. |
158 | | int parse_config(AspellConfig *spell_config, |
159 | | uint8_t *config, |
160 | | size_t config_len) |
161 | 991 | { |
162 | 991 | uint8_t line[MAX_CONFIG_LEN]; |
163 | | |
164 | 991 | uint8_t *config_ptr = config; |
165 | 991 | size_t config_ptr_used = 0; |
166 | | |
167 | 991 | uint8_t *delimiter; |
168 | | |
169 | | // Iterate over the lines. |
170 | 991 | for (delimiter = (uint8_t *)memchr(config_ptr, |
171 | 991 | '\n', |
172 | 991 | config_len - config_ptr_used); |
173 | 29.5k | delimiter != NULL; |
174 | 28.5k | delimiter = (uint8_t *)memchr(config_ptr, |
175 | 28.5k | '\n', |
176 | 28.5k | config_len - config_ptr_used)) |
177 | 28.9k | { |
178 | 28.9k | int line_len = delimiter - config_ptr; |
179 | | |
180 | 28.9k | if (line_len == 0) { |
181 | | // The line is zero-length; it's the end of configuration. Skip over the |
182 | | // delimiter and break out. |
183 | 362 | FUZZ_DEBUG("Breaking out of config"); |
184 | 362 | config_ptr++; |
185 | 362 | config_ptr_used++; |
186 | 362 | break; |
187 | 362 | } |
188 | | |
189 | | // Copy the line into the line array. Replace the newline by a null. |
190 | 28.5k | memcpy(line, config_ptr, line_len); |
191 | 28.5k | line[line_len] = 0; |
192 | | |
193 | | // Try and split the line by =. |
194 | 28.5k | uint8_t *kv_delim = (uint8_t *)memchr(line, '=', line_len); |
195 | | |
196 | 28.5k | if (kv_delim == NULL) { |
197 | | // Can't split as a k/v pair. Exit early. |
198 | 21 | return -1; |
199 | 21 | } |
200 | | |
201 | | // Convert the line into a key, value pair. |
202 | 28.5k | kv_delim[0] = 0; |
203 | | |
204 | 28.5k | char *keyword = reinterpret_cast<char *>(line); |
205 | 28.5k | char *value = reinterpret_cast<char *>(kv_delim + 1); |
206 | | |
207 | 28.5k | FUZZ_DEBUG("Key: %s; Value: %s", keyword, value); |
208 | 28.5k | int ok = aspell_config_replace(spell_config, keyword, value); |
209 | 28.5k | if (!ok) { |
210 | | // Log any errors and continue. |
211 | 17.0k | FUZZ_DEBUG("Config error from aspell_config_replace: %s", |
212 | 17.0k | aspell_config_error_message(spell_config)); |
213 | 17.0k | } |
214 | | |
215 | | // Advance the config pointers. Make sure to add 1 for the delimiter. |
216 | 28.5k | config_ptr += (line_len + 1); |
217 | 28.5k | config_ptr_used += (line_len + 1); |
218 | 28.5k | } |
219 | | |
220 | | // Return how much data was used. |
221 | 970 | FUZZ_DEBUG("Used %zu bytes of configuration data", config_ptr_used); |
222 | | |
223 | 970 | return config_ptr_used; |
224 | 991 | } |