/src/aspell-fuzz/aspell_fuzzer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include <stdint.h> |
2 | | #include <stdio.h> |
3 | | #include <stdlib.h> |
4 | | #include <string.h> |
5 | | #include <sys/types.h> |
6 | | #include <libgen.h> |
7 | | #include <aspell.h> |
8 | | #include <algorithm> |
9 | | |
// Non-zero when diagnostic output is enabled. LLVMFuzzerTestOneInput sets this
// on every run from the FUZZ_VERBOSE environment variable.
static int enable_diags;

// Dictionary directory handed to aspell as "data-dir". Filled in once by
// LLVMFuzzerInitialize as "<directory of the fuzzer binary>/dict".
static char data_dir[1024];

// Prints a printf-style diagnostic line (newline appended) to stderr, but only
// when enable_diags is non-zero.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// exactly one statement. A bare `if` expansion would break (or silently
// re-bind the `else` of) code such as:
//
//   if (cond) FUZZ_DEBUG("..."); else other();
#define FUZZ_DEBUG(FMT, ...)                                                   \
  do {                                                                         \
    if (enable_diags) {                                                        \
      fprintf(stderr, FMT, ##__VA_ARGS__);                                     \
      fprintf(stderr, "\n");                                                   \
    }                                                                          \
  } while (0)

// Upper bound, in bytes, on how much of the fuzz input is treated as
// configuration; also the size of the per-line scratch buffer in parse_config.
static const size_t MAX_CONFIG_LEN = 10*1024;
19 | | |
20 | | int parse_config(AspellConfig *spell_config, |
21 | | uint8_t *config, |
22 | | size_t config_len); |
23 | | |
24 | | // On startup, this function is called once. Use it to access argv. |
25 | 2 | extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { |
26 | 2 | char *argv0_copy = strdup((*argv)[0]); |
27 | 2 | |
28 | 2 | // Create the data dir. |
29 | 2 | snprintf(data_dir, sizeof(data_dir), "%s/dict", dirname(argv0_copy)); |
30 | 2 | |
31 | 2 | // Free off the temporary variable. |
32 | 2 | free(argv0_copy); |
33 | 2 | |
34 | 2 | printf("Init: Running with data-dir: %s\n", data_dir); |
35 | 2 | |
36 | 2 | return 0; |
37 | 2 | } |
38 | | |
39 | | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) |
40 | 5.80k | { |
41 | 5.80k | AspellCanHaveError *possible_err = NULL; |
42 | 5.80k | AspellSpeller *spell_checker = NULL; |
43 | 5.80k | AspellConfig *spell_config = NULL; |
44 | 5.80k | AspellDocumentChecker *doc_checker = NULL; |
45 | 5.80k | AspellCanHaveError *doc_err = NULL; |
46 | 5.80k | AspellToken token; |
47 | 5.80k | const char *data_str = reinterpret_cast<const char *>(data); |
48 | 5.80k | uint8_t config[MAX_CONFIG_LEN]; |
49 | 5.80k | size_t config_len; |
50 | 5.80k | int rc; |
51 | 5.80k | |
52 | 5.80k | // Enable or disable diagnostics based on the FUZZ_VERBOSE environment flag. |
53 | 5.80k | enable_diags = (getenv("FUZZ_VERBOSE") != NULL); |
54 | 5.80k | |
55 | 5.80k | // Copy up to MAX_CONFIG_LEN bytes from the data. |
56 | 5.80k | config_len = std::min(size, MAX_CONFIG_LEN); |
57 | 5.80k | memcpy(config, data, config_len); |
58 | 5.80k | |
59 | 5.80k | // Create a new configuration class. |
60 | 5.80k | spell_config = new_aspell_config(); |
61 | 5.80k | |
62 | 5.80k | // Parse configuration. Exit if the configuration was bad. |
63 | 5.80k | rc = parse_config(spell_config, config, config_len); |
64 | 5.80k | if (rc == -1) |
65 | 194 | { |
66 | 194 | FUZZ_DEBUG("Configuration parsing failed"); |
67 | 194 | goto EXIT_LABEL; |
68 | 194 | } |
69 | 5.60k | |
70 | 5.60k | // Move the data pointer past the config. |
71 | 5.60k | data_str += rc; |
72 | 5.60k | size -= rc; |
73 | 5.60k | |
74 | 5.60k | FUZZ_DEBUG("Document: %.*s", (int)size, data_str); |
75 | 5.60k | |
76 | 5.60k | // Replace the data dir with the relative directory so that it works wherever |
77 | 5.60k | // it is run from, so long as dictionary files are installed relative to it. |
78 | 5.60k | FUZZ_DEBUG("Overriding data-dir to %s", data_dir); |
79 | 5.60k | aspell_config_replace(spell_config, "data-dir", data_dir); |
80 | 5.60k | |
81 | 5.60k | // Convert the configuration to a spell checker. |
82 | 5.60k | possible_err = new_aspell_speller(spell_config); |
83 | 5.60k | if (aspell_error_number(possible_err) != 0) { |
84 | 577 | // Failed on configuration. |
85 | 577 | FUZZ_DEBUG("Failed to create speller: %s", |
86 | 577 | aspell_error_message(possible_err)); |
87 | 577 | delete_aspell_can_have_error(possible_err); |
88 | 577 | goto EXIT_LABEL; |
89 | 577 | } |
90 | 5.02k | |
91 | 5.02k | // Create a spell checker. |
92 | 5.02k | spell_checker = to_aspell_speller(possible_err); |
93 | 5.02k | |
94 | 5.02k | // Convert the spell checker to a document checker. |
95 | 5.02k | doc_err = new_aspell_document_checker(spell_checker); |
96 | 5.02k | if (aspell_error(doc_err) != 0) { |
97 | 0 | // Failed to convert to a document checker. |
98 | 0 | FUZZ_DEBUG("Failed to create document checker: %s", |
99 | 0 | aspell_error_message(doc_err)); |
100 | 0 | delete_aspell_can_have_error(doc_err); |
101 | 0 | goto EXIT_LABEL; |
102 | 0 | } |
103 | 5.02k | |
104 | 5.02k | doc_checker = to_aspell_document_checker(doc_err); |
105 | 5.02k | |
106 | 5.02k | // Process the remainder of the document. |
107 | 5.02k | aspell_document_checker_process(doc_checker, data_str, size); |
108 | 5.02k | |
109 | 5.02k | // Iterate over all misspellings. |
110 | 5.02k | token = aspell_document_checker_next_misspelling(doc_checker); |
111 | 5.02k | |
112 | 5.02k | FUZZ_DEBUG("Token len %d", token.len); |
113 | 5.02k | |
114 | 5.02k | for (; |
115 | 27.7k | token.len != 0; |
116 | 22.6k | token = aspell_document_checker_next_misspelling(doc_checker)) |
117 | 22.6k | { |
118 | 22.6k | // Get spelling suggestions for the misspelling. |
119 | 22.6k | auto word_list = aspell_speller_suggest(spell_checker, |
120 | 22.6k | data_str + token.offset, |
121 | 22.6k | token.len); |
122 | 22.6k | |
123 | 22.6k | // Iterate over the suggested replacement words in the word list. |
124 | 22.6k | AspellStringEnumeration *els = aspell_word_list_elements(word_list); |
125 | 22.6k | |
126 | 22.6k | for (const char *word = aspell_string_enumeration_next(els); |
127 | 1.11M | word != 0; |
128 | 1.09M | word = aspell_string_enumeration_next(els)) |
129 | 1.09M | { |
130 | 1.09M | // Conditionally print out the suggested replacement words. |
131 | 1.09M | FUZZ_DEBUG("Suggesting replacement for word at offset %d len %d: %s", |
132 | 1.09M | token.offset, |
133 | 1.09M | token.len, |
134 | 1.09M | word); |
135 | 1.09M | } |
136 | 22.6k | delete_aspell_string_enumeration(els); |
137 | 22.6k | } |
138 | 5.02k | |
139 | 5.80k | EXIT_LABEL: |
140 | 5.80k | |
141 | 5.80k | if (doc_checker != NULL) { |
142 | 5.02k | delete_aspell_document_checker(doc_checker); |
143 | 5.02k | } |
144 | 5.80k | |
145 | 5.80k | if (spell_checker != NULL) { |
146 | 5.02k | delete_aspell_speller(spell_checker); |
147 | 5.02k | } |
148 | 5.80k | |
149 | 5.80k | if (spell_config != NULL) { |
150 | 5.80k | delete_aspell_config(spell_config); |
151 | 5.80k | } |
152 | 5.80k | |
153 | 5.80k | return 0; |
154 | 5.02k | } |
155 | | |
156 | | // Returns -1 on error, or the number of bytes consumed from the config string |
157 | | // otherwise. |
158 | | int parse_config(AspellConfig *spell_config, |
159 | | uint8_t *config, |
160 | | size_t config_len) |
161 | 5.80k | { |
162 | 5.80k | uint8_t line[MAX_CONFIG_LEN]; |
163 | 5.80k | |
164 | 5.80k | uint8_t *config_ptr = config; |
165 | 5.80k | size_t config_ptr_used = 0; |
166 | 5.80k | |
167 | 5.80k | uint8_t *delimiter; |
168 | 5.80k | |
169 | 5.80k | // Iterate over the lines. |
170 | 5.80k | for (delimiter = (uint8_t *)memchr(config_ptr, |
171 | 5.80k | '\n', |
172 | 5.80k | config_len - config_ptr_used); |
173 | 57.9k | delimiter != NULL; |
174 | 52.1k | delimiter = (uint8_t *)memchr(config_ptr, |
175 | 52.1k | '\n', |
176 | 52.1k | config_len - config_ptr_used)) |
177 | 52.9k | { |
178 | 52.9k | int line_len = delimiter - config_ptr; |
179 | 52.9k | |
180 | 52.9k | if (line_len == 0) { |
181 | 623 | // The line is zero-length; it's the end of configuration. Skip over the |
182 | 623 | // delimiter and break out. |
183 | 623 | FUZZ_DEBUG("Breaking out of config"); |
184 | 623 | config_ptr++; |
185 | 623 | config_ptr_used++; |
186 | 623 | break; |
187 | 623 | } |
188 | 52.3k | |
189 | 52.3k | // Copy the line into the line array. Replace the newline by a null. |
190 | 52.3k | memcpy(line, config_ptr, line_len); |
191 | 52.3k | line[line_len] = 0; |
192 | 52.3k | |
193 | 52.3k | // Try and split the line by =. |
194 | 52.3k | uint8_t *kv_delim = (uint8_t *)memchr(line, '=', line_len); |
195 | 52.3k | |
196 | 52.3k | if (kv_delim == NULL) { |
197 | 194 | // Can't split as a k/v pair. Exit early. |
198 | 194 | return -1; |
199 | 194 | } |
200 | 52.1k | |
201 | 52.1k | // Convert the line into a key, value pair. |
202 | 52.1k | kv_delim[0] = 0; |
203 | 52.1k | |
204 | 52.1k | char *keyword = reinterpret_cast<char *>(line); |
205 | 52.1k | char *value = reinterpret_cast<char *>(kv_delim + 1); |
206 | 52.1k | |
207 | 52.1k | FUZZ_DEBUG("Key: %s; Value: %s", keyword, value); |
208 | 52.1k | int ok = aspell_config_replace(spell_config, keyword, value); |
209 | 52.1k | if (!ok) { |
210 | 36.8k | // Log any errors and continue. |
211 | 36.8k | FUZZ_DEBUG("Config error from aspell_config_replace: %s", |
212 | 36.8k | aspell_config_error_message(spell_config)); |
213 | 36.8k | } |
214 | 52.1k | |
215 | 52.1k | // Advance the config pointers. Make sure to add 1 for the delimiter. |
216 | 52.1k | config_ptr += (line_len + 1); |
217 | 52.1k | config_ptr_used += (line_len + 1); |
218 | 52.1k | } |
219 | 5.80k | |
220 | 5.80k | // Return how much data was used. |
221 | 5.80k | FUZZ_DEBUG("Used %zu bytes of configuration data", config_ptr_used); |
222 | 5.60k | |
223 | 5.60k | return config_ptr_used; |
224 | 5.80k | } |