/src/libredwg/examples/llvmfuzz.c
Line | Count | Source |
1 | | /*****************************************************************************/ |
2 | | /* LibreDWG - free implementation of the DWG file format */ |
3 | | /* */ |
4 | | /* Copyright (C) 2021, 2023 Free Software Foundation, Inc. */ |
5 | | /* */ |
6 | | /* This library is free software, licensed under the terms of the GNU */ |
7 | | /* General Public License as published by the Free Software Foundation, */ |
8 | | /* either version 3 of the License, or (at your option) any later version. */ |
9 | | /* You should have received a copy of the GNU General Public License */ |
10 | | /* along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
11 | | /*****************************************************************************/ |
12 | | |
13 | | /* |
14 | | * llvmfuzz.c: libfuzzer testing, esp. for oss-fuzz. with libfuzzer or |
15 | | * standalone written by Reini Urban |
16 | | */ |
17 | | |
18 | | #include <stdio.h> |
19 | | #include <stdlib.h> |
20 | | #include <assert.h> |
21 | | // #include <unistd.h> |
22 | | #include <sys/stat.h> |
23 | | |
24 | | #include "common.h" |
25 | | #include <dwg.h> |
26 | | #ifdef HAVE_SYS_TIME_H |
27 | | # include <sys/time.h> |
28 | | #endif |
29 | | #include "decode.h" |
30 | | #include "encode.h" |
31 | | #include "bits.h" |
32 | | #ifndef DISABLE_DXF |
33 | | # include "out_dxf.h" |
34 | | # ifndef DISABLE_JSON |
35 | | # include "in_json.h" |
36 | | # include "out_json.h" |
37 | | # endif |
38 | | # include "in_dxf.h" |
39 | | #endif |
40 | | |
// How many output converters the global `out` can select:
//   0 encode (DWG), 1 dxf, 2 dxfb, 3 json, 4 geojson.
// Disabling DXF leaves only the encoder; disabling JSON drops the two
// JSON writers.
#if defined(DISABLE_DXF)
#  define LLVMFUZZ_NUM_OUTPUTS 1
#elif defined(DISABLE_JSON)
#  define LLVMFUZZ_NUM_OUTPUTS 3
#else
#  define LLVMFUZZ_NUM_OUTPUTS 5
#endif
52 | | |
/* Index of the output converter to exercise, see LLVMFuzzerTestOneInput:
   0 dwg_encode, 1 dxf, 2 dxfb, 3 json, 4 geojson. */
int out;
/* Index into the list of DWG releases used when out is 0 (dwg_encode). */
int ver;

/* The libfuzzer entry point, also called by the standalone main. */
extern int LLVMFuzzerTestOneInput (const unsigned char *data, size_t size);
57 | | |
58 | | // libfuzzer limitation: |
59 | | // Enforce NULL-termination of the input buffer, to avoid bogus reports. copy |
60 | | // it. Problematic is mostly strtol(3) which also works with \n termination. |
61 | | static int |
62 | | enforce_null_termination (Bit_Chain *dat, bool enforce) |
63 | 2.25k | { |
64 | 2.25k | unsigned char *copy; |
65 | 2.25k | unsigned char c; |
66 | 2.25k | if (!dat->size) |
67 | 0 | return 0; |
68 | 2.25k | c = dat->chain[dat->size - 1]; |
69 | | // Allow \n termination without \0 in DXF? No, still crashes |
70 | 2.25k | if (!enforce && ((c == '\n' && c + 1 == '\0') || c == '\0')) |
71 | 120 | return 0; |
72 | | #ifdef STANDALONE |
73 | | fprintf (stderr, |
74 | | "llvmfuzz_standalone: enforce libfuzzer buffer NULL termination\n"); |
75 | | #endif |
76 | 2.13k | copy = malloc (dat->size + 1); |
77 | 2.13k | memcpy (copy, dat->chain, dat->size); |
78 | 2.13k | copy[dat->size] = '\0'; |
79 | 2.13k | dat->chain = copy; |
80 | 2.13k | return 1; |
81 | 2.25k | } |
82 | | |
83 | | int |
84 | | LLVMFuzzerTestOneInput (const unsigned char *data, size_t size) |
85 | 7.03k | { |
86 | 7.03k | Dwg_Data dwg; |
87 | 7.03k | Bit_Chain dat = { NULL, 0, 0, 0, 0, 0, 0, NULL, 0 }; |
88 | 7.03k | Bit_Chain out_dat = { NULL, 0, 0, 0, 0, 0, 0, NULL, 0 }; |
89 | 7.03k | int copied = 0; |
90 | 7.03k | struct ly_ctx *ctx = NULL; |
91 | | |
92 | 7.03k | static char tmp_file[256]; |
93 | | |
94 | 7.03k | #ifndef STANDALONE |
95 | | /* The libfuzzer path otherwise leaves out/ver at 0, so only dwg_encode runs. |
96 | | Derive them from the input (without consuming it, so the existing corpus |
97 | | keeps decoding) to also drive the out_dxf/out_dxfb/out_json encoders. */ |
98 | 7.03k | out = 0; |
99 | 7.03k | ver = 0; |
100 | 7.03k | if (size) |
101 | 7.03k | { |
102 | 7.03k | unsigned int h = 2166136261u; |
103 | 7.03k | const size_t n = size > 4096 ? 4096 : size; |
104 | 9.46M | for (size_t i = 0; i < n; i++) |
105 | 9.45M | h = (h ^ data[i]) * 16777619u; |
106 | 7.03k | out = (int)(h % LLVMFUZZ_NUM_OUTPUTS); |
107 | 7.03k | ver = (int)((h >> 8) % 20); |
108 | 7.03k | } |
109 | 7.03k | #endif |
110 | | |
111 | 7.03k | dat.chain = (unsigned char *)data; |
112 | 7.03k | dat.size = size; |
113 | 7.03k | memset (&dwg, 0, sizeof (dwg)); |
114 | | |
115 | | /* Cap input size to prevent OOM/timeout in fuzzing */ |
116 | 7.03k | if (size > 10 * 1024 * 1024) |
117 | 0 | return 0; |
118 | | // Detect the input format: DWG, DXF or JSON |
119 | 7.03k | if (dat.size > 2 && dat.chain[0] == 'A' && dat.chain[1] == 'C') |
120 | 4.78k | { |
121 | 4.78k | if (dwg_decode (&dat, &dwg) >= DWG_ERR_CRITICAL) |
122 | 4.14k | { |
123 | 4.14k | dwg_free (&dwg); |
124 | 4.14k | return 0; |
125 | 4.14k | } |
126 | 4.78k | } |
127 | 2.25k | #ifndef DISABLE_JSON |
128 | 2.25k | else if (dat.size > 1 && dat.chain[0] == '{') |
129 | 1.18k | { |
130 | 1.18k | copied = enforce_null_termination (&dat, true); |
131 | 1.18k | if (dwg_read_json (&dat, &dwg) >= DWG_ERR_CRITICAL) |
132 | 354 | { |
133 | 354 | if (copied) |
134 | 354 | bit_chain_free (&dat); |
135 | 354 | dwg_free (&dwg); |
136 | 354 | return 0; |
137 | 354 | } |
138 | 827 | dat.opts |= DWG_OPTS_INJSON; |
139 | 827 | dwg.opts |= DWG_OPTS_INJSON; |
140 | 827 | } |
141 | 1.07k | #endif |
142 | 1.07k | #ifndef DISABLE_DXF |
143 | 1.07k | else |
144 | 1.07k | { |
145 | 1.07k | copied = enforce_null_termination (&dat, false); |
146 | 1.07k | if (dwg_read_dxf (&dat, &dwg) >= DWG_ERR_CRITICAL) |
147 | 1.07k | { |
148 | 1.07k | if (copied) |
149 | 950 | bit_chain_free (&dat); |
150 | 1.07k | dwg_free (&dwg); |
151 | 1.07k | return 0; |
152 | 1.07k | } |
153 | 1.07k | } |
154 | | #else |
155 | | else |
156 | | return 0; |
157 | | #endif |
158 | | |
159 | 1.46k | memset (&out_dat, 0, sizeof (out_dat)); |
160 | 1.46k | bit_chain_set_version (&out_dat, &dat); |
161 | 1.46k | if (copied) |
162 | 827 | bit_chain_free (&dat); |
163 | | |
164 | | #if 0 |
165 | | snprintf (tmp_file, 255, "/tmp/llvmfuzzer%d.out", getpid()); |
166 | | tmp_file[255] = '\0'; |
167 | | #elif defined _WIN32 |
168 | | strcpy (tmp_file, "NUL"); |
169 | | #else |
170 | 1.46k | strcpy (tmp_file, "/dev/null"); |
171 | 1.46k | #endif |
172 | 1.46k | out_dat.fh = fopen (tmp_file, "w"); |
173 | | |
174 | 1.46k | switch (out) |
175 | 1.46k | { |
176 | 299 | case 0: |
177 | 299 | { |
178 | 299 | switch (ver) |
179 | 299 | { |
180 | | // TODO support preR13, many downconverters still missing |
181 | 19 | case 0: |
182 | 19 | out_dat.version = dwg.header.version = R_1_4; |
183 | 19 | break; |
184 | 13 | case 1: |
185 | 13 | out_dat.version = dwg.header.version = R_2_0; |
186 | 13 | break; |
187 | 9 | case 2: |
188 | 9 | out_dat.version = dwg.header.version = R_2_10; |
189 | 9 | break; |
190 | 9 | case 3: |
191 | 9 | out_dat.version = dwg.header.version = R_2_21; |
192 | 9 | break; |
193 | 6 | case 4: |
194 | 6 | out_dat.version = dwg.header.version = R_2_4; |
195 | 6 | break; |
196 | 15 | case 5: |
197 | 15 | out_dat.version = dwg.header.version = R_2_6; |
198 | 15 | break; |
199 | 7 | case 6: |
200 | 7 | out_dat.version = dwg.header.version = R_9; |
201 | 7 | break; |
202 | 22 | case 7: |
203 | 22 | out_dat.version = dwg.header.version = R_10; |
204 | 22 | break; |
205 | 19 | case 8: |
206 | 19 | out_dat.version = dwg.header.version = R_11; |
207 | 19 | break; |
208 | 14 | case 9: |
209 | 14 | out_dat.version = dwg.header.version = R_12; |
210 | 14 | break; |
211 | 15 | case 10: |
212 | 15 | out_dat.version = dwg.header.version = R_13; |
213 | 15 | break; |
214 | 15 | case 11: |
215 | 15 | out_dat.version = dwg.header.version = R_13c3; |
216 | 15 | break; |
217 | 6 | case 12: |
218 | 6 | out_dat.version = dwg.header.version = R_14; |
219 | 6 | break; |
220 | 16 | case 13: |
221 | 16 | out_dat.version = dwg.header.version = R_2004; |
222 | 16 | break; |
223 | 114 | default: // favor this one |
224 | 114 | out_dat.version = dwg.header.version = R_2000; |
225 | 114 | break; |
226 | 299 | } |
227 | 299 | dwg_encode (&dwg, &out_dat); |
228 | 299 | break; |
229 | 299 | } |
230 | 0 | #ifndef DISABLE_DXF |
231 | 293 | case 1: |
232 | 293 | dwg_write_dxf (&out_dat, &dwg); |
233 | 293 | break; |
234 | 287 | case 2: // experimental |
235 | 287 | dwg_write_dxfb (&out_dat, &dwg); |
236 | 287 | break; |
237 | 0 | # ifndef DISABLE_JSON |
238 | 303 | case 3: |
239 | 303 | dwg_write_json (&out_dat, &dwg); |
240 | 303 | break; |
241 | 281 | case 4: |
242 | 281 | dwg_write_geojson (&out_dat, &dwg); |
243 | 281 | break; |
244 | 0 | # endif |
245 | 0 | #endif |
246 | 0 | default: |
247 | 0 | break; |
248 | 1.46k | } |
249 | 1.46k | dwg_free (&dwg); |
250 | 1.46k | free (out_dat.chain); |
251 | 1.46k | fclose (out_dat.fh); |
252 | | // unlink (tmp_file); |
253 | 1.46k | return 0; |
254 | 1.46k | } |
255 | | |
256 | | #ifdef STANDALONE |
257 | | /* |
258 | | # ifdef __GNUC__ |
259 | | __attribute__((weak)) |
260 | | # endif |
261 | | extern int LLVMFuzzerInitialize(int *argc, char ***argv); |
262 | | */ |
263 | | |
/* Write the command-line synopsis to stdout.
   Returns 1, which main passes on as its exit status. */
static int
usage (void)
{
  fputs ("\nUsage: OUT=0 VER=3 llvmfuzz_standalone INPUT...", stdout);
  return 1;
}
270 | | // llvmfuzz_standalone reproducer, see OUT and VER env vars |
271 | | int |
272 | | main (int argc, char *argv[]) |
273 | | { |
274 | | unsigned seed; |
275 | | const unsigned int possible_outputformats = LLVMFUZZ_NUM_OUTPUTS; |
276 | | |
277 | | if (argc <= 1 || !*argv[1]) |
278 | | return usage (); |
279 | | if (getenv ("SEED")) |
280 | | seed = (unsigned)strtol (getenv ("SEED"), NULL, 10) % 9999; |
281 | | else |
282 | | { |
283 | | # ifdef HAVE_GETTIMEOFDAY |
284 | | struct timeval tval; |
285 | | gettimeofday (&tval, NULL); |
286 | | seed = (unsigned)(tval.tv_sec * 1000 + tval.tv_usec) % 9999; |
287 | | # else |
288 | | seed = (unsigned)time (NULL) % 9999; |
289 | | # endif |
290 | | } |
291 | | srand (seed); |
292 | | /* works only on linux |
293 | | if (LLVMFuzzerInitialize) |
294 | | LLVMFuzzerInitialize (&argc, &argv); |
295 | | */ |
296 | | for (int i = 1; i < argc; i++) |
297 | | { |
298 | | unsigned char *buf; |
299 | | FILE *f = fopen (argv[i], "rb"); |
300 | | struct stat attrib; |
301 | | long len; |
302 | | size_t n_read; |
303 | | int fd; |
304 | | if (!f) |
305 | | { |
306 | | fprintf (stderr, "Illegal file argument %s\n", argv[i]); |
307 | | continue; |
308 | | } |
309 | | fd = fileno (f); |
310 | | if (fd < 0 || fstat (fd, &attrib) |
311 | | || !(S_ISREG (attrib.st_mode) |
312 | | # ifndef _WIN32 |
313 | | || S_ISLNK (attrib.st_mode) |
314 | | # endif |
315 | | )) |
316 | | { |
317 | | fprintf (stderr, "Illegal input file \"%s\"\n", argv[i]); |
318 | | continue; |
319 | | } |
320 | | // libFuzzer design bug, not zero-terminating its text buffer |
321 | | fseek (f, 0, SEEK_END); |
322 | | len = ftell (f); |
323 | | fseek (f, 0, SEEK_SET); |
324 | | if (len <= 0) |
325 | | continue; |
326 | | buf = (unsigned char *)malloc (len); |
327 | | n_read = fread (buf, 1, len, f); |
328 | | fclose (f); |
329 | | assert ((long)n_read == len); |
330 | | |
331 | | out = rand () % possible_outputformats; |
332 | | # ifdef STANDALONE |
333 | | if (getenv ("OUT")) |
334 | | out = strtol (getenv ("OUT"), NULL, 10); |
335 | | // print SEED onlyu when needed (no env vars given) |
336 | | if (!(out || getenv ("VER"))) |
337 | | fprintf (stderr, "SEED=%04u ", seed); |
338 | | fprintf (stderr, "OUT=%d ", out); |
339 | | # endif |
340 | | if (out == 0) |
341 | | { |
342 | | ver = rand () % 20; |
343 | | # ifdef STANDALONE |
344 | | if (getenv ("VER")) |
345 | | ver = strtol (getenv ("VER"), NULL, 10); |
346 | | fprintf (stderr, "VER=%d ", ver); |
347 | | # endif |
348 | | } |
349 | | fprintf (stderr, "examples/llvmfuzz_standalone %s [%" PRIuSIZE "]\n", |
350 | | argv[i], len); |
351 | | LLVMFuzzerTestOneInput (buf, len); |
352 | | free (buf); |
353 | | // Bit_Chain dat = { 0 }; |
354 | | // dat_read_file (&dat, fp, argv[i]); |
355 | | // LLVMFuzzerTestOneInput (dat.chain, dat.size); |
356 | | // bit_free_chain (&dat); |
357 | | } |
358 | | } |
359 | | #endif |