Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Christos Zoulas 2003. |
3 | | * All Rights Reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice immediately at the beginning of the file, without modification, |
10 | | * this list of conditions, and the following disclaimer. |
11 | | * 2. Redistributions in binary form must reproduce the above copyright |
12 | | * notice, this list of conditions and the following disclaimer in the |
13 | | * documentation and/or other materials provided with the distribution. |
14 | | * |
15 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
19 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | | * SUCH DAMAGE. |
26 | | */ |
27 | | #include "file.h" |
28 | | |
29 | | #ifndef lint |
30 | | FILE_RCSID("@(#)$File: funcs.c,v 1.142 2023/07/30 14:41:14 christos Exp $") |
31 | | #endif /* lint */ |
32 | | |
33 | | #include "magic.h" |
34 | | #include <assert.h> |
35 | | #include <stdarg.h> |
36 | | #include <stdlib.h> |
37 | | #include <string.h> |
38 | | #include <ctype.h> |
39 | | #ifdef HAVE_UNISTD_H |
40 | | #include <unistd.h> /* for pipe2() */ |
41 | | #endif |
42 | | #if defined(HAVE_WCHAR_H) |
43 | | #include <wchar.h> |
44 | | #endif |
45 | | #if defined(HAVE_WCTYPE_H) |
46 | | #include <wctype.h> |
47 | | #endif |
48 | | #include <limits.h> |
49 | | |
50 | | #ifndef SIZE_MAX |
51 | | #define SIZE_MAX ((size_t)~0) |
52 | | #endif |
53 | | |
54 | | file_protected char * |
55 | | file_copystr(char *buf, size_t blen, size_t width, const char *str) |
56 | 0 | { |
57 | 0 | if (blen == 0) |
58 | 0 | return buf; |
59 | 0 | if (width >= blen) |
60 | 0 | width = blen - 1; |
61 | 0 | memcpy(buf, str, width); |
62 | 0 | buf[width] = '\0'; |
63 | 0 | return buf; |
64 | 0 | } |
65 | | |
66 | | file_private void |
67 | | file_clearbuf(struct magic_set *ms) |
68 | 13.9k | { |
69 | 13.9k | free(ms->o.buf); |
70 | 13.9k | ms->o.buf = NULL; |
71 | 13.9k | ms->o.blen = 0; |
72 | 13.9k | } |
73 | | |
74 | | file_private int |
75 | | file_checkfield(char *msg, size_t mlen, const char *what, const char **pp) |
76 | 1.88k | { |
77 | 1.88k | const char *p = *pp; |
78 | 1.88k | int fw = 0; |
79 | | |
80 | 1.88k | while (*p && isdigit((unsigned char)*p)) |
81 | 0 | fw = fw * 10 + (*p++ - '0'); |
82 | | |
83 | 1.88k | *pp = p; |
84 | | |
85 | 1.88k | if (fw < 1024) |
86 | 1.88k | return 1; |
87 | 0 | if (msg) |
88 | 0 | snprintf(msg, mlen, "field %s too large: %d", what, fw); |
89 | |
|
90 | 0 | return 0; |
91 | 1.88k | } |
92 | | |
93 | | file_protected int |
94 | | file_checkfmt(char *msg, size_t mlen, const char *fmt) |
95 | 8.99k | { |
96 | 8.99k | const char *p; |
97 | 300k | for (p = fmt; *p; p++) { |
98 | 291k | if (*p != '%') |
99 | 290k | continue; |
100 | 1.88k | if (*++p == '%') |
101 | 0 | continue; |
102 | | // Skip uninteresting. |
103 | 1.88k | while (strchr("#0.'+- ", *p) != NULL) |
104 | 0 | p++; |
105 | 1.88k | if (*p == '*') { |
106 | 0 | if (msg) |
107 | 0 | snprintf(msg, mlen, "* not allowed in format"); |
108 | 0 | return -1; |
109 | 0 | } |
110 | | |
111 | 1.88k | if (!file_checkfield(msg, mlen, "width", &p)) |
112 | 0 | return -1; |
113 | | |
114 | 1.88k | if (*p == '.') { |
115 | 0 | p++; |
116 | 0 | if (!file_checkfield(msg, mlen, "precision", &p)) |
117 | 0 | return -1; |
118 | 0 | } |
119 | | |
120 | 1.88k | if (!isalpha((unsigned char)*p)) { |
121 | 0 | if (msg) |
122 | 0 | snprintf(msg, mlen, "bad format char: %c", *p); |
123 | 0 | return -1; |
124 | 0 | } |
125 | 1.88k | } |
126 | 8.99k | return 0; |
127 | 8.99k | } |
128 | | |
129 | | /* |
130 | | * Like printf, only we append to a buffer. |
131 | | */ |
132 | | file_protected int |
133 | | file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) |
134 | 8.99k | { |
135 | 8.99k | int len; |
136 | 8.99k | char *buf, *newstr; |
137 | 8.99k | char tbuf[1024]; |
138 | | |
139 | 8.99k | if (ms->event_flags & EVENT_HAD_ERR) |
140 | 0 | return 0; |
141 | | |
142 | 8.99k | if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) { |
143 | 0 | file_clearbuf(ms); |
144 | 0 | file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf); |
145 | 0 | return -1; |
146 | 0 | } |
147 | | |
148 | 8.99k | len = vasprintf(&buf, fmt, ap); |
149 | 8.99k | if (len < 0 || (size_t)len > 1024 || len + ms->o.blen > 1024 * 1024) { |
150 | 60 | size_t blen = ms->o.blen; |
151 | 60 | free(buf); |
152 | 60 | file_clearbuf(ms); |
153 | 60 | file_error(ms, 0, "Output buffer space exceeded %d+%" |
154 | 60 | SIZE_T_FORMAT "u", len, blen); |
155 | 60 | return -1; |
156 | 60 | } |
157 | | |
158 | 8.93k | if (ms->o.buf != NULL) { |
159 | 842 | len = asprintf(&newstr, "%s%s", ms->o.buf, buf); |
160 | 842 | free(buf); |
161 | 842 | if (len < 0) |
162 | 0 | goto out; |
163 | 842 | free(ms->o.buf); |
164 | 842 | buf = newstr; |
165 | 842 | } |
166 | 8.93k | ms->o.buf = buf; |
167 | 8.93k | ms->o.blen = len; |
168 | 8.93k | return 0; |
169 | 0 | out: |
170 | 0 | file_clearbuf(ms); |
171 | 0 | file_error(ms, errno, "vasprintf failed"); |
172 | 0 | return -1; |
173 | 8.93k | } |
174 | | |
175 | | file_protected int |
176 | | file_printf(struct magic_set *ms, const char *fmt, ...) |
177 | 846 | { |
178 | 846 | int rv; |
179 | 846 | va_list ap; |
180 | | |
181 | 846 | va_start(ap, fmt); |
182 | 846 | rv = file_vprintf(ms, fmt, ap); |
183 | 846 | va_end(ap); |
184 | 846 | return rv; |
185 | 846 | } |
186 | | |
187 | | /* |
188 | | * error - print best error message possible |
189 | | */ |
190 | | /*VARARGS*/ |
191 | | __attribute__((__format__(__printf__, 3, 0))) |
192 | | file_private void |
193 | | file_error_core(struct magic_set *ms, int error, const char *f, va_list va, |
194 | | size_t lineno) |
195 | 12.1k | { |
196 | | /* Only the first error is ok */ |
197 | 12.1k | if (ms->event_flags & EVENT_HAD_ERR) |
198 | 3.96k | return; |
199 | 8.15k | if (lineno != 0) { |
200 | 423 | file_clearbuf(ms); |
201 | 423 | (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); |
202 | 423 | } |
203 | 8.15k | if (ms->o.buf && *ms->o.buf) |
204 | 423 | (void)file_printf(ms, " "); |
205 | 8.15k | (void)file_vprintf(ms, f, va); |
206 | 8.15k | if (error > 0) |
207 | 0 | (void)file_printf(ms, " (%s)", strerror(error)); |
208 | 8.15k | ms->event_flags |= EVENT_HAD_ERR; |
209 | 8.15k | ms->error = error; |
210 | 8.15k | } |
211 | | |
212 | | /*VARARGS*/ |
213 | | file_protected void |
214 | | file_error(struct magic_set *ms, int error, const char *f, ...) |
215 | 10.4k | { |
216 | 10.4k | va_list va; |
217 | 10.4k | va_start(va, f); |
218 | 10.4k | file_error_core(ms, error, f, va, 0); |
219 | 10.4k | va_end(va); |
220 | 10.4k | } |
221 | | |
222 | | /* |
223 | | * Print an error with magic line number. |
224 | | */ |
225 | | /*VARARGS*/ |
226 | | file_protected void |
227 | | file_magerror(struct magic_set *ms, const char *f, ...) |
228 | 1.66k | { |
229 | 1.66k | va_list va; |
230 | 1.66k | va_start(va, f); |
231 | 1.66k | file_error_core(ms, 0, f, va, ms->line); |
232 | 1.66k | va_end(va); |
233 | 1.66k | } |
234 | | |
235 | | file_protected void |
236 | | file_oomem(struct magic_set *ms, size_t len) |
237 | 0 | { |
238 | 0 | file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", |
239 | 0 | len); |
240 | 0 | } |
241 | | |
242 | | file_protected void |
243 | | file_badseek(struct magic_set *ms) |
244 | 0 | { |
245 | 0 | file_error(ms, errno, "error seeking"); |
246 | 0 | } |
247 | | |
248 | | file_protected void |
249 | | file_badread(struct magic_set *ms) |
250 | 0 | { |
251 | 0 | file_error(ms, errno, "error reading"); |
252 | 0 | } |
253 | | |
254 | | #ifndef COMPILE_ONLY |
255 | 0 | #define FILE_SEPARATOR "\n- " |
256 | | |
257 | | file_protected int |
258 | | file_separator(struct magic_set *ms) |
259 | 0 | { |
260 | 0 | return file_printf(ms, FILE_SEPARATOR); |
261 | 0 | } |
262 | | |
263 | | static void |
264 | | trim_separator(struct magic_set *ms) |
265 | 0 | { |
266 | 0 | size_t l; |
267 | |
|
268 | 0 | if (ms->o.buf == NULL) |
269 | 0 | return; |
270 | | |
271 | 0 | l = strlen(ms->o.buf); |
272 | 0 | if (l < sizeof(FILE_SEPARATOR)) |
273 | 0 | return; |
274 | | |
275 | 0 | l -= sizeof(FILE_SEPARATOR) - 1; |
276 | 0 | if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0) |
277 | 0 | return; |
278 | | |
279 | 0 | ms->o.buf[l] = '\0'; |
280 | 0 | } |
281 | | |
282 | | static int |
283 | | checkdone(struct magic_set *ms, int *rv) |
284 | 0 | { |
285 | 0 | if ((ms->flags & MAGIC_CONTINUE) == 0) |
286 | 0 | return 1; |
287 | 0 | if (file_separator(ms) == -1) |
288 | 0 | *rv = -1; |
289 | 0 | return 0; |
290 | 0 | } |
291 | | |
292 | | file_protected int |
293 | | file_default(struct magic_set *ms, size_t nb) |
294 | 0 | { |
295 | 0 | if (ms->flags & MAGIC_MIME) { |
296 | 0 | if ((ms->flags & MAGIC_MIME_TYPE) && |
297 | 0 | file_printf(ms, "application/%s", |
298 | 0 | nb ? "octet-stream" : "x-empty") == -1) |
299 | 0 | return -1; |
300 | 0 | return 1; |
301 | 0 | } |
302 | 0 | if (ms->flags & MAGIC_APPLE) { |
303 | 0 | if (file_printf(ms, "UNKNUNKN") == -1) |
304 | 0 | return -1; |
305 | 0 | return 1; |
306 | 0 | } |
307 | 0 | if (ms->flags & MAGIC_EXTENSION) { |
308 | 0 | if (file_printf(ms, "???") == -1) |
309 | 0 | return -1; |
310 | 0 | return 1; |
311 | 0 | } |
312 | 0 | return 0; |
313 | 0 | } |
314 | | |
315 | | /* |
316 | | * The magic detection functions return: |
317 | | * 1: found |
318 | | * 0: not found |
319 | | * -1: error |
320 | | */ |
321 | | /*ARGSUSED*/ |
322 | | file_protected int |
323 | | file_buffer(struct magic_set *ms, int fd, struct stat *st, |
324 | | const char *inname __attribute__ ((__unused__)), |
325 | | const void *buf, size_t nb) |
326 | 0 | { |
327 | 0 | int m = 0, rv = 0, looks_text = 0; |
328 | 0 | const char *code = NULL; |
329 | 0 | const char *code_mime = "binary"; |
330 | 0 | const char *def = "data"; |
331 | 0 | const char *ftype = NULL; |
332 | 0 | char *rbuf = NULL; |
333 | 0 | struct buffer b; |
334 | |
|
335 | 0 | buffer_init(&b, fd, st, buf, nb); |
336 | 0 | ms->mode = b.st.st_mode; |
337 | |
|
338 | 0 | if (nb == 0) { |
339 | 0 | def = "empty"; |
340 | 0 | goto simple; |
341 | 0 | } else if (nb == 1) { |
342 | 0 | def = "very short file (no magic)"; |
343 | 0 | goto simple; |
344 | 0 | } |
345 | | |
346 | 0 | if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { |
347 | 0 | looks_text = file_encoding(ms, &b, NULL, 0, |
348 | 0 | &code, &code_mime, &ftype); |
349 | 0 | } |
350 | |
|
351 | | #ifdef __EMX__ |
352 | | if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { |
353 | | m = file_os2_apptype(ms, inname, &b); |
354 | | if ((ms->flags & MAGIC_DEBUG) != 0) |
355 | | (void)fprintf(stderr, "[try os2_apptype %d]\n", m); |
356 | | switch (m) { |
357 | | case -1: |
358 | | return -1; |
359 | | case 0: |
360 | | break; |
361 | | default: |
362 | | return 1; |
363 | | } |
364 | | } |
365 | | #endif |
366 | 0 | #if HAVE_FORK |
367 | | /* try compression stuff */ |
368 | 0 | if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { |
369 | 0 | m = file_zmagic(ms, &b, inname); |
370 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
371 | 0 | (void)fprintf(stderr, "[try zmagic %d]\n", m); |
372 | 0 | if (m) { |
373 | 0 | goto done_encoding; |
374 | 0 | } |
375 | 0 | } |
376 | 0 | #endif |
377 | | /* Check if we have a tar file */ |
378 | 0 | if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { |
379 | 0 | m = file_is_tar(ms, &b); |
380 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
381 | 0 | (void)fprintf(stderr, "[try tar %d]\n", m); |
382 | 0 | if (m) { |
383 | 0 | if (checkdone(ms, &rv)) |
384 | 0 | goto done; |
385 | 0 | } |
386 | 0 | } |
387 | | |
388 | | /* Check if we have a JSON file */ |
389 | 0 | if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) { |
390 | 0 | m = file_is_json(ms, &b); |
391 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
392 | 0 | (void)fprintf(stderr, "[try json %d]\n", m); |
393 | 0 | if (m) { |
394 | 0 | if (checkdone(ms, &rv)) |
395 | 0 | goto done; |
396 | 0 | } |
397 | 0 | } |
398 | | |
399 | | /* Check if we have a CSV file */ |
400 | 0 | if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) { |
401 | 0 | m = file_is_csv(ms, &b, looks_text, code); |
402 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
403 | 0 | (void)fprintf(stderr, "[try csv %d]\n", m); |
404 | 0 | if (m) { |
405 | 0 | if (checkdone(ms, &rv)) |
406 | 0 | goto done; |
407 | 0 | } |
408 | 0 | } |
409 | | |
410 | | /* Check if we have a SIMH tape file */ |
411 | 0 | if ((ms->flags & MAGIC_NO_CHECK_SIMH) == 0) { |
412 | 0 | m = file_is_simh(ms, &b); |
413 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
414 | 0 | (void)fprintf(stderr, "[try simh %d]\n", m); |
415 | 0 | if (m) { |
416 | 0 | if (checkdone(ms, &rv)) |
417 | 0 | goto done; |
418 | 0 | } |
419 | 0 | } |
420 | | |
421 | | /* Check if we have a CDF file */ |
422 | 0 | if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { |
423 | 0 | m = file_trycdf(ms, &b); |
424 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
425 | 0 | (void)fprintf(stderr, "[try cdf %d]\n", m); |
426 | 0 | if (m) { |
427 | 0 | if (checkdone(ms, &rv)) |
428 | 0 | goto done; |
429 | 0 | } |
430 | 0 | } |
431 | 0 | #ifdef BUILTIN_ELF |
432 | 0 | if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) { |
433 | 0 | file_pushbuf_t *pb; |
434 | | /* |
435 | | * We matched something in the file, so this |
436 | | * *might* be an ELF file, and the file is at |
437 | | * least 5 bytes long, so if it's an ELF file |
438 | | * it has at least one byte past the ELF magic |
439 | | * number - try extracting information from the |
440 | | * ELF headers that cannot easily be extracted |
441 | | * with rules in the magic file. We we don't |
442 | | * print the information yet. |
443 | | */ |
444 | 0 | if ((pb = file_push_buffer(ms)) == NULL) |
445 | 0 | return -1; |
446 | | |
447 | 0 | rv = file_tryelf(ms, &b); |
448 | 0 | rbuf = file_pop_buffer(ms, pb); |
449 | 0 | if (rv == -1) { |
450 | 0 | free(rbuf); |
451 | 0 | rbuf = NULL; |
452 | 0 | } |
453 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
454 | 0 | (void)fprintf(stderr, "[try elf %d]\n", m); |
455 | 0 | } |
456 | 0 | #endif |
457 | | |
458 | | /* try soft magic tests */ |
459 | 0 | if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { |
460 | 0 | m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text); |
461 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
462 | 0 | (void)fprintf(stderr, "[try softmagic %d]\n", m); |
463 | 0 | if (m == 1 && rbuf) { |
464 | 0 | if (file_printf(ms, "%s", rbuf) == -1) |
465 | 0 | goto done; |
466 | 0 | } |
467 | 0 | if (m) { |
468 | 0 | if (checkdone(ms, &rv)) |
469 | 0 | goto done; |
470 | 0 | } |
471 | 0 | } |
472 | | |
473 | | /* try text properties */ |
474 | 0 | if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { |
475 | |
|
476 | 0 | m = file_ascmagic(ms, &b, looks_text); |
477 | 0 | if ((ms->flags & MAGIC_DEBUG) != 0) |
478 | 0 | (void)fprintf(stderr, "[try ascmagic %d]\n", m); |
479 | 0 | if (m) { |
480 | 0 | goto done; |
481 | 0 | } |
482 | 0 | } |
483 | | |
484 | 0 | simple: |
485 | | /* give up */ |
486 | 0 | if (m == 0) { |
487 | 0 | m = 1; |
488 | 0 | rv = file_default(ms, nb); |
489 | 0 | if (rv == 0) |
490 | 0 | if (file_printf(ms, "%s", def) == -1) |
491 | 0 | rv = -1; |
492 | 0 | } |
493 | 0 | done: |
494 | 0 | trim_separator(ms); |
495 | 0 | if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { |
496 | 0 | if (ms->flags & MAGIC_MIME_TYPE) |
497 | 0 | if (file_printf(ms, "; charset=") == -1) |
498 | 0 | rv = -1; |
499 | 0 | if (file_printf(ms, "%s", code_mime) == -1) |
500 | 0 | rv = -1; |
501 | 0 | } |
502 | 0 | #if HAVE_FORK |
503 | 0 | done_encoding: |
504 | 0 | #endif |
505 | 0 | free(rbuf); |
506 | 0 | buffer_fini(&b); |
507 | 0 | if (rv) |
508 | 0 | return rv; |
509 | | |
510 | 0 | return m; |
511 | 0 | } |
512 | | #endif |
513 | | |
514 | | file_protected int |
515 | | file_reset(struct magic_set *ms, int checkloaded) |
516 | 13.4k | { |
517 | 13.4k | if (checkloaded && ms->mlist[0] == NULL) { |
518 | 0 | file_error(ms, 0, "no magic files loaded"); |
519 | 0 | return -1; |
520 | 0 | } |
521 | 13.4k | file_clearbuf(ms); |
522 | 13.4k | if (ms->o.pbuf) { |
523 | 0 | free(ms->o.pbuf); |
524 | 0 | ms->o.pbuf = NULL; |
525 | 0 | } |
526 | 13.4k | ms->event_flags &= ~EVENT_HAD_ERR; |
527 | 13.4k | ms->error = -1; |
528 | 13.4k | return 0; |
529 | 13.4k | } |
530 | | |
/*
 * Write the byte at *o to n as a backslash followed by three octal digits
 * (four output bytes), then advance o by one.  Both arguments are
 * evaluated several times and must be plain pointer lvalues.
 */
#define OCTALIFY(n, o)	\
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	*(n)++ = '0' + ((CAST(uint32_t, *(o)) >> 6) & 3), \
	*(n)++ = '0' + ((CAST(uint32_t, *(o)) >> 3) & 7), \
	*(n)++ = '0' + ((CAST(uint32_t, *(o)) >> 0) & 7), \
	(o)++)
538 | | |
539 | | file_protected const char * |
540 | | file_getbuffer(struct magic_set *ms) |
541 | 0 | { |
542 | 0 | char *pbuf, *op, *np; |
543 | 0 | size_t psize, len; |
544 | |
|
545 | 0 | if (ms->event_flags & EVENT_HAD_ERR) |
546 | 0 | return NULL; |
547 | | |
548 | 0 | if (ms->flags & MAGIC_RAW) |
549 | 0 | return ms->o.buf; |
550 | | |
551 | 0 | if (ms->o.buf == NULL) |
552 | 0 | return NULL; |
553 | | |
554 | | /* * 4 is for octal representation, + 1 is for NUL */ |
555 | 0 | len = strlen(ms->o.buf); |
556 | 0 | if (len > (SIZE_MAX - 1) / 4) { |
557 | 0 | file_oomem(ms, len); |
558 | 0 | return NULL; |
559 | 0 | } |
560 | 0 | psize = len * 4 + 1; |
561 | 0 | if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { |
562 | 0 | file_oomem(ms, psize); |
563 | 0 | return NULL; |
564 | 0 | } |
565 | 0 | ms->o.pbuf = pbuf; |
566 | |
|
567 | 0 | #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) |
568 | 0 | { |
569 | 0 | mbstate_t state; |
570 | 0 | wchar_t nextchar; |
571 | 0 | int mb_conv = 1; |
572 | 0 | size_t bytesconsumed; |
573 | 0 | char *eop; |
574 | 0 | (void)memset(&state, 0, sizeof(mbstate_t)); |
575 | |
|
576 | 0 | np = ms->o.pbuf; |
577 | 0 | op = ms->o.buf; |
578 | 0 | eop = op + len; |
579 | |
|
580 | 0 | while (op < eop) { |
581 | 0 | bytesconsumed = mbrtowc(&nextchar, op, |
582 | 0 | CAST(size_t, eop - op), &state); |
583 | 0 | if (bytesconsumed == CAST(size_t, -1) || |
584 | 0 | bytesconsumed == CAST(size_t, -2)) { |
585 | 0 | mb_conv = 0; |
586 | 0 | break; |
587 | 0 | } |
588 | | |
589 | 0 | if (iswprint(nextchar)) { |
590 | 0 | (void)memcpy(np, op, bytesconsumed); |
591 | 0 | op += bytesconsumed; |
592 | 0 | np += bytesconsumed; |
593 | 0 | } else { |
594 | 0 | while (bytesconsumed-- > 0) |
595 | 0 | OCTALIFY(np, op); |
596 | 0 | } |
597 | 0 | } |
598 | 0 | *np = '\0'; |
599 | | |
600 | | /* Parsing succeeded as a multi-byte sequence */ |
601 | 0 | if (mb_conv != 0) |
602 | 0 | return ms->o.pbuf; |
603 | 0 | } |
604 | 0 | #endif |
605 | | |
606 | 0 | for (np = ms->o.pbuf, op = ms->o.buf; *op;) { |
607 | 0 | if (isprint(CAST(unsigned char, *op))) { |
608 | 0 | *np++ = *op++; |
609 | 0 | } else { |
610 | 0 | OCTALIFY(np, op); |
611 | 0 | } |
612 | 0 | } |
613 | 0 | *np = '\0'; |
614 | 0 | return ms->o.pbuf; |
615 | 0 | } |
616 | | |
617 | | file_protected int |
618 | | file_check_mem(struct magic_set *ms, unsigned int level) |
619 | 6.81M | { |
620 | 6.81M | size_t len; |
621 | | |
622 | 6.81M | if (level >= ms->c.len) { |
623 | 153 | len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); |
624 | 153 | ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? |
625 | 153 | malloc(len) : |
626 | 153 | realloc(ms->c.li, len)); |
627 | 153 | if (ms->c.li == NULL) { |
628 | 0 | file_oomem(ms, len); |
629 | 0 | return -1; |
630 | 0 | } |
631 | 153 | } |
632 | 6.81M | ms->c.li[level].got_match = 0; |
633 | 6.81M | #ifdef ENABLE_CONDITIONALS |
634 | 6.81M | ms->c.li[level].last_match = 0; |
635 | 6.81M | ms->c.li[level].last_cond = COND_NONE; |
636 | 6.81M | #endif /* ENABLE_CONDITIONALS */ |
637 | 6.81M | return 0; |
638 | 6.81M | } |
639 | | |
640 | | file_protected size_t |
641 | | file_printedlen(const struct magic_set *ms) |
642 | 0 | { |
643 | 0 | return ms->o.blen; |
644 | 0 | } |
645 | | |
646 | | file_protected int |
647 | | file_replace(struct magic_set *ms, const char *pat, const char *rep) |
648 | 0 | { |
649 | 0 | file_regex_t rx; |
650 | 0 | int rc, rv = -1; |
651 | |
|
652 | 0 | rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); |
653 | 0 | if (rc == 0) { |
654 | 0 | regmatch_t rm; |
655 | 0 | int nm = 0; |
656 | 0 | while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { |
657 | 0 | ms->o.buf[rm.rm_so] = '\0'; |
658 | 0 | if (file_printf(ms, "%s%s", rep, |
659 | 0 | rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) |
660 | 0 | goto out; |
661 | 0 | nm++; |
662 | 0 | } |
663 | 0 | rv = nm; |
664 | 0 | } |
665 | 0 | out: |
666 | 0 | file_regfree(&rx); |
667 | 0 | return rv; |
668 | 0 | } |
669 | | |
670 | | file_private int |
671 | | check_regex(struct magic_set *ms, const char *pat) |
672 | 12.4k | { |
673 | 12.4k | char sbuf[512]; |
674 | 12.4k | unsigned char oc = '\0'; |
675 | 12.4k | const char *p; |
676 | 12.4k | unsigned long l; |
677 | | |
678 | 148k | for (p = pat; *p; p++) { |
679 | 138k | unsigned char c = *p; |
680 | | // Avoid repetition |
681 | 138k | if (c == oc && strchr("?*+{", c) != NULL) { |
682 | 294 | size_t len = strlen(pat); |
683 | 294 | file_magwarn(ms, |
684 | 294 | "repetition-operator operand `%c' " |
685 | 294 | "invalid in regex `%s'", c, |
686 | 294 | file_printable(ms, sbuf, sizeof(sbuf), pat, len)); |
687 | 294 | return -1; |
688 | 294 | } |
689 | 137k | if (c == '{') { |
690 | 4.21k | char *ep, *eep; |
691 | 4.21k | errno = 0; |
692 | 4.21k | l = strtoul(p + 1, &ep, 10); |
693 | 4.21k | if (ep != p + 1 && l > 1000) |
694 | 362 | goto bounds; |
695 | | |
696 | 3.85k | if (*ep == ',') { |
697 | 1.37k | l = strtoul(ep + 1, &eep, 10); |
698 | 1.37k | if (eep != ep + 1 && l > 1000) |
699 | 404 | goto bounds; |
700 | 1.37k | } |
701 | 3.85k | } |
702 | 137k | oc = c; |
703 | 137k | if (isprint(c) || isspace(c) || c == '\b' |
704 | 137k | || c == 0x8a) // XXX: apple magic fixme |
705 | 136k | continue; |
706 | 772 | size_t len = strlen(pat); |
707 | 772 | file_magwarn(ms, |
708 | 772 | "non-ascii characters in regex \\%#o `%s'", |
709 | 772 | c, file_printable(ms, sbuf, sizeof(sbuf), pat, len)); |
710 | 772 | return -1; |
711 | 137k | } |
712 | 10.5k | return 0; |
713 | 766 | bounds: |
714 | 766 | file_magwarn(ms, "bounds too large %ld in regex `%s'", l, pat); |
715 | 766 | return -1; |
716 | 12.4k | } |
717 | | |
718 | | file_protected int |
719 | | file_regcomp(struct magic_set *ms file_locale_used, file_regex_t *rx, |
720 | | const char *pat, int flags) |
721 | 12.4k | { |
722 | 12.4k | if (check_regex(ms, pat) == -1) |
723 | 1.83k | return -1; |
724 | | |
725 | 10.5k | #ifdef USE_C_LOCALE |
726 | 10.5k | locale_t old = uselocale(ms->c_lc_ctype); |
727 | 10.5k | assert(old != NULL); |
728 | | #else |
729 | | char old[1024]; |
730 | | strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); |
731 | | (void)setlocale(LC_CTYPE, "C"); |
732 | | #endif |
733 | 10.5k | int rc; |
734 | 10.5k | rc = regcomp(rx, pat, flags); |
735 | | |
736 | 10.5k | #ifdef USE_C_LOCALE |
737 | 10.5k | uselocale(old); |
738 | | #else |
739 | | (void)setlocale(LC_CTYPE, old); |
740 | | #endif |
741 | 10.5k | if (rc > 0 && (ms->flags & MAGIC_CHECK)) { |
742 | 537 | char errmsg[512], buf[512]; |
743 | | |
744 | 537 | (void)regerror(rc, rx, errmsg, sizeof(errmsg)); |
745 | 537 | file_magerror(ms, "regex error %d for `%s', (%s)", rc, |
746 | 537 | file_printable(ms, buf, sizeof(buf), pat, strlen(pat)), |
747 | 537 | errmsg); |
748 | 537 | } |
749 | 10.5k | return rc; |
750 | 10.5k | } |
751 | | |
752 | | /*ARGSUSED*/ |
753 | | file_protected int |
754 | | file_regexec(struct magic_set *ms file_locale_used, file_regex_t *rx, |
755 | | const char *str, size_t nmatch, regmatch_t* pmatch, int eflags) |
756 | 0 | { |
757 | 0 | #ifdef USE_C_LOCALE |
758 | 0 | locale_t old = uselocale(ms->c_lc_ctype); |
759 | 0 | assert(old != NULL); |
760 | | #else |
761 | | char old[1024]; |
762 | | strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); |
763 | | (void)setlocale(LC_CTYPE, "C"); |
764 | | #endif |
765 | 0 | int rc; |
766 | | /* XXX: force initialization because glibc does not always do this */ |
767 | 0 | if (nmatch != 0) |
768 | 0 | memset(pmatch, 0, nmatch * sizeof(*pmatch)); |
769 | 0 | rc = regexec(rx, str, nmatch, pmatch, eflags); |
770 | 0 | #ifdef USE_C_LOCALE |
771 | 0 | uselocale(old); |
772 | | #else |
773 | | (void)setlocale(LC_CTYPE, old); |
774 | | #endif |
775 | 0 | return rc; |
776 | 0 | } |
777 | | |
778 | | file_protected void |
779 | | file_regfree(file_regex_t *rx) |
780 | 10.0k | { |
781 | 10.0k | regfree(rx); |
782 | 10.0k | } |
783 | | |
784 | | file_protected file_pushbuf_t * |
785 | | file_push_buffer(struct magic_set *ms) |
786 | 0 | { |
787 | 0 | file_pushbuf_t *pb; |
788 | |
|
789 | 0 | if (ms->event_flags & EVENT_HAD_ERR) |
790 | 0 | return NULL; |
791 | | |
792 | 0 | if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) |
793 | 0 | return NULL; |
794 | | |
795 | 0 | pb->buf = ms->o.buf; |
796 | 0 | pb->blen = ms->o.blen; |
797 | 0 | pb->offset = ms->offset; |
798 | |
|
799 | 0 | ms->o.buf = NULL; |
800 | 0 | ms->o.blen = 0; |
801 | 0 | ms->offset = 0; |
802 | |
|
803 | 0 | return pb; |
804 | 0 | } |
805 | | |
806 | | file_protected char * |
807 | | file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) |
808 | 0 | { |
809 | 0 | char *rbuf; |
810 | |
|
811 | 0 | if (ms->event_flags & EVENT_HAD_ERR) { |
812 | 0 | free(pb->buf); |
813 | 0 | free(pb); |
814 | 0 | return NULL; |
815 | 0 | } |
816 | | |
817 | 0 | rbuf = ms->o.buf; |
818 | |
|
819 | 0 | ms->o.buf = pb->buf; |
820 | 0 | ms->o.blen = pb->blen; |
821 | 0 | ms->offset = pb->offset; |
822 | |
|
823 | 0 | free(pb); |
824 | 0 | return rbuf; |
825 | 0 | } |
826 | | |
827 | | /* |
828 | | * convert string to ascii printable format. |
829 | | */ |
830 | | file_protected char * |
831 | | file_printable(struct magic_set *ms, char *buf, size_t bufsiz, |
832 | | const char *str, size_t slen) |
833 | 1.82k | { |
834 | 1.82k | char *ptr, *eptr = buf + bufsiz - 1; |
835 | 1.82k | const unsigned char *s = RCAST(const unsigned char *, str); |
836 | 1.82k | const unsigned char *es = s + slen; |
837 | | |
838 | 24.8k | for (ptr = buf; ptr < eptr && s < es && *s; s++) { |
839 | 23.0k | if ((ms->flags & MAGIC_RAW) != 0 || isprint(*s)) { |
840 | 18.0k | *ptr++ = *s; |
841 | 18.0k | continue; |
842 | 18.0k | } |
843 | 5.01k | if (ptr >= eptr - 3) |
844 | 0 | break; |
845 | 5.01k | *ptr++ = '\\'; |
846 | 5.01k | *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; |
847 | 5.01k | *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; |
848 | 5.01k | *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; |
849 | 5.01k | } |
850 | 1.82k | *ptr = '\0'; |
851 | 1.82k | return buf; |
852 | 1.82k | } |
853 | | |
/*
 * In-memory form of a GUID as read and written by file_parse_guid() and
 * file_print_guid(), whose text form is 8-4-4-4-12 hexadecimal digits.
 */
struct guid {
	uint32_t data1;		/* first group, 8 hex digits */
	uint16_t data2;		/* second group, 4 hex digits */
	uint16_t data3;		/* third group, 4 hex digits */
	uint8_t data4[8];	/* last two groups, one byte per 2 digits */
};
860 | | |
861 | | file_protected int |
862 | | file_parse_guid(const char *s, uint64_t *guid) |
863 | 1.18k | { |
864 | 1.18k | struct guid *g = CAST(struct guid *, CAST(void *, guid)); |
865 | 1.18k | #ifndef WIN32 |
866 | 1.18k | return sscanf(s, |
867 | 1.18k | "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", |
868 | 1.18k | &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1], |
869 | 1.18k | &g->data4[2], &g->data4[3], &g->data4[4], &g->data4[5], |
870 | 1.18k | &g->data4[6], &g->data4[7]) == 11 ? 0 : -1; |
871 | | #else |
872 | | /* MS-Windows runtime doesn't support %hhx, except under |
873 | | non-default __USE_MINGW_ANSI_STDIO. */ |
874 | | uint16_t data16[8]; |
875 | | int rv = sscanf(s, "%8x-%4hx-%4hx-%2hx%2hx-%2hx%2hx%2hx%2hx%2hx%2hx", |
876 | | &g->data1, &g->data2, &g->data3, &data16[0], &data16[1], |
877 | | &data16[2], &data16[3], &data16[4], &data16[5], |
878 | | &data16[6], &data16[7]) == 11 ? 0 : -1; |
879 | | int i; |
880 | | for (i = 0; i < 8; i++) |
881 | | g->data4[i] = data16[i]; |
882 | | return rv; |
883 | | #endif |
884 | 1.18k | } |
885 | | |
886 | | file_protected int |
887 | | file_print_guid(char *str, size_t len, const uint64_t *guid) |
888 | 318 | { |
889 | 318 | const struct guid *g = CAST(const struct guid *, |
890 | 318 | CAST(const void *, guid)); |
891 | | |
892 | 318 | #ifndef WIN32 |
893 | 318 | return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-" |
894 | 318 | "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX", |
895 | 318 | g->data1, g->data2, g->data3, g->data4[0], g->data4[1], |
896 | 318 | g->data4[2], g->data4[3], g->data4[4], g->data4[5], |
897 | 318 | g->data4[6], g->data4[7]); |
898 | | #else |
899 | | return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hX%.2hX-" |
900 | | "%.2hX%.2hX%.2hX%.2hX%.2hX%.2hX", |
901 | | g->data1, g->data2, g->data3, g->data4[0], g->data4[1], |
902 | | g->data4[2], g->data4[3], g->data4[4], g->data4[5], |
903 | | g->data4[6], g->data4[7]); |
904 | | #endif |
905 | 318 | } |
906 | | |
907 | | file_protected int |
908 | | file_pipe_closexec(int *fds) |
909 | 0 | { |
910 | | #ifdef __MINGW32__ |
911 | | return 0; |
912 | | #elif defined(HAVE_PIPE2) |
913 | 0 | return pipe2(fds, O_CLOEXEC); |
914 | | #else |
915 | | if (pipe(fds) == -1) |
916 | | return -1; |
917 | | # ifdef F_SETFD |
918 | | (void)fcntl(fds[0], F_SETFD, FD_CLOEXEC); |
919 | | (void)fcntl(fds[1], F_SETFD, FD_CLOEXEC); |
920 | | # endif |
921 | | return 0; |
922 | | #endif |
923 | 0 | } |
924 | | |
925 | | file_protected int |
926 | 0 | file_clear_closexec(int fd) { |
927 | 0 | #ifdef F_SETFD |
928 | 0 | return fcntl(fd, F_SETFD, 0); |
929 | | #else |
930 | | return 0; |
931 | | #endif |
932 | 0 | } |
933 | | |
934 | | file_protected char * |
935 | | file_strtrim(char *str) |
936 | 0 | { |
937 | 0 | char *last; |
938 | |
|
939 | 0 | while (isspace(CAST(unsigned char, *str))) |
940 | 0 | str++; |
941 | 0 | last = str; |
942 | 0 | while (*last) |
943 | 0 | last++; |
944 | 0 | --last; |
945 | 0 | while (isspace(CAST(unsigned char, *last))) |
946 | 0 | last--; |
947 | 0 | *++last = '\0'; |
948 | 0 | return str; |
949 | 0 | } |