Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Christos Zoulas 2003. |
3 | | * All Rights Reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice immediately at the beginning of the file, without modification, |
10 | | * this list of conditions, and the following disclaimer. |
11 | | * 2. Redistributions in binary form must reproduce the above copyright |
12 | | * notice, this list of conditions and the following disclaimer in the |
13 | | * documentation and/or other materials provided with the distribution. |
14 | | * |
15 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
19 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | | * SUCH DAMAGE. |
26 | | */ |
27 | | #include "file.h" |
28 | | |
29 | | #ifndef lint |
30 | | FILE_RCSID("@(#)$File: funcs.c,v 1.142 2023/07/30 14:41:14 christos Exp $") |
31 | | #endif /* lint */ |
32 | | |
33 | | #include "magic.h" |
34 | | #include <assert.h> |
35 | | #include <stdarg.h> |
36 | | #include <stdlib.h> |
37 | | #include <string.h> |
38 | | #include <ctype.h> |
39 | | #ifdef HAVE_UNISTD_H |
40 | | #include <unistd.h> /* for pipe2() */ |
41 | | #endif |
42 | | #if defined(HAVE_WCHAR_H) |
43 | | #include <wchar.h> |
44 | | #endif |
45 | | #if defined(HAVE_WCTYPE_H) |
46 | | #include <wctype.h> |
47 | | #endif |
48 | | #include <limits.h> |
49 | | |
50 | | #ifndef SIZE_MAX |
51 | | #define SIZE_MAX ((size_t)~0) |
52 | | #endif |
53 | | |
/*
 * Copy at most `width' bytes of `str' into `buf' (capacity `blen') and
 * NUL-terminate the result.  When `width' does not leave room for the
 * terminator it is clamped to blen - 1.  A zero-sized buffer is left
 * untouched.  Exactly the (clamped) width is read from `str', so the
 * caller must supply at least that many readable bytes.
 * Returns `buf'.
 */
file_protected char *
file_copystr(char *buf, size_t blen, size_t width, const char *str)
{
	size_t ncopy;

	if (blen != 0) {
		ncopy = width < blen ? width : blen - 1;
		memcpy(buf, str, ncopy);
		buf[ncopy] = '\0';
	}
	return buf;
}
65 | | |
66 | | file_private void |
67 | | file_clearbuf(struct magic_set *ms) |
68 | 8.95k | { |
69 | 8.95k | free(ms->o.buf); |
70 | 8.95k | ms->o.buf = NULL; |
71 | 8.95k | ms->o.blen = 0; |
72 | 8.95k | } |
73 | | |
/*
 * Parse an optional run of decimal digits (a printf field width or
 * precision) starting at *pp and advance *pp past it.
 *
 * Returns 1 when the value is below 1024 (an absent field counts as 0),
 * and 0 otherwise; in the latter case a description is written to `msg'
 * (capacity `mlen') unless `msg' is NULL.  `what' names the field in
 * that message.
 */
file_private int
file_checkfield(char *msg, size_t mlen, const char *what, const char **pp)
{
	const char *start = *pp;
	const char *p = start;
	int fw = 0;

	while (isdigit((unsigned char)*p)) {
		/*
		 * Stop accumulating once the limit has been reached; we
		 * only need to know that the value is too large, and an
		 * arbitrarily long digit string must not overflow the int.
		 */
		if (fw < 1024)
			fw = fw * 10 + (*p - '0');
		p++;
	}

	*pp = p;

	if (fw < 1024)
		return 1;
	if (msg) {
		/* Echo the digits as written, but keep the message short. */
		size_t ndigits = (size_t)(p - start);
		int shown = ndigits > 32 ? 32 : (int)ndigits;

		snprintf(msg, mlen, "field %s too large: %.*s", what,
		    shown, start);
	}
	return 0;
}

/*
 * Sanity-check a printf-style format string before it is handed to the
 * printf family.
 *
 * Every conversion must consist of optional flags, an optional width,
 * an optional ".precision" and then an alphabetic character; "%%" is
 * accepted as a literal percent sign.  '*' in the flag/width position,
 * width or precision values of 1024 or more, a non-alphabetic
 * conversion character, and a conversion cut short by the end of the
 * string are all rejected.
 *
 * Returns 0 if the format is acceptable, -1 otherwise; on rejection the
 * reason is written to `msg' (capacity `mlen') unless `msg' is NULL.
 */
file_protected int
file_checkfmt(char *msg, size_t mlen, const char *fmt)
{
	const char *p;

	for (p = fmt; *p; p++) {
		if (*p != '%')
			continue;
		if (*++p == '%')
			continue;
		/*
		 * Skip uninteresting.  strchr() also matches the
		 * terminating NUL of its first argument, so test for the
		 * end of the format explicitly or we would run past it.
		 */
		while (*p != '\0' && strchr("#0.'+- ", *p) != NULL)
			p++;
		if (*p == '*') {
			if (msg)
				snprintf(msg, mlen, "* not allowed in format");
			return -1;
		}

		if (!file_checkfield(msg, mlen, "width", &p))
			return -1;

		if (*p == '.') {
			p++;
			if (!file_checkfield(msg, mlen, "precision", &p))
				return -1;
		}

		if (*p == '\0') {
			/* "%", "%-", "%10" ... at the very end of the format */
			if (msg)
				snprintf(msg, mlen, "missing format spec");
			return -1;
		}

		if (!isalpha((unsigned char)*p)) {
			if (msg)
				snprintf(msg, mlen, "bad format char: %c", *p);
			return -1;
		}
	}
	return 0;
}
128 | | |
129 | | /* |
130 | | * Like printf, only we append to a buffer. |
131 | | */ |
132 | | file_protected int |
133 | | file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) |
134 | 331k | { |
135 | 331k | int len; |
136 | 331k | char *buf, *newstr; |
137 | 331k | char tbuf[1024]; |
138 | | |
139 | 331k | if (ms->event_flags & EVENT_HAD_ERR) |
140 | 3.92k | return 0; |
141 | | |
142 | 327k | if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) { |
143 | 0 | file_clearbuf(ms); |
144 | 0 | file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf); |
145 | 0 | return -1; |
146 | 0 | } |
147 | | |
148 | 327k | len = vasprintf(&buf, fmt, ap); |
149 | 327k | if (len < 0 || (size_t)len > 1024 || len + ms->o.blen > 1024 * 1024) { |
150 | 149 | size_t blen = ms->o.blen; |
151 | 149 | free(buf); |
152 | 149 | file_clearbuf(ms); |
153 | 149 | file_error(ms, 0, "Output buffer space exceeded %d+%" |
154 | 149 | SIZE_T_FORMAT "u", len, blen); |
155 | 149 | return -1; |
156 | 149 | } |
157 | | |
158 | 327k | if (ms->o.buf != NULL) { |
159 | 302k | len = asprintf(&newstr, "%s%s", ms->o.buf, buf); |
160 | 302k | free(buf); |
161 | 302k | if (len < 0) |
162 | 0 | goto out; |
163 | 302k | free(ms->o.buf); |
164 | 302k | buf = newstr; |
165 | 302k | } |
166 | 327k | ms->o.buf = buf; |
167 | 327k | ms->o.blen = len; |
168 | 327k | return 0; |
169 | 0 | out: |
170 | 0 | file_clearbuf(ms); |
171 | 0 | file_error(ms, errno, "vasprintf failed"); |
172 | 0 | return -1; |
173 | 327k | } |
174 | | |
175 | | file_protected int |
176 | | file_printf(struct magic_set *ms, const char *fmt, ...) |
177 | 330k | { |
178 | 330k | int rv; |
179 | 330k | va_list ap; |
180 | | |
181 | 330k | va_start(ap, fmt); |
182 | 330k | rv = file_vprintf(ms, fmt, ap); |
183 | 330k | va_end(ap); |
184 | 330k | return rv; |
185 | 330k | } |
186 | | |
187 | | /* |
188 | | * error - print best error message possible |
189 | | */ |
190 | | /*VARARGS*/ |
191 | | __attribute__((__format__(__printf__, 3, 0))) |
192 | | file_private void |
193 | | file_error_core(struct magic_set *ms, int error, const char *f, va_list va, |
194 | | size_t lineno) |
195 | 718 | { |
196 | | /* Only the first error is ok */ |
197 | 718 | if (ms->event_flags & EVENT_HAD_ERR) |
198 | 369 | return; |
199 | 349 | if (lineno != 0) { |
200 | 0 | file_clearbuf(ms); |
201 | 0 | (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); |
202 | 0 | } |
203 | 349 | if (ms->o.buf && *ms->o.buf) |
204 | 158 | (void)file_printf(ms, " "); |
205 | 349 | (void)file_vprintf(ms, f, va); |
206 | 349 | if (error > 0) |
207 | 94 | (void)file_printf(ms, " (%s)", strerror(error)); |
208 | 349 | ms->event_flags |= EVENT_HAD_ERR; |
209 | 349 | ms->error = error; |
210 | 349 | } |
211 | | |
212 | | /*VARARGS*/ |
213 | | file_protected void |
214 | | file_error(struct magic_set *ms, int error, const char *f, ...) |
215 | 718 | { |
216 | 718 | va_list va; |
217 | 718 | va_start(va, f); |
218 | 718 | file_error_core(ms, error, f, va, 0); |
219 | 718 | va_end(va); |
220 | 718 | } |
221 | | |
222 | | /* |
223 | | * Print an error with magic line number. |
224 | | */ |
225 | | /*VARARGS*/ |
226 | | file_protected void |
227 | | file_magerror(struct magic_set *ms, const char *f, ...) |
228 | 0 | { |
229 | 0 | va_list va; |
230 | 0 | va_start(va, f); |
231 | 0 | file_error_core(ms, 0, f, va, ms->line); |
232 | 0 | va_end(va); |
233 | 0 | } |
234 | | |
235 | | file_protected void |
236 | | file_oomem(struct magic_set *ms, size_t len) |
237 | 0 | { |
238 | 0 | file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", |
239 | 0 | len); |
240 | 0 | } |
241 | | |
242 | | file_protected void |
243 | | file_badseek(struct magic_set *ms) |
244 | 1 | { |
245 | 1 | file_error(ms, errno, "error seeking"); |
246 | 1 | } |
247 | | |
248 | | file_protected void |
249 | | file_badread(struct magic_set *ms) |
250 | 90 | { |
251 | 90 | file_error(ms, errno, "error reading"); |
252 | 90 | } |
253 | | |
254 | | #ifndef COMPILE_ONLY |
255 | 65.8k | #define FILE_SEPARATOR "\n- " |
256 | | |
257 | | file_protected int |
258 | | file_separator(struct magic_set *ms) |
259 | 36.5k | { |
260 | 36.5k | return file_printf(ms, FILE_SEPARATOR); |
261 | 36.5k | } |
262 | | |
263 | | static void |
264 | | trim_separator(struct magic_set *ms) |
265 | 9.78k | { |
266 | 9.78k | size_t l; |
267 | | |
268 | 9.78k | if (ms->o.buf == NULL) |
269 | 0 | return; |
270 | | |
271 | 9.78k | l = strlen(ms->o.buf); |
272 | 9.78k | if (l < sizeof(FILE_SEPARATOR)) |
273 | 0 | return; |
274 | | |
275 | 9.78k | l -= sizeof(FILE_SEPARATOR) - 1; |
276 | 9.78k | if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0) |
277 | 9.78k | return; |
278 | | |
279 | 0 | ms->o.buf[l] = '\0'; |
280 | 0 | } |
281 | | |
282 | | static int |
283 | | checkdone(struct magic_set *ms, int *rv) |
284 | 8.45k | { |
285 | 8.45k | if ((ms->flags & MAGIC_CONTINUE) == 0) |
286 | 0 | return 1; |
287 | 8.45k | if (file_separator(ms) == -1) |
288 | 0 | *rv = -1; |
289 | 8.45k | return 0; |
290 | 8.45k | } |
291 | | |
292 | | file_protected int |
293 | | file_default(struct magic_set *ms, size_t nb) |
294 | 8.17k | { |
295 | 8.17k | if (ms->flags & MAGIC_MIME) { |
296 | 0 | if ((ms->flags & MAGIC_MIME_TYPE) && |
297 | 0 | file_printf(ms, "application/%s", |
298 | 0 | nb ? "octet-stream" : "x-empty") == -1) |
299 | 0 | return -1; |
300 | 0 | return 1; |
301 | 0 | } |
302 | 8.17k | if (ms->flags & MAGIC_APPLE) { |
303 | 0 | if (file_printf(ms, "UNKNUNKN") == -1) |
304 | 0 | return -1; |
305 | 0 | return 1; |
306 | 0 | } |
307 | 8.17k | if (ms->flags & MAGIC_EXTENSION) { |
308 | 0 | if (file_printf(ms, "???") == -1) |
309 | 0 | return -1; |
310 | 0 | return 1; |
311 | 0 | } |
312 | 8.17k | return 0; |
313 | 8.17k | } |
314 | | |
315 | | /* |
316 | | * The magic detection functions return: |
317 | | * 1: found |
318 | | * 0: not found |
319 | | * -1: error |
320 | | */ |
321 | | /*ARGSUSED*/ |
322 | | file_protected int |
323 | | file_buffer(struct magic_set *ms, int fd, struct stat *st, |
324 | | const char *inname __attribute__ ((__unused__)), |
325 | | const void *buf, size_t nb) |
326 | 10.4k | { |
327 | 10.4k | int m = 0, rv = 0, looks_text = 0; |
328 | 10.4k | const char *code = NULL; |
329 | 10.4k | const char *code_mime = "binary"; |
330 | 10.4k | const char *def = "data"; |
331 | 10.4k | const char *ftype = NULL; |
332 | 10.4k | char *rbuf = NULL; |
333 | 10.4k | struct buffer b; |
334 | | |
335 | 10.4k | buffer_init(&b, fd, st, buf, nb); |
336 | 10.4k | ms->mode = b.st.st_mode; |
337 | | |
338 | 10.4k | if (nb == 0) { |
339 | 13 | def = "empty"; |
340 | 13 | goto simple; |
341 | 10.4k | } else if (nb == 1) { |
342 | 8 | def = "very short file (no magic)"; |
343 | 8 | goto simple; |
344 | 8 | } |
345 | | |
346 | 10.4k | if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { |
347 | 10.4k | looks_text = file_encoding(ms, &b, NULL, 0, |
348 | 10.4k | &code, &code_mime, &ftype); |
349 | 10.4k | } |
350 | | |
351 | | #ifdef __EMX__ |
352 | | if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { |
353 | | m = file_os2_apptype(ms, inname, &b); |
354 | | if ((ms->flags & MAGIC_DEBUG) != 0) |
355 | | (void)fprintf(stderr, "[try os2_apptype %d]\n", m); |
356 | | switch (m) { |
357 | | case -1: |
358 | | return -1; |
359 | | case 0: |
360 | | break; |
361 | | default: |
362 | | return 1; |
363 | | } |
364 | | } |
365 | | #endif |
366 | 10.4k | #if HAVE_FORK |
367 | | /* try compression stuff */ |
368 | 10.4k | if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { |
369 | 10.4k | m = file_zmagic(ms, &b, inname); |
370 | 10.4k | if ((ms->flags & MAGIC_DEBUG) != 0) |
371 | 0 | (void)fprintf(stderr, "[try zmagic %d]\n", m); |
372 | 10.4k | if (m) { |
373 | 697 | goto done_encoding; |
374 | 697 | } |
375 | 10.4k | } |
376 | 9.75k | #endif |
377 | | /* Check if we have a tar file */ |
378 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { |
379 | 9.75k | m = file_is_tar(ms, &b); |
380 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
381 | 0 | (void)fprintf(stderr, "[try tar %d]\n", m); |
382 | 9.75k | if (m) { |
383 | 7 | if (checkdone(ms, &rv)) |
384 | 0 | goto done; |
385 | 7 | } |
386 | 9.75k | } |
387 | | |
388 | | /* Check if we have a JSON file */ |
389 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) { |
390 | 9.75k | m = file_is_json(ms, &b); |
391 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
392 | 0 | (void)fprintf(stderr, "[try json %d]\n", m); |
393 | 9.75k | if (m) { |
394 | 15 | if (checkdone(ms, &rv)) |
395 | 0 | goto done; |
396 | 15 | } |
397 | 9.75k | } |
398 | | |
399 | | /* Check if we have a CSV file */ |
400 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) { |
401 | 9.75k | m = file_is_csv(ms, &b, looks_text, code); |
402 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
403 | 0 | (void)fprintf(stderr, "[try csv %d]\n", m); |
404 | 9.75k | if (m) { |
405 | 7 | if (checkdone(ms, &rv)) |
406 | 0 | goto done; |
407 | 7 | } |
408 | 9.75k | } |
409 | | |
410 | | /* Check if we have a SIMH tape file */ |
411 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_SIMH) == 0) { |
412 | 9.75k | m = file_is_simh(ms, &b); |
413 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
414 | 0 | (void)fprintf(stderr, "[try simh %d]\n", m); |
415 | 9.75k | if (m) { |
416 | 30 | if (checkdone(ms, &rv)) |
417 | 0 | goto done; |
418 | 30 | } |
419 | 9.75k | } |
420 | | |
421 | | /* Check if we have a CDF file */ |
422 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { |
423 | 9.75k | m = file_trycdf(ms, &b); |
424 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
425 | 0 | (void)fprintf(stderr, "[try cdf %d]\n", m); |
426 | 9.75k | if (m) { |
427 | 996 | if (checkdone(ms, &rv)) |
428 | 0 | goto done; |
429 | 996 | } |
430 | 9.75k | } |
431 | 9.75k | #ifdef BUILTIN_ELF |
432 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) { |
433 | 7.39k | file_pushbuf_t *pb; |
434 | | /* |
435 | | * We matched something in the file, so this |
436 | | * *might* be an ELF file, and the file is at |
437 | | * least 5 bytes long, so if it's an ELF file |
438 | | * it has at least one byte past the ELF magic |
439 | | * number - try extracting information from the |
440 | | * ELF headers that cannot easily be extracted |
441 | | * with rules in the magic file. We we don't |
442 | | * print the information yet. |
443 | | */ |
444 | 7.39k | if ((pb = file_push_buffer(ms)) == NULL) |
445 | 0 | return -1; |
446 | | |
447 | 7.39k | rv = file_tryelf(ms, &b); |
448 | 7.39k | rbuf = file_pop_buffer(ms, pb); |
449 | 7.39k | if (rv == -1) { |
450 | 95 | free(rbuf); |
451 | 95 | rbuf = NULL; |
452 | 95 | } |
453 | 7.39k | if ((ms->flags & MAGIC_DEBUG) != 0) |
454 | 0 | (void)fprintf(stderr, "[try elf %d]\n", m); |
455 | 7.39k | } |
456 | 9.75k | #endif |
457 | | |
458 | | /* try soft magic tests */ |
459 | 9.75k | if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { |
460 | 9.75k | m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text); |
461 | 9.75k | if ((ms->flags & MAGIC_DEBUG) != 0) |
462 | 0 | (void)fprintf(stderr, "[try softmagic %d]\n", m); |
463 | 9.75k | if (m == 1 && rbuf) { |
464 | 3.24k | if (file_printf(ms, "%s", rbuf) == -1) |
465 | 53 | goto done; |
466 | 3.24k | } |
467 | 9.70k | if (m) { |
468 | 7.40k | if (checkdone(ms, &rv)) |
469 | 0 | goto done; |
470 | 7.40k | } |
471 | 9.70k | } |
472 | | |
473 | | /* try text properties */ |
474 | 9.70k | if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { |
475 | | |
476 | 9.70k | m = file_ascmagic(ms, &b, looks_text); |
477 | 9.70k | if ((ms->flags & MAGIC_DEBUG) != 0) |
478 | 0 | (void)fprintf(stderr, "[try ascmagic %d]\n", m); |
479 | 9.70k | if (m) { |
480 | 1.55k | goto done; |
481 | 1.55k | } |
482 | 9.70k | } |
483 | | |
484 | 8.17k | simple: |
485 | | /* give up */ |
486 | 8.17k | if (m == 0) { |
487 | 8.17k | m = 1; |
488 | 8.17k | rv = file_default(ms, nb); |
489 | 8.17k | if (rv == 0) |
490 | 8.17k | if (file_printf(ms, "%s", def) == -1) |
491 | 0 | rv = -1; |
492 | 8.17k | } |
493 | 9.78k | done: |
494 | 9.78k | trim_separator(ms); |
495 | 9.78k | if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { |
496 | 0 | if (ms->flags & MAGIC_MIME_TYPE) |
497 | 0 | if (file_printf(ms, "; charset=") == -1) |
498 | 0 | rv = -1; |
499 | 0 | if (file_printf(ms, "%s", code_mime) == -1) |
500 | 0 | rv = -1; |
501 | 0 | } |
502 | 9.78k | #if HAVE_FORK |
503 | 10.4k | done_encoding: |
504 | 10.4k | #endif |
505 | 10.4k | free(rbuf); |
506 | 10.4k | buffer_fini(&b); |
507 | 10.4k | if (rv) |
508 | 53 | return rv; |
509 | | |
510 | 10.4k | return m; |
511 | 10.4k | } |
512 | | #endif |
513 | | |
514 | | file_protected int |
515 | | file_reset(struct magic_set *ms, int checkloaded) |
516 | 8.80k | { |
517 | 8.80k | if (checkloaded && ms->mlist[0] == NULL) { |
518 | 0 | file_error(ms, 0, "no magic files loaded"); |
519 | 0 | return -1; |
520 | 0 | } |
521 | 8.80k | file_clearbuf(ms); |
522 | 8.80k | if (ms->o.pbuf) { |
523 | 8.45k | free(ms->o.pbuf); |
524 | 8.45k | ms->o.pbuf = NULL; |
525 | 8.45k | } |
526 | 8.80k | ms->event_flags &= ~EVENT_HAD_ERR; |
527 | 8.80k | ms->error = -1; |
528 | 8.80k | return 0; |
529 | 8.80k | } |
530 | | |
/*
 * Write the byte at *(o) to *(n) as a four character escape: a
 * backslash followed by three octal digits.  Advances (n) by four and
 * (o) by one.  Both arguments are evaluated more than once, so they
 * must be plain lvalues without side effects.
 */
#define OCTALIFY(n, o)	\
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 6) & 3) + '0', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 3) & 7) + '0', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 0) & 7) + '0', \
	(o)++)
538 | | |
/*
 * Return the accumulated output in printable form.
 *
 * Returns NULL if an error has been recorded, if there is no output, or
 * if the printable copy cannot be allocated.  With MAGIC_RAW the output
 * buffer itself is returned unmodified.  Otherwise the text is copied
 * into ms->o.pbuf with every non-printable byte replaced by a "\ooo"
 * octal escape, and ms->o.pbuf is returned; it stays owned by `ms'.
 */
file_protected const char *
file_getbuffer(struct magic_set *ms)
{
	char *pbuf, *op, *np;
	size_t psize, len;

	if (ms->event_flags & EVENT_HAD_ERR)
		return NULL;

	if (ms->flags & MAGIC_RAW)
		return ms->o.buf;

	if (ms->o.buf == NULL)
		return NULL;

	/* * 4 is for octal representation, + 1 is for NUL */
	len = strlen(ms->o.buf);
	/* refuse lengths for which len * 4 + 1 would overflow size_t */
	if (len > (SIZE_MAX - 1) / 4) {
		file_oomem(ms, len);
		return NULL;
	}
	psize = len * 4 + 1;
	/* the old ms->o.pbuf is kept if realloc fails */
	if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) {
		file_oomem(ms, psize);
		return NULL;
	}
	ms->o.pbuf = pbuf;

#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	{
		/*
		 * First attempt: decode the output as multi-byte
		 * characters so that printable wide characters are
		 * copied through intact.
		 */
		mbstate_t state;
		wchar_t nextchar;
		int mb_conv = 1;
		size_t bytesconsumed;
		char *eop;
		(void)memset(&state, 0, sizeof(mbstate_t));

		np = ms->o.pbuf;
		op = ms->o.buf;
		eop = op + len;

		while (op < eop) {
			bytesconsumed = mbrtowc(&nextchar, op,
			    CAST(size_t, eop - op), &state);
			/* invalid or incomplete sequence: give up on this pass */
			if (bytesconsumed == CAST(size_t, -1) ||
			    bytesconsumed == CAST(size_t, -2)) {
				mb_conv = 0;
				break;
			}

			if (iswprint(nextchar)) {
				(void)memcpy(np, op, bytesconsumed);
				op += bytesconsumed;
				np += bytesconsumed;
			} else {
				/* escape every byte of the character */
				while (bytesconsumed-- > 0)
					OCTALIFY(np, op);
			}
		}
		*np = '\0';

		/* Parsing succeeded as a multi-byte sequence */
		if (mb_conv != 0)
			return ms->o.pbuf;
	}
#endif

	/*
	 * Byte-wise pass, restarting from the beginning of the output:
	 * used when the multi-byte pass is compiled out or failed.
	 */
	for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
		if (isprint(CAST(unsigned char, *op))) {
			*np++ = *op++;
		} else {
			OCTALIFY(np, op);
		}
	}
	*np = '\0';
	return ms->o.pbuf;
}
616 | | |
617 | | file_protected int |
618 | | file_check_mem(struct magic_set *ms, unsigned int level) |
619 | 589k | { |
620 | 589k | size_t len; |
621 | | |
622 | 589k | if (level >= ms->c.len) { |
623 | 1 | len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); |
624 | 1 | ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? |
625 | 1 | malloc(len) : |
626 | 1 | realloc(ms->c.li, len)); |
627 | 1 | if (ms->c.li == NULL) { |
628 | 0 | file_oomem(ms, len); |
629 | 0 | return -1; |
630 | 0 | } |
631 | 1 | } |
632 | 589k | ms->c.li[level].got_match = 0; |
633 | 589k | #ifdef ENABLE_CONDITIONALS |
634 | 589k | ms->c.li[level].last_match = 0; |
635 | 589k | ms->c.li[level].last_cond = COND_NONE; |
636 | 589k | #endif /* ENABLE_CONDITIONALS */ |
637 | 589k | return 0; |
638 | 589k | } |
639 | | |
640 | | file_protected size_t |
641 | | file_printedlen(const struct magic_set *ms) |
642 | 1.55k | { |
643 | 1.55k | return ms->o.blen; |
644 | 1.55k | } |
645 | | |
646 | | file_protected int |
647 | | file_replace(struct magic_set *ms, const char *pat, const char *rep) |
648 | 835 | { |
649 | 835 | file_regex_t rx; |
650 | 835 | int rc, rv = -1; |
651 | | |
652 | 835 | rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); |
653 | 835 | if (rc == 0) { |
654 | 835 | regmatch_t rm; |
655 | 835 | int nm = 0; |
656 | 904 | while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { |
657 | 69 | ms->o.buf[rm.rm_so] = '\0'; |
658 | 69 | if (file_printf(ms, "%s%s", rep, |
659 | 69 | rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) |
660 | 0 | goto out; |
661 | 69 | nm++; |
662 | 69 | } |
663 | 835 | rv = nm; |
664 | 835 | } |
665 | 835 | out: |
666 | 835 | file_regfree(&rx); |
667 | 835 | return rv; |
668 | 835 | } |
669 | | |
670 | | file_private int |
671 | | check_regex(struct magic_set *ms, const char *pat) |
672 | 22.7k | { |
673 | 22.7k | char sbuf[512]; |
674 | 22.7k | unsigned char oc = '\0'; |
675 | 22.7k | const char *p; |
676 | 22.7k | unsigned long l; |
677 | | |
678 | 275k | for (p = pat; *p; p++) { |
679 | 252k | unsigned char c = *p; |
680 | | // Avoid repetition |
681 | 252k | if (c == oc && strchr("?*+{", c) != NULL) { |
682 | 0 | size_t len = strlen(pat); |
683 | 0 | file_magwarn(ms, |
684 | 0 | "repetition-operator operand `%c' " |
685 | 0 | "invalid in regex `%s'", c, |
686 | 0 | file_printable(ms, sbuf, sizeof(sbuf), pat, len)); |
687 | 0 | return -1; |
688 | 0 | } |
689 | 252k | if (c == '{') { |
690 | 63 | char *ep, *eep; |
691 | 63 | errno = 0; |
692 | 63 | l = strtoul(p + 1, &ep, 10); |
693 | 63 | if (ep != p + 1 && l > 1000) |
694 | 0 | goto bounds; |
695 | | |
696 | 63 | if (*ep == ',') { |
697 | 26 | l = strtoul(ep + 1, &eep, 10); |
698 | 26 | if (eep != ep + 1 && l > 1000) |
699 | 0 | goto bounds; |
700 | 26 | } |
701 | 63 | } |
702 | 252k | oc = c; |
703 | 252k | if (isprint(c) || isspace(c) || c == '\b' |
704 | 252k | || c == 0x8a) // XXX: apple magic fixme |
705 | 252k | continue; |
706 | 0 | size_t len = strlen(pat); |
707 | 0 | file_magwarn(ms, |
708 | 0 | "non-ascii characters in regex \\%#o `%s'", |
709 | 0 | c, file_printable(ms, sbuf, sizeof(sbuf), pat, len)); |
710 | 0 | return -1; |
711 | 252k | } |
712 | 22.7k | return 0; |
713 | 0 | bounds: |
714 | 0 | file_magwarn(ms, "bounds too large %ld in regex `%s'", l, pat); |
715 | 0 | return -1; |
716 | 22.7k | } |
717 | | |
/*
 * Compile `pat' into `rx' with regcomp() flags `flags'.
 *
 * The pattern is vetted by check_regex() first; if that fails -1 is
 * returned and `rx' is not touched.  Compilation then runs with the
 * LC_CTYPE locale temporarily switched (to ms->c_lc_ctype when
 * USE_C_LOCALE is defined, otherwise to "C") and the previous locale is
 * restored afterwards.
 *
 * Returns the regcomp() result (0 on success).  A positive result is
 * additionally reported through file_magerror() when MAGIC_CHECK is
 * set.
 */
file_protected int
file_regcomp(struct magic_set *ms file_locale_used, file_regex_t *rx,
    const char *pat, int flags)
{
	if (check_regex(ms, pat) == -1)
		return -1;

#ifdef USE_C_LOCALE
	/* per-thread locale switch; remember the previous one */
	locale_t old = uselocale(ms->c_lc_ctype);
	assert(old != NULL);
#else
	/* copy the locale name: the string from setlocale() may be reused */
	char old[1024];
	strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
	(void)setlocale(LC_CTYPE, "C");
#endif
	int rc;
	rc = regcomp(rx, pat, flags);

	/* put the caller's locale back before doing anything else */
#ifdef USE_C_LOCALE
	uselocale(old);
#else
	(void)setlocale(LC_CTYPE, old);
#endif
	if (rc > 0 && (ms->flags & MAGIC_CHECK)) {
		char errmsg[512], buf[512];

		(void)regerror(rc, rx, errmsg, sizeof(errmsg));
		file_magerror(ms, "regex error %d for `%s', (%s)", rc,
		    file_printable(ms, buf, sizeof(buf), pat, strlen(pat)),
		    errmsg);
	}
	return rc;
}
751 | | |
/*
 * Run the compiled expression `rx' against `str'.
 *
 * The match runs with the LC_CTYPE locale temporarily switched (to
 * ms->c_lc_ctype when USE_C_LOCALE is defined, otherwise to "C"); the
 * previous locale is restored before returning.  When `nmatch' is
 * non-zero the `pmatch' array is zeroed before the call.
 *
 * Returns the regexec() result.
 */
/*ARGSUSED*/
file_protected int
file_regexec(struct magic_set *ms file_locale_used, file_regex_t *rx,
    const char *str, size_t nmatch, regmatch_t* pmatch, int eflags)
{
#ifdef USE_C_LOCALE
	/* per-thread locale switch; remember the previous one */
	locale_t old = uselocale(ms->c_lc_ctype);
	assert(old != NULL);
#else
	/* copy the locale name: the string from setlocale() may be reused */
	char old[1024];
	strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
	(void)setlocale(LC_CTYPE, "C");
#endif
	int rc;
	/* XXX: force initialization because glibc does not always do this */
	if (nmatch != 0)
		memset(pmatch, 0, nmatch * sizeof(*pmatch));
	rc = regexec(rx, str, nmatch, pmatch, eflags);
#ifdef USE_C_LOCALE
	uselocale(old);
#else
	(void)setlocale(LC_CTYPE, old);
#endif
	return rc;
}
777 | | |
778 | | file_protected void |
779 | | file_regfree(file_regex_t *rx) |
780 | 22.6k | { |
781 | 22.6k | regfree(rx); |
782 | 22.6k | } |
783 | | |
784 | | file_protected file_pushbuf_t * |
785 | | file_push_buffer(struct magic_set *ms) |
786 | 22.3k | { |
787 | 22.3k | file_pushbuf_t *pb; |
788 | | |
789 | 22.3k | if (ms->event_flags & EVENT_HAD_ERR) |
790 | 303 | return NULL; |
791 | | |
792 | 22.0k | if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) |
793 | 0 | return NULL; |
794 | | |
795 | 22.0k | pb->buf = ms->o.buf; |
796 | 22.0k | pb->blen = ms->o.blen; |
797 | 22.0k | pb->offset = ms->offset; |
798 | | |
799 | 22.0k | ms->o.buf = NULL; |
800 | 22.0k | ms->o.blen = 0; |
801 | 22.0k | ms->offset = 0; |
802 | | |
803 | 22.0k | return pb; |
804 | 22.0k | } |
805 | | |
806 | | file_protected char * |
807 | | file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) |
808 | 22.0k | { |
809 | 22.0k | char *rbuf; |
810 | | |
811 | 22.0k | if (ms->event_flags & EVENT_HAD_ERR) { |
812 | 1.15k | free(pb->buf); |
813 | 1.15k | free(pb); |
814 | 1.15k | return NULL; |
815 | 1.15k | } |
816 | | |
817 | 20.9k | rbuf = ms->o.buf; |
818 | | |
819 | 20.9k | ms->o.buf = pb->buf; |
820 | 20.9k | ms->o.blen = pb->blen; |
821 | 20.9k | ms->offset = pb->offset; |
822 | | |
823 | 20.9k | free(pb); |
824 | 20.9k | return rbuf; |
825 | 22.0k | } |
826 | | |
827 | | /* |
828 | | * convert string to ascii printable format. |
829 | | */ |
830 | | file_protected char * |
831 | | file_printable(struct magic_set *ms, char *buf, size_t bufsiz, |
832 | | const char *str, size_t slen) |
833 | 41.6k | { |
834 | 41.6k | char *ptr, *eptr = buf + bufsiz - 1; |
835 | 41.6k | const unsigned char *s = RCAST(const unsigned char *, str); |
836 | 41.6k | const unsigned char *es = s + slen; |
837 | | |
838 | 270k | for (ptr = buf; ptr < eptr && s < es && *s; s++) { |
839 | 228k | if ((ms->flags & MAGIC_RAW) != 0 || isprint(*s)) { |
840 | 125k | *ptr++ = *s; |
841 | 125k | continue; |
842 | 125k | } |
843 | 103k | if (ptr >= eptr - 3) |
844 | 73 | break; |
845 | 103k | *ptr++ = '\\'; |
846 | 103k | *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; |
847 | 103k | *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; |
848 | 103k | *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; |
849 | 103k | } |
850 | 41.6k | *ptr = '\0'; |
851 | 41.6k | return buf; |
852 | 41.6k | } |
853 | | |
/*
 * The 16 bytes of a GUID split into the five groups of its textual form
 * XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX, as parsed by file_parse_guid()
 * and printed by file_print_guid().
 */
struct guid {
	uint32_t data1;		/* first group, 8 hex digits */
	uint16_t data2;		/* second group, 4 hex digits */
	uint16_t data3;		/* third group, 4 hex digits */
	uint8_t data4[8];	/* fourth (2 bytes) and fifth (6 bytes) groups */
};
860 | | |
861 | | file_protected int |
862 | | file_parse_guid(const char *s, uint64_t *guid) |
863 | 0 | { |
864 | 0 | struct guid *g = CAST(struct guid *, CAST(void *, guid)); |
865 | 0 | #ifndef WIN32 |
866 | 0 | return sscanf(s, |
867 | 0 | "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", |
868 | 0 | &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1], |
869 | 0 | &g->data4[2], &g->data4[3], &g->data4[4], &g->data4[5], |
870 | 0 | &g->data4[6], &g->data4[7]) == 11 ? 0 : -1; |
871 | | #else |
872 | | /* MS-Windows runtime doesn't support %hhx, except under |
873 | | non-default __USE_MINGW_ANSI_STDIO. */ |
874 | | uint16_t data16[8]; |
875 | | int rv = sscanf(s, "%8x-%4hx-%4hx-%2hx%2hx-%2hx%2hx%2hx%2hx%2hx%2hx", |
876 | | &g->data1, &g->data2, &g->data3, &data16[0], &data16[1], |
877 | | &data16[2], &data16[3], &data16[4], &data16[5], |
878 | | &data16[6], &data16[7]) == 11 ? 0 : -1; |
879 | | int i; |
880 | | for (i = 0; i < 8; i++) |
881 | | g->data4[i] = data16[i]; |
882 | | return rv; |
883 | | #endif |
884 | 0 | } |
885 | | |
886 | | file_protected int |
887 | | file_print_guid(char *str, size_t len, const uint64_t *guid) |
888 | 410 | { |
889 | 410 | const struct guid *g = CAST(const struct guid *, |
890 | 410 | CAST(const void *, guid)); |
891 | | |
892 | 410 | #ifndef WIN32 |
893 | 410 | return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-" |
894 | 410 | "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX", |
895 | 410 | g->data1, g->data2, g->data3, g->data4[0], g->data4[1], |
896 | 410 | g->data4[2], g->data4[3], g->data4[4], g->data4[5], |
897 | 410 | g->data4[6], g->data4[7]); |
898 | | #else |
899 | | return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hX%.2hX-" |
900 | | "%.2hX%.2hX%.2hX%.2hX%.2hX%.2hX", |
901 | | g->data1, g->data2, g->data3, g->data4[0], g->data4[1], |
902 | | g->data4[2], g->data4[3], g->data4[4], g->data4[5], |
903 | | g->data4[6], g->data4[7]); |
904 | | #endif |
905 | 410 | } |
906 | | |
/*
 * Create a pipe in fds[0]/fds[1] with the close-on-exec flag set on
 * both ends.
 *
 * With pipe2() the flag is set as part of pipe creation and pipe2()'s
 * result is returned.  Without it the flag is set afterwards with
 * fcntl() where F_SETFD exists; the fcntl() results are ignored and 0
 * is returned once pipe() has succeeded, -1 if pipe() failed.
 * NOTE(review): the __MINGW32__ build returns 0 without creating a
 * pipe or touching `fds' - confirm callers do not use it there.
 */
file_protected int
file_pipe_closexec(int *fds)
{
#ifdef __MINGW32__
	return 0;
#elif defined(HAVE_PIPE2)
	return pipe2(fds, O_CLOEXEC);
#else
	if (pipe(fds) == -1)
		return -1;
# ifdef F_SETFD
	(void)fcntl(fds[0], F_SETFD, FD_CLOEXEC);
	(void)fcntl(fds[1], F_SETFD, FD_CLOEXEC);
# endif
	return 0;
#endif
}
924 | | |
925 | | file_protected int |
926 | 2.64k | file_clear_closexec(int fd) { |
927 | 2.64k | #ifdef F_SETFD |
928 | 2.64k | return fcntl(fd, F_SETFD, 0); |
929 | | #else |
930 | | return 0; |
931 | | #endif |
932 | 2.64k | } |
933 | | |
/*
 * Strip leading and trailing whitespace from `str' in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer;
 * trailing whitespace is removed by writing a NUL after the last
 * non-whitespace character.  An empty or all-whitespace string yields
 * an empty string; nothing in front of the returned pointer is ever
 * read or written.
 *
 * Returns a pointer into `str' at the first non-whitespace character.
 */
file_protected char *
file_strtrim(char *str)
{
	char *last;

	while (isspace((unsigned char)*str))
		str++;

	/*
	 * Walk back from the terminator, but never past the (advanced)
	 * start of the string.
	 */
	last = str + strlen(str);
	while (last > str && isspace((unsigned char)last[-1]))
		last--;
	*last = '\0';
	return str;
}