Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Copyright (c) 2019 Christos Zoulas |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
15 | | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
16 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
18 | | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
19 | | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
20 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
21 | | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
22 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
23 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
24 | | * POSSIBILITY OF SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | /* |
28 | | * Parse CSV object serialization format (RFC-4180, RFC-7111) |
29 | | */ |
30 | | |
31 | | #ifndef TEST |
32 | | #include "file.h" |
33 | | |
34 | | #ifndef lint |
35 | | FILE_RCSID("@(#)$File: is_csv.c,v 1.13 2023/07/17 16:08:17 christos Exp $") |
36 | | #endif |
37 | | |
38 | | #include <string.h> |
39 | | #include "magic.h" |
40 | | #else |
41 | | #include <sys/types.h> |
42 | | #endif |
43 | | |
44 | | |
45 | | #ifdef DEBUG |
46 | | #include <stdio.h> |
47 | | #define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__) |
48 | | #else |
49 | | #define DPRINTF(fmt, ...) |
50 | | #endif |
51 | | |
52 | | /* |
53 | | * if CSV_LINES == 0: |
54 | | * check all the lines in the buffer |
55 | | * otherwise: |
56 | | * check only up-to the number of lines specified |
57 | | * |
58 | | * the last line is always ignored if it does not end in a newline (LF) |
59 | | */ |
60 | | #ifndef CSV_LINES |
61 | 238 | #define CSV_LINES 10 |
62 | | #endif |
63 | | |
64 | | static int csv_parse(const unsigned char *, const unsigned char *); |
65 | | |
/*
 * Skip the remainder of a double-quoted field.
 *
 * uc points just past the opening quote and ue is one past the end of the
 * buffer.  Inside the field a pair of adjacent quotes ("") is an escaped
 * quote and does not terminate it.
 *
 * Returns a pointer to the first byte following the closing quote, or ue
 * if the buffer is exhausted first (including when the closing quote is
 * the very last byte).
 */
static const unsigned char *
eatquote(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;
	int pending = 0;	/* 1 while the previous byte was an unpaired quote */

	for (p = uc; p < ue; p++) {
		if (*p == '"') {
			/*
			 * A lone quote may be the closing one; a second
			 * quote right after it turns the pair into an escape.
			 */
			pending = !pending;
		} else if (pending) {
			/* The previous quote really closed the field. */
			return p;
		}
	}
	return ue;
}
90 | | |
/*
 * Decide whether the bytes in [uc, ue) look like CSV.
 *
 * Commas outside double-quoted fields are counted per line.  The count of
 * the first newline-terminated line becomes the reference; the scan fails
 * as soon as that first line has no comma at all or a later line has a
 * different count.  When CSV_LINES is non-zero the scan stops at that many
 * lines and the verdict is whether that line matches the reference.
 * Otherwise, at the end of the buffer, at least two complete lines and a
 * reference count are required.
 *
 * Returns 1 if the data looks like CSV, 0 if not.
 */
static int
csv_parse(const unsigned char *uc, const unsigned char *ue)
{
	size_t commas = 0;	/* commas seen on the current line */
	size_t expected = 0;	/* commas on the first line, 0 if unknown */
	size_t lines = 0;	/* newline-terminated lines seen so far */
	const unsigned char *p = uc;

	while (p < ue) {
		unsigned char ch = *p++;

		if (ch == '"') {
			/* Skip the quoted field; commas/newlines in it don't count */
			p = eatquote(p, ue);
			continue;
		}
		if (ch == ',') {
			commas++;
			continue;
		}
		if (ch != '\n')
			continue;

		/* End of a line: compare it against the reference count. */
		DPRINTF("%zu %zu %zu\n", lines, commas, expected);
		lines++;
#if CSV_LINES
		if (lines == CSV_LINES)
			return expected != 0 && expected == commas;
#endif
		if (expected == 0) {
			/* First line: a line without any comma is not CSV. */
			if (commas == 0)
				return 0;
			expected = commas;
		} else if (expected != commas) {
			/* Field count differs from the first line. */
			return 0;
		}
		commas = 0;
	}
	/* A trailing line without a newline is never compared. */
	return expected != 0 && lines >= 2;
}
130 | | |
131 | | #ifndef TEST |
132 | | int |
133 | | file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text, |
134 | | const char *code) |
135 | 9.75k | { |
136 | 9.75k | const unsigned char *uc = CAST(const unsigned char *, b->fbuf); |
137 | 9.75k | const unsigned char *ue = uc + b->flen; |
138 | 9.75k | int mime = ms->flags & MAGIC_MIME; |
139 | | |
140 | 9.75k | if (!looks_text) |
141 | 8.25k | return 0; |
142 | | |
143 | 1.50k | if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) |
144 | 0 | return 0; |
145 | | |
146 | 1.50k | if (!csv_parse(uc, ue)) |
147 | 1.50k | return 0; |
148 | | |
149 | 7 | if (mime == MAGIC_MIME_ENCODING) |
150 | 0 | return 1; |
151 | | |
152 | 7 | if (mime) { |
153 | 0 | if (file_printf(ms, "text/csv") == -1) |
154 | 0 | return -1; |
155 | 0 | return 1; |
156 | 0 | } |
157 | | |
158 | 7 | if (file_printf(ms, "CSV %s%stext", code ? code : "", |
159 | 7 | code ? " " : "") == -1) |
160 | 0 | return -1; |
161 | | |
162 | 7 | return 1; |
163 | 7 | } |
164 | | |
165 | | #else |
166 | | |
167 | | #include <sys/types.h> |
168 | | #include <sys/stat.h> |
169 | | #include <stdio.h> |
170 | | #include <fcntl.h> |
171 | | #include <unistd.h> |
172 | | #include <stdlib.h> |
173 | | #include <stdint.h> |
174 | | #include <err.h> |
175 | | |
/*
 * Stand-alone driver, compiled only when TEST is defined: reads the file
 * named by argv[1] into memory, runs csv_parse() over it and prints the
 * verdict as "is csv <0|1>".
 *
 * Exits with EXIT_FAILURE (after printing a diagnostic) if the argument
 * is missing or the file cannot be opened, stat'ed, or read completely.
 */
int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	size_t len;
	ssize_t nr;

	/* argv[1] is only valid when an argument was actually supplied. */
	if (argc < 2)
		errx(EXIT_FAILURE, "Missing file argument");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	len = (size_t)st.st_size;
	/*
	 * malloc(0) may legitimately return NULL; always ask for at least
	 * one byte so an empty file is not reported as an allocation error.
	 * No CAST() here: that macro comes from file.h, which is not
	 * included in the TEST build.
	 */
	if ((p = malloc(len != 0 ? len : 1)) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);

	nr = read(fd, p, len);
	if (nr == -1)
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	/* A short read does not set errno, so don't print strerror(). */
	if ((size_t)nr != len)
		errx(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);

	printf("is csv %d\n", csv_parse(p, p + len));

	free(p);
	(void)close(fd);
	return 0;
}
198 | | #endif |