Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2017-2019 OpenSIPS Solutions |
3 | | * |
4 | | * This file is part of opensips, a free SIP server. |
5 | | * |
6 | | * opensips is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU General Public License as published by |
8 | | * the Free Software Foundation; either version 2 of the License, or |
9 | | * (at your option) any later version |
10 | | * |
11 | | * opensips is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU General Public License |
17 | | * along with this program; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include <string.h> |
22 | | #include <stdio.h> |
23 | | |
24 | | #include "../mem/mem.h" |
25 | | #include "../trim.h" |
26 | | |
27 | | #include "csv.h" |
28 | | |
/*
 * Allocator hooks shared by the helpers in this file.  They are not fixed:
 * __parse_csv_record() sets both, free_csv_record() sets free_f and
 * __print_csv_record() sets malloc_f, each time according to whether
 * CSV_SHM is present in the relevant flags.
 */
static osips_malloc_t malloc_f;
static osips_free_t free_f;
31 | | |
32 | | static str_list *push_csv_field(const str *field, |
33 | | str_list **record, enum csv_flags parse_flags) |
34 | 7.50k | { |
35 | 7.50k | str_list *rec; |
36 | 7.50k | enum csv_flags *flags_holder; |
37 | 7.50k | int len; |
38 | | |
39 | 7.50k | len = sizeof *rec; |
40 | 7.50k | if (!*record) |
41 | 7.50k | len += sizeof *flags_holder; |
42 | | |
43 | 7.50k | rec = malloc_f(len); |
44 | 7.50k | if (!rec) { |
45 | 0 | LM_ERR("oom\n"); |
46 | 0 | return NULL; |
47 | 0 | } |
48 | | |
49 | 7.50k | memset(rec, 0, len); |
50 | | |
51 | 7.50k | if (parse_flags & CSV_DUP_FIELDS) { |
52 | 3.14k | rec->s.s = malloc_f(field->len + 1); |
53 | 3.14k | if (!rec->s.s) { |
54 | 0 | free_f(rec); |
55 | 0 | LM_ERR("oom\n"); |
56 | 0 | return NULL; |
57 | 0 | } |
58 | 3.14k | memcpy(rec->s.s, field->s, field->len); |
59 | 3.14k | rec->s.len = field->len; |
60 | 3.14k | rec->s.s[field->len] = '\0'; |
61 | 4.35k | } else { |
62 | 4.35k | rec->s = *field; |
63 | 4.35k | } |
64 | | |
65 | 7.50k | if (!*record) { |
66 | 7.50k | flags_holder = (enum csv_flags *)(rec + 1); |
67 | 7.50k | *flags_holder = parse_flags; |
68 | 7.50k | *record = rec; |
69 | 7.50k | } else { |
70 | 0 | (*record)->next = rec; |
71 | 0 | } |
72 | | |
73 | 7.50k | return rec; |
74 | 7.50k | } |
75 | | |
76 | | csv_record *__parse_csv_record(const str *_in, enum csv_flags parse_flags, |
77 | | unsigned char sep) |
78 | 369 | { |
79 | 369 | str_list *record = NULL, **last = &record; |
80 | 369 | str in = *_in, field; |
81 | 369 | char *ch, *p, *c, finished, *lim, *field_start; |
82 | | |
83 | 369 | if (parse_flags & CSV_SHM) { |
84 | 0 | malloc_f = osips_shm_malloc; |
85 | 0 | free_f = osips_shm_free; |
86 | 369 | } else { |
87 | 369 | malloc_f = osips_pkg_malloc; |
88 | 369 | free_f = osips_pkg_free; |
89 | 369 | } |
90 | | |
91 | 369 | if (parse_flags & CSV_RFC_4180) |
92 | 240 | goto rfc_4180_parsing; |
93 | | |
94 | 129 | trim(&in); |
95 | | |
96 | 4.48k | for (finished = 0; !finished; ) { |
97 | 4.35k | ch = memchr(in.s, sep, in.len); |
98 | 4.35k | if (!ch) { |
99 | 129 | ch = in.s + in.len; |
100 | 129 | finished = 1; |
101 | 129 | } |
102 | | |
103 | 4.35k | field.s = in.s; |
104 | 4.35k | field.len = ch - in.s; |
105 | 4.35k | in.s += field.len + 1; |
106 | 4.35k | in.len -= field.len + 1; |
107 | 4.35k | trim(&field); |
108 | | |
109 | 4.35k | if (!push_csv_field(&field, last, parse_flags)) |
110 | 0 | goto oom; |
111 | | |
112 | 4.35k | last = &(*last)->next; |
113 | 4.35k | } |
114 | | |
115 | 129 | return record; |
116 | | |
117 | 240 | rfc_4180_parsing: |
118 | 240 | parse_flags |= CSV_DUP_FIELDS; |
119 | | |
120 | 240 | if (in.len >= 2 && in.s[in.len - 2] == '\r' && in.s[in.len - 1] == '\n') |
121 | 1 | in.len -= 2; |
122 | | |
123 | 240 | field_start = NULL; |
124 | 7.54k | for (ch = in.s, lim = in.s + in.len; ch < lim; ch++) { |
125 | 7.49k | if (*ch < 0x20 || *ch > 0x7E) |
126 | 51 | goto bad_csv_str; |
127 | | |
128 | 7.44k | switch (*ch) { |
129 | 1.50k | case ',': |
130 | 1.50k | if (field_start) |
131 | 1.44k | field.s = field_start; |
132 | 61 | else |
133 | 61 | field.s = in.s; |
134 | | |
135 | 1.50k | field.len = ch - field.s; |
136 | 1.50k | field_start = ch + 1; |
137 | | |
138 | 1.50k | if (!push_csv_field(&field, last, parse_flags)) |
139 | 0 | goto oom; |
140 | | |
141 | 1.50k | last = &(*last)->next; |
142 | 1.50k | break; |
143 | | |
144 | 2.37k | case '"': |
145 | 2.37k | if ((field_start && ch != field_start) || |
146 | 2.37k | (!field_start && ch != in.s)) |
147 | 696 | continue; |
148 | | |
149 | 259k | for (p = ch + 1; p < lim; p++) { |
150 | 259k | if (*p == '"') { |
151 | 2.40k | if (p == lim - 1 || *(p + 1) != '"') |
152 | 1.62k | goto matched_quote; |
153 | | |
154 | 779 | p++; |
155 | 779 | continue; |
156 | 2.40k | } |
157 | 259k | } |
158 | | |
159 | 48 | goto bad_csv_str; |
160 | | |
161 | 1.62k | matched_quote: |
162 | 1.62k | field.s = malloc_f(p - ch); |
163 | 1.62k | if (!field.s) |
164 | 0 | goto oom; |
165 | | |
166 | 258k | for (c = field.s; ++ch < p; c++) { |
167 | 256k | if (*ch == '"') |
168 | 585 | ch++; |
169 | 256k | *c = *ch; |
170 | 256k | } |
171 | | |
172 | 1.62k | if (ch < lim - 1) { |
173 | 1.58k | if (*(ch + 1) != ',') { |
174 | 34 | free_f(field.s); |
175 | 34 | goto bad_csv_str; |
176 | 34 | } |
177 | 1.54k | ch++; |
178 | 1.54k | field_start = ch + 1; |
179 | 1.54k | } |
180 | | |
181 | 1.59k | *c = '\0'; |
182 | 1.59k | field.len = c - field.s; |
183 | | |
184 | 1.59k | if (!push_csv_field(&field, last, parse_flags)) { |
185 | 0 | free_f(field.s); |
186 | 0 | goto oom; |
187 | 0 | } |
188 | | |
189 | 1.59k | free_f(field.s); |
190 | 1.59k | last = &(*last)->next; |
191 | | |
192 | 1.59k | if (ch == lim - 1) |
193 | 55 | return record; |
194 | | |
195 | 1.53k | break; |
196 | 7.44k | } |
197 | 7.44k | } |
198 | | |
199 | 52 | if (field_start) { |
200 | 33 | field.s = field_start; |
201 | 33 | field.len = lim - field.s; |
202 | 33 | } else { |
203 | 19 | field = in; |
204 | 19 | } |
205 | | |
206 | 52 | if (!push_csv_field(&field, last, parse_flags)) |
207 | 0 | goto oom; |
208 | | |
209 | 52 | return record; |
210 | | |
211 | 133 | bad_csv_str: |
212 | 133 | LM_DBG("invalid CSV string: '%.*s'\n", in.len, in.s); |
213 | 133 | free_csv_record(record); |
214 | 133 | return NULL; |
215 | | |
216 | 0 | oom: |
217 | 0 | LM_ERR("oom while parsing '%.*s'\n", in.len, in.s); |
218 | 0 | free_csv_record(record); |
219 | 0 | return NULL; |
220 | 52 | } |
221 | | |
222 | | void free_csv_record(csv_record *record) |
223 | 502 | { |
224 | 502 | enum csv_flags flags_holder; |
225 | 502 | str_list *prev; |
226 | | |
227 | 502 | if (!record) |
228 | 220 | return; |
229 | | |
230 | 282 | flags_holder = *(enum csv_flags *)(record + 1); |
231 | 282 | if (flags_holder & CSV_SHM) |
232 | 0 | free_f = osips_shm_free; |
233 | 282 | else |
234 | 282 | free_f = osips_pkg_free; |
235 | | |
236 | 7.78k | while (record) { |
237 | 7.50k | prev = record; |
238 | 7.50k | record = record->next; |
239 | | |
240 | 7.50k | if (flags_holder & CSV_DUP_FIELDS) |
241 | 3.14k | free_f(prev->s.s); |
242 | | |
243 | 7.50k | free_f(prev); |
244 | 7.50k | } |
245 | 282 | } |
246 | | |
247 | | static int check_quote_csv_record(str *val, int *escape) |
248 | 0 | { |
249 | 0 | char *p; |
250 | 0 | int quote = 0; |
251 | 0 | *escape = 0; |
252 | |
|
253 | 0 | for (p = val->s; p < val->s + val->len; p++) { |
254 | 0 | switch (*p) { |
255 | 0 | case '"': |
256 | 0 | (*escape)++; |
257 | | /* fallthrough */ |
258 | 0 | case ',': |
259 | 0 | case '\n': |
260 | 0 | quote = 1; |
261 | 0 | break; |
262 | 0 | } |
263 | 0 | } |
264 | 0 | return quote; |
265 | 0 | } |
266 | | |
267 | | str *__print_csv_record(csv_record *record, enum csv_flags print_flags, |
268 | | unsigned char sep) |
269 | 0 | { |
270 | 0 | static str ret; |
271 | 0 | str_list *it; |
272 | 0 | int len = -1, esc; |
273 | 0 | char *p, *c; |
274 | |
|
275 | 0 | if (print_flags & CSV_SHM) |
276 | 0 | malloc_f = osips_shm_malloc; |
277 | 0 | else |
278 | 0 | malloc_f = osips_pkg_malloc; |
279 | |
|
280 | 0 | for (it = record; it; it = it->next) { |
281 | 0 | len += 1 /* sep */ + it->s.len; |
282 | | /* check to see if ne need to encode */ |
283 | 0 | if (check_quote_csv_record(&it->s, &esc)) |
284 | 0 | len += 2 + esc; |
285 | 0 | } |
286 | |
|
287 | 0 | ret.s = malloc_f(len); |
288 | 0 | if (!ret.s) |
289 | 0 | return NULL; |
290 | 0 | p = ret.s; |
291 | 0 | for (it = record; it; it = it->next) { |
292 | 0 | if (it != record) |
293 | 0 | *p++ = sep; |
294 | |
|
295 | 0 | if (check_quote_csv_record(&it->s, &esc)) { |
296 | 0 | if (!esc) { |
297 | | /* simply add the quotes */ |
298 | 0 | *p++ = '"'; |
299 | 0 | memcpy(p, it->s.s, it->s.len); |
300 | 0 | p+= it->s.len; |
301 | 0 | *p++ = '"'; |
302 | 0 | } else { |
303 | 0 | for (c = it->s.s; c < it->s.s + it->s.len; c++) { |
304 | 0 | switch (*c) { |
305 | 0 | case '"': |
306 | 0 | *p++ = '"'; |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | *p++ = *c; |
310 | 0 | } |
311 | 0 | } |
312 | 0 | } else { |
313 | | /* simply copy the content */ |
314 | 0 | memcpy(p, it->s.s, it->s.len); |
315 | 0 | p += it->s.len; |
316 | 0 | } |
317 | 0 | } |
318 | 0 | ret.len = len; |
319 | 0 | return &ret; |
320 | 0 | } |