/src/libvips/libvips/iofuncs/sbuf.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Buffered input from a source. |
2 | | * |
3 | | * J.Cupitt, 18/11/19 |
4 | | */ |
5 | | |
6 | | /* |
7 | | |
8 | | This file is part of VIPS. |
9 | | |
10 | | VIPS is free software; you can redistribute it and/or modify |
11 | | it under the terms of the GNU Lesser General Public License as published by |
12 | | the Free Software Foundation; either version 2 of the License, or |
13 | | (at your option) any later version. |
14 | | |
15 | | This program is distributed in the hope that it will be useful, |
16 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | GNU Lesser General Public License for more details. |
19 | | |
20 | | You should have received a copy of the GNU Lesser General Public License |
21 | | along with this program; if not, write to the Free Software |
22 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
23 | | 02110-1301 USA |
24 | | |
25 | | */ |
26 | | |
27 | | /* |
28 | | |
29 | | These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk |
30 | | |
31 | | */ |
32 | | |
33 | | /* |
34 | | #define VIPS_DEBUG |
35 | | */ |
36 | | |
37 | | #ifdef HAVE_CONFIG_H |
38 | | #include <config.h> |
39 | | #endif /*HAVE_CONFIG_H*/ |
40 | | #include <glib/gi18n-lib.h> |
41 | | |
42 | | #include <stdio.h> |
43 | | #include <ctype.h> |
44 | | #include <stdlib.h> |
45 | | #ifdef HAVE_UNISTD_H |
46 | | #include <unistd.h> |
47 | | #endif /*HAVE_UNISTD_H*/ |
48 | | #include <string.h> |
49 | | #include <errno.h> |
50 | | #include <sys/types.h> |
51 | | #include <sys/stat.h> |
52 | | #include <fcntl.h> |
53 | | |
54 | | #include <vips/vips.h> |
55 | | #include <vips/internal.h> |
56 | | #include <vips/debug.h> |
57 | | |
58 | | /** |
59 | | * SECTION: sbuf |
60 | | * @short_description: buffered read from a source |
61 | | * @stability: Stable |
62 | | * @see_also: <link linkend="libvips-foreign">foreign</link> |
63 | | * @include: vips/vips.h |
64 | | * @title: VipsSbuf |
65 | | * |
66 | | * #VipsSbuf wraps up a #VipsSource and provides a set of calls for |
67 | | * text-oriented buffered reading. You can fetch lines of text, skip |
68 | | * whitespace, and so on. |
69 | | * |
70 | | * It is useful for implementing things like CSV readers, for example. |
71 | | */ |
72 | | |
73 | | G_DEFINE_TYPE(VipsSbuf, vips_sbuf, VIPS_TYPE_OBJECT); |
74 | | |
75 | | static void |
76 | | vips_sbuf_class_init(VipsSbufClass *class) |
77 | 1 | { |
78 | 1 | VipsObjectClass *object_class = VIPS_OBJECT_CLASS(class); |
79 | 1 | GObjectClass *gobject_class = G_OBJECT_CLASS(class); |
80 | | |
81 | 1 | gobject_class->set_property = vips_object_set_property; |
82 | 1 | gobject_class->get_property = vips_object_get_property; |
83 | | |
84 | 1 | object_class->nickname = "sbuf"; |
85 | 1 | object_class->description = _("buffered source"); |
86 | | |
87 | 1 | VIPS_ARG_OBJECT(class, "input", 1, |
88 | 1 | _("Input"), |
89 | 1 | _("Source to load from"), |
90 | 1 | VIPS_ARGUMENT_REQUIRED_INPUT, |
91 | 1 | G_STRUCT_OFFSET(VipsSbuf, source), |
92 | 1 | VIPS_TYPE_SOURCE); |
93 | 1 | } |
94 | | |
95 | | static void |
96 | | vips_sbuf_init(VipsSbuf *sbuf) |
97 | 7.41k | { |
98 | 7.41k | sbuf->read_point = 0; |
99 | 7.41k | sbuf->chars_in_buffer = 0; |
100 | 7.41k | sbuf->input_buffer[0] = '\0'; |
101 | 7.41k | } |
102 | | |
103 | | /** |
104 | | * vips_sbuf_new_from_source: |
105 | | * @source: source to operate on |
106 | | * |
107 | | * Create a VipsSbuf wrapping a source. |
108 | | * |
109 | | * Returns: a new #VipsSbuf |
110 | | */ |
111 | | VipsSbuf * |
112 | | vips_sbuf_new_from_source(VipsSource *source) |
113 | 7.41k | { |
114 | 7.41k | VipsSbuf *sbuf; |
115 | | |
116 | 7.41k | g_assert(source); |
117 | | |
118 | 7.41k | sbuf = VIPS_SBUF(g_object_new(VIPS_TYPE_SBUF, |
119 | 7.41k | "input", source, |
120 | 7.41k | NULL)); |
121 | | |
122 | 7.41k | if (vips_object_build(VIPS_OBJECT(sbuf))) { |
123 | 0 | VIPS_UNREF(sbuf); |
124 | 0 | return NULL; |
125 | 0 | } |
126 | | |
127 | 7.41k | return sbuf; |
128 | 7.41k | } |
129 | | |
130 | | /** |
131 | | * vips_sbuf_unbuffer: |
132 | | * @sbuf: source to operate on |
133 | | * |
134 | | * Discard the input buffer and reset the read point. You must call this |
135 | | * before using read or seek on the underlying #VipsSource class. |
136 | | */ |
137 | | void |
138 | | vips_sbuf_unbuffer(VipsSbuf *sbuf) |
139 | 0 | { |
140 | | /* We'd read ahead a little way -- seek backwards by that amount. |
141 | | */ |
142 | 0 | vips_source_seek(sbuf->source, |
143 | 0 | sbuf->read_point - sbuf->chars_in_buffer, SEEK_CUR); |
144 | 0 | sbuf->read_point = 0; |
145 | 0 | sbuf->chars_in_buffer = 0; |
146 | 0 | } |
147 | | |
148 | | /* Returns -1 on error, 0 on EOF, otherwise bytes read. |
149 | | */ |
150 | | static gint64 |
151 | | vips_sbuf_refill(VipsSbuf *sbuf) |
152 | 11.3k | { |
153 | 11.3k | gint64 bytes_read; |
154 | | |
155 | 11.3k | VIPS_DEBUG_MSG("vips_sbuf_refill:\n"); |
156 | | |
157 | | /* We should not discard any unread bytes. |
158 | | */ |
159 | 11.3k | g_assert(sbuf->read_point == sbuf->chars_in_buffer); |
160 | | |
161 | 11.3k | bytes_read = vips_source_read(sbuf->source, |
162 | 11.3k | sbuf->input_buffer, VIPS_SBUF_BUFFER_SIZE); |
163 | 11.3k | if (bytes_read == -1) |
164 | 0 | return -1; |
165 | | |
166 | 11.3k | sbuf->read_point = 0; |
167 | 11.3k | sbuf->chars_in_buffer = bytes_read; |
168 | | |
169 | | /* Always add a null byte so we can use strchr() etc. on lines. This is |
170 | | * safe because input_buffer is VIPS_SBUF_BUFFER_SIZE + 1 bytes. |
171 | | */ |
172 | 11.3k | sbuf->input_buffer[bytes_read] = '\0'; |
173 | | |
174 | 11.3k | return bytes_read; |
175 | 11.3k | } |
176 | | |
177 | | /** |
178 | | * vips_sbuf_getc: |
179 | | * @sbuf: source to operate on |
180 | | * |
181 | | * Fetch the next character from the source. |
182 | | * |
183 | | * If you can, use the macro VIPS_SBUF_GETC() instead for speed. |
184 | | * |
185 | | * Returns: the next char from @sbuf, -1 on read error or EOF. |
186 | | */ |
187 | | int |
188 | | vips_sbuf_getc(VipsSbuf *sbuf) |
189 | 11.3k | { |
190 | 11.3k | if (sbuf->read_point == sbuf->chars_in_buffer && |
191 | 11.3k | vips_sbuf_refill(sbuf) <= 0) |
192 | 2.40k | return -1; |
193 | | |
194 | 11.3k | g_assert(sbuf->read_point < sbuf->chars_in_buffer); |
195 | | |
196 | 8.95k | return sbuf->input_buffer[sbuf->read_point++]; |
197 | 11.3k | } |
198 | | |
199 | | /** |
200 | | * VIPS_SBUF_GETC: |
201 | | * @sbuf: source to operate on |
202 | | * |
203 | | * Fetch the next character from the source. |
204 | | * |
205 | | * Returns: the next char from @sbuf, -1 on read error or EOF. |
206 | | */ |
207 | | |
208 | | /** |
209 | | * vips_sbuf_ungetc: |
210 | | * @sbuf: source to operate on |
211 | | * |
212 | | * The opposite of vips_sbuf_getc(): undo the previous getc. |
213 | | * |
214 | | * unget more than one character is undefined. Unget at the start of the file |
215 | | * does nothing. |
216 | | * |
217 | | * If you can, use the macro VIPS_SBUF_UNGETC() instead for speed. |
218 | | */ |
219 | | void |
220 | | vips_sbuf_ungetc(VipsSbuf *sbuf) |
221 | 0 | { |
222 | 0 | if (sbuf->read_point > 0) |
223 | 0 | sbuf->read_point -= 1; |
224 | 0 | } |
225 | | |
226 | | /** |
227 | | * VIPS_SBUF_UNGETC: |
228 | | * @sbuf: source to operate on |
229 | | * |
230 | | * The opposite of vips_sbuf_getc(): undo the previous getc. |
231 | | * |
232 | | * Ungetting more than one character is undefined. Unget at the start of the file |
233 | | * does nothing. |
234 | | */ |
235 | | |
236 | | /** |
237 | | * vips_sbuf_require: |
238 | | * @sbuf: source to operate on |
239 | | * @require: make sure we have at least this many chars available |
240 | | * |
241 | | * Make sure there are at least @require bytes of readahead available. |
242 | | * |
243 | | * Returns: 0 on success, -1 on error or EOF. |
244 | | */ |
245 | | int |
246 | | vips_sbuf_require(VipsSbuf *sbuf, int require) |
247 | 218 | { |
248 | 218 | g_assert(require < VIPS_SBUF_BUFFER_SIZE); |
249 | 218 | g_assert(sbuf->chars_in_buffer >= 0); |
250 | 218 | g_assert(sbuf->chars_in_buffer <= VIPS_SBUF_BUFFER_SIZE); |
251 | 218 | g_assert(sbuf->read_point >= 0); |
252 | 218 | g_assert(sbuf->read_point <= sbuf->chars_in_buffer); |
253 | | |
254 | 218 | VIPS_DEBUG_MSG("vips_sbuf_require: %d\n", require); |
255 | | |
256 | 218 | if (sbuf->read_point + require > sbuf->chars_in_buffer) { |
257 | | /* Areas can overlap, so we must memmove(). |
258 | | */ |
259 | 218 | memmove(sbuf->input_buffer, |
260 | 218 | sbuf->input_buffer + sbuf->read_point, |
261 | 218 | sbuf->chars_in_buffer - sbuf->read_point); |
262 | 218 | sbuf->chars_in_buffer -= sbuf->read_point; |
263 | 218 | sbuf->read_point = 0; |
264 | | |
265 | 263 | while (require > sbuf->chars_in_buffer) { |
266 | 220 | unsigned char *to = sbuf->input_buffer + |
267 | 220 | sbuf->chars_in_buffer; |
268 | 220 | int space_available = |
269 | 220 | VIPS_SBUF_BUFFER_SIZE - |
270 | 220 | sbuf->chars_in_buffer; |
271 | 220 | gint64 bytes_read; |
272 | | |
273 | 220 | if ((bytes_read = vips_source_read(sbuf->source, |
274 | 220 | to, space_available)) < 0) |
275 | 0 | return -1; |
276 | 220 | if (bytes_read == 0) { |
277 | 175 | vips_error( |
278 | 175 | vips_connection_nick(VIPS_CONNECTION( |
279 | 175 | sbuf->source)), |
280 | 175 | "%s", _("end of file")); |
281 | 175 | return -1; |
282 | 175 | } |
283 | | |
284 | 45 | to[bytes_read] = '\0'; |
285 | 45 | sbuf->chars_in_buffer += bytes_read; |
286 | 45 | } |
287 | 218 | } |
288 | | |
289 | 43 | return 0; |
290 | 218 | } |
291 | | |
292 | | /** |
293 | | * VIPS_SBUF_REQUIRE: |
294 | | * @sbuf: source to operate on |
295 | | * @require: need this many characters |
296 | | * |
297 | | * Make sure at least @require characters are available for |
298 | | * VIPS_SBUF_PEEK() and VIPS_SBUF_FETCH(). |
299 | | * |
300 | | * Returns: 0 on success, -1 on read error or EOF. |
301 | | */ |
302 | | |
303 | | /** |
304 | | * VIPS_SBUF_PEEK: |
305 | | * @sbuf: source to operate on |
306 | | * |
307 | | * After a successful VIPS_SBUF_REQUIRE(), you can index this to get |
308 | | * the @require characters of input that were requested. |
309 | | * |
310 | | * Returns: a pointer to the next require characters of input. |
311 | | */ |
312 | | |
313 | | /** |
314 | | * VIPS_SBUF_FETCH: |
315 | | * @sbuf: source to operate on |
316 | | * |
317 | | * After a successful VIPS_SBUF_REQUIRE(), you can use this require times |
318 | | * to fetch characters of input. |
319 | | * |
320 | | * Returns: the next input character. |
321 | | */ |
322 | | |
323 | | /** |
324 | | * vips_sbuf_get_line: |
325 | | * @sbuf: source to operate on |
326 | | * |
327 | | * Fetch the next line of text from @sbuf and return it. The end of |
328 | | * line character (or characters, for DOS files) are removed, and the string |
329 | | * is terminated with a null (`\0` character). |
330 | | * |
331 | | * Returns NULL on end of file or read error. |
332 | | * |
333 | | * If the line is longer than some arbitrary (but large) limit, it is |
334 | | * truncated. If you need to be able to read very long lines, use the |
335 | | * slower vips_sbuf_get_line_copy(). |
336 | | * |
337 | | * The return value is owned by @sbuf and must not be freed. It |
338 | | * is valid until the next get call to @sbuf. |
339 | | * |
340 | | * Returns: the next line of text, or NULL on EOF or read error. |
341 | | */ |
342 | | const char * |
343 | | vips_sbuf_get_line(VipsSbuf *sbuf) |
344 | 19.1k | { |
345 | 19.1k | int write_point; |
346 | 19.1k | int space_remaining; |
347 | 19.1k | int ch; |
348 | | |
349 | 19.1k | VIPS_DEBUG_MSG("vips_sbuf_get_line:\n"); |
350 | | |
351 | 19.1k | write_point = 0; |
352 | 19.1k | space_remaining = VIPS_SBUF_BUFFER_SIZE; |
353 | | |
354 | 2.67M | while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 && |
355 | 2.67M | ch != '\n' && |
356 | 2.67M | space_remaining > 0) { |
357 | 2.65M | sbuf->line[write_point] = ch; |
358 | 2.65M | write_point += 1; |
359 | 2.65M | space_remaining -= 1; |
360 | 2.65M | } |
361 | 19.1k | sbuf->line[write_point] = '\0'; |
362 | | |
363 | | /* If we hit EOF immediately, return EOF. |
364 | | */ |
365 | 19.1k | if (ch == -1 && |
366 | 19.1k | write_point == 0) |
367 | 270 | return NULL; |
368 | | |
369 | | /* If the final char in the buffer is \r, this is probably a DOS file |
370 | | * and we should remove that too. |
371 | | * |
372 | | * There's a chance this could incorrectly remove \r in very long |
373 | | * lines, but ignore this. |
374 | | */ |
375 | 18.8k | if (write_point > 0 && |
376 | 18.8k | sbuf->line[write_point - 1] == '\r') |
377 | 750 | sbuf->line[write_point - 1] = '\0'; |
378 | | /* If we filled the output line without seeing \n, keep going to the |
379 | | * next \n. |
380 | | */ |
381 | 18.8k | if (ch != '\n' && |
382 | 18.8k | space_remaining == 0) { |
383 | 4.28M | while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 && |
384 | 4.28M | ch != '\n') |
385 | 4.28M | ; |
386 | 187 | } |
387 | | |
388 | 18.8k | VIPS_DEBUG_MSG(" %s\n", sbuf->line); |
389 | | |
390 | 18.8k | return (const char *) sbuf->line; |
391 | 19.1k | } |
392 | | |
393 | | /** |
394 | | * vips_sbuf_get_line_copy: |
395 | | * @sbuf: source to operate on |
396 | | * |
397 | | * Fetch the next line of text from @sbuf and return it. The end of |
398 | | * line character (or characters, for DOS files) are removed, and the string |
399 | | * is terminated with a null (`\0` character). |
400 | | * |
401 | | * The return result must be freed with g_free(). |
402 | | * |
403 | | * This is slower than vips_sbuf_get_line(), but can work with lines of |
404 | | * any length. |
405 | | * |
406 | | * Returns: the next line of text, or NULL on EOF or read error. |
407 | | */ |
408 | | char * |
409 | | vips_sbuf_get_line_copy(VipsSbuf *sbuf) |
410 | 0 | { |
411 | 0 | static const unsigned char null = '\0'; |
412 | |
|
413 | 0 | VIPS_DEBUG_MSG("vips_sbuf_get_line_copy:\n"); |
414 | |
|
415 | 0 | GByteArray *buffer; |
416 | 0 | int ch; |
417 | 0 | char *result; |
418 | |
|
419 | 0 | buffer = g_byte_array_new(); |
420 | |
|
421 | 0 | while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 && |
422 | 0 | ch != '\n') { |
423 | 0 | unsigned char c = ch; |
424 | |
|
425 | 0 | g_byte_array_append(buffer, &c, 1); |
426 | 0 | } |
427 | | |
428 | | /* Immediate EOF. |
429 | | */ |
430 | 0 | if (ch == -1 && |
431 | 0 | buffer->len == 0) { |
432 | 0 | VIPS_FREEF(g_byte_array_unref, buffer); |
433 | 0 | return NULL; |
434 | 0 | } |
435 | | |
436 | | /* If the character before the \n was \r, this is probably a DOS file |
437 | | * and we should remove the \r. |
438 | | */ |
439 | 0 | if (ch == '\n' && |
440 | 0 | buffer->len > 0 && |
441 | 0 | buffer->data[buffer->len - 1] == '\r') |
442 | 0 | g_byte_array_set_size(buffer, buffer->len - 1); |
443 | |
|
444 | 0 | g_byte_array_append(buffer, &null, 1); |
445 | |
|
446 | 0 | result = (char *) g_byte_array_free(buffer, FALSE); |
447 | |
|
448 | 0 | VIPS_DEBUG_MSG(" %s\n", result); |
449 | |
|
450 | 0 | return result; |
451 | 0 | } |
452 | | |
453 | | /** |
454 | | * vips_sbuf_get_non_whitespace: |
455 | | * @sbuf: source to operate on |
456 | | * |
457 | | * Fetch the next chunk of non-whitespace text from the source, and |
458 | | * null-terminate it. |
459 | | * |
460 | | * After this, the next getc will be the first char of the next block of |
461 | | * whitespace (or EOF). |
462 | | * |
463 | | * If the first getc is whitespace, stop instantly and return the empty |
464 | | * string. |
465 | | * |
466 | | * If the item is longer than some arbitrary (but large) limit, it is |
467 | | * truncated. |
468 | | * |
469 | | * The return value is owned by @sbuf and must not be freed. It |
470 | | * is valid until the next get call to @sbuf. |
471 | | * |
472 | | * Returns: the next block of non-whitespace, or NULL on EOF or read error. |
473 | | */ |
474 | | const char * |
475 | | vips_sbuf_get_non_whitespace(VipsSbuf *sbuf) |
476 | 0 | { |
477 | 0 | int ch; |
478 | 0 | int i; |
479 | |
|
480 | 0 | for (i = 0; i < VIPS_SBUF_BUFFER_SIZE && |
481 | 0 | !g_ascii_isspace(ch = VIPS_SBUF_GETC(sbuf)) && |
482 | 0 | ch != EOF; |
483 | 0 | i++) |
484 | 0 | sbuf->line[i] = ch; |
485 | 0 | sbuf->line[i] = '\0'; |
486 | | |
487 | | /* If we stopped before seeing any whitespace, skip to the end of the |
488 | | * block of non-whitespace. |
489 | | */ |
490 | 0 | if (!g_ascii_isspace(ch)) |
491 | 0 | while (!g_ascii_isspace(ch = VIPS_SBUF_GETC(sbuf)) && |
492 | 0 | ch != EOF) |
493 | 0 | ; |
494 | | |
495 | | /* If we finally stopped on whitespace, step back one so the next get |
496 | | * will be whitespace (or EOF). |
497 | | */ |
498 | 0 | if (g_ascii_isspace(ch)) |
499 | 0 | VIPS_SBUF_UNGETC(sbuf); |
500 | |
|
501 | 0 | return (const char *) sbuf->line; |
502 | 0 | } |
503 | | |
504 | | /** |
505 | | * vips_sbuf_skip_whitespace: |
506 | | * @sbuf: source to operate on |
507 | | * |
508 | | * After this, the next getc will be the first char of the next block of |
509 | | * non-whitespace (or EOF). |
510 | | * |
511 | | * Also skip comments, ie. from any '#' character to the end of the line. |
512 | | * |
513 | | * Returns: 0 on success, or -1 on EOF. |
514 | | */ |
515 | | int |
516 | | vips_sbuf_skip_whitespace(VipsSbuf *sbuf) |
517 | 0 | { |
518 | 0 | int ch; |
519 | |
|
520 | 0 | do { |
521 | 0 | ch = VIPS_SBUF_GETC(sbuf); |
522 | | |
523 | | /* # skip comments too. |
524 | | */ |
525 | 0 | while (ch == '#') { |
526 | | /* Probably EOF. |
527 | | */ |
528 | 0 | if (!vips_sbuf_get_line(sbuf)) |
529 | 0 | return -1; |
530 | 0 | ch = VIPS_SBUF_GETC(sbuf); |
531 | 0 | } |
532 | 0 | } while (g_ascii_isspace(ch)); |
533 | | |
534 | 0 | VIPS_SBUF_UNGETC(sbuf); |
535 | |
|
536 | 0 | return 0; |
537 | 0 | } |