Coverage Report

Created: 2025-01-28 06:31

/src/libvips/libvips/iofuncs/sbuf.c
Line
Count
Source (jump to first uncovered line)
1
/* Buffered input from a source.
2
 *
3
 * J.Cupitt, 18/11/19
4
 */
5
6
/*
7
8
  This file is part of VIPS.
9
10
  VIPS is free software; you can redistribute it and/or modify
11
  it under the terms of the GNU Lesser General Public License as published by
12
  the Free Software Foundation; either version 2 of the License, or
13
  (at your option) any later version.
14
15
  This program is distributed in the hope that it will be useful,
16
  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
  GNU Lesser General Public License for more details.
19
20
  You should have received a copy of the GNU Lesser General Public License
21
  along with this program; if not, write to the Free Software
22
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23
  02110-1301  USA
24
25
 */
26
27
/*
28
29
  These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
30
31
 */
32
33
/*
34
#define VIPS_DEBUG
35
 */
36
37
#ifdef HAVE_CONFIG_H
38
#include <config.h>
39
#endif /*HAVE_CONFIG_H*/
40
#include <glib/gi18n-lib.h>
41
42
#include <stdio.h>
43
#include <ctype.h>
44
#include <stdlib.h>
45
#ifdef HAVE_UNISTD_H
46
#include <unistd.h>
47
#endif /*HAVE_UNISTD_H*/
48
#include <string.h>
49
#include <errno.h>
50
#include <sys/types.h>
51
#include <sys/stat.h>
52
#include <fcntl.h>
53
54
#include <vips/vips.h>
55
#include <vips/internal.h>
56
#include <vips/debug.h>
57
58
/**
59
 * SECTION: sbuf
60
 * @short_description: buffered read from a source
61
 * @stability: Stable
62
 * @see_also: <link linkend="libvips-foreign">foreign</link>
63
 * @include: vips/vips.h
64
 * @title: VipsSbuf
65
 *
66
 * #VipsSbuf wraps up a #VipsSource and provides a set of calls for
67
 * text-oriented buffered reading. You can fetch lines of text, skip
68
 * whitespace, and so on.
69
 *
70
 * It is useful for implementing things like CSV readers, for example.
71
 */
72
73
G_DEFINE_TYPE(VipsSbuf, vips_sbuf, VIPS_TYPE_OBJECT);
74
75
static void
76
vips_sbuf_class_init(VipsSbufClass *class)
77
1
{
78
1
  VipsObjectClass *object_class = VIPS_OBJECT_CLASS(class);
79
1
  GObjectClass *gobject_class = G_OBJECT_CLASS(class);
80
81
1
  gobject_class->set_property = vips_object_set_property;
82
1
  gobject_class->get_property = vips_object_get_property;
83
84
1
  object_class->nickname = "sbuf";
85
1
  object_class->description = _("buffered source");
86
87
1
  VIPS_ARG_OBJECT(class, "input", 1,
88
1
    _("Input"),
89
1
    _("Source to load from"),
90
1
    VIPS_ARGUMENT_REQUIRED_INPUT,
91
1
    G_STRUCT_OFFSET(VipsSbuf, source),
92
1
    VIPS_TYPE_SOURCE);
93
1
}
94
95
static void
96
vips_sbuf_init(VipsSbuf *sbuf)
97
7.41k
{
98
7.41k
  sbuf->read_point = 0;
99
7.41k
  sbuf->chars_in_buffer = 0;
100
7.41k
  sbuf->input_buffer[0] = '\0';
101
7.41k
}
102
103
/**
104
 * vips_sbuf_new_from_source:
105
 * @source: source to operate on
106
 *
107
 * Create a VipsSbuf wrapping a source.
108
 *
109
 * Returns: a new #VipsSbuf
110
 */
111
VipsSbuf *
112
vips_sbuf_new_from_source(VipsSource *source)
113
7.41k
{
114
7.41k
  VipsSbuf *sbuf;
115
116
7.41k
  g_assert(source);
117
118
7.41k
  sbuf = VIPS_SBUF(g_object_new(VIPS_TYPE_SBUF,
119
7.41k
    "input", source,
120
7.41k
    NULL));
121
122
7.41k
  if (vips_object_build(VIPS_OBJECT(sbuf))) {
123
0
    VIPS_UNREF(sbuf);
124
0
    return NULL;
125
0
  }
126
127
7.41k
  return sbuf;
128
7.41k
}
129
130
/**
131
 * vips_sbuf_unbuffer:
132
 * @sbuf: source to operate on
133
 *
134
 * Discard the input buffer and reset the read point. You must call this
135
 * before using read or seek on the underlying #VipsSource class.
136
 */
137
void
138
vips_sbuf_unbuffer(VipsSbuf *sbuf)
139
0
{
140
  /* We'd read ahead a little way -- seek backwards by that amount.
141
   */
142
0
  vips_source_seek(sbuf->source,
143
0
    sbuf->read_point - sbuf->chars_in_buffer, SEEK_CUR);
144
0
  sbuf->read_point = 0;
145
0
  sbuf->chars_in_buffer = 0;
146
0
}
147
148
/* Returns -1 on error, 0 on EOF, otherwise bytes read.
149
 */
150
static gint64
151
vips_sbuf_refill(VipsSbuf *sbuf)
152
11.3k
{
153
11.3k
  gint64 bytes_read;
154
155
11.3k
  VIPS_DEBUG_MSG("vips_sbuf_refill:\n");
156
157
  /* We should not discard any unread bytes.
158
   */
159
11.3k
  g_assert(sbuf->read_point == sbuf->chars_in_buffer);
160
161
11.3k
  bytes_read = vips_source_read(sbuf->source,
162
11.3k
    sbuf->input_buffer, VIPS_SBUF_BUFFER_SIZE);
163
11.3k
  if (bytes_read == -1)
164
0
    return -1;
165
166
11.3k
  sbuf->read_point = 0;
167
11.3k
  sbuf->chars_in_buffer = bytes_read;
168
169
  /* Always add a null byte so we can use strchr() etc. on lines. This is
170
   * safe because input_buffer is VIPS_SBUF_BUFFER_SIZE + 1 bytes.
171
   */
172
11.3k
  sbuf->input_buffer[bytes_read] = '\0';
173
174
11.3k
  return bytes_read;
175
11.3k
}
176
177
/**
178
 * vips_sbuf_getc:
179
 * @sbuf: source to operate on
180
 *
181
 * Fetch the next character from the source.
182
 *
183
 * If you can, use the macro VIPS_SBUF_GETC() instead for speed.
184
 *
185
 * Returns: the next char from @sbuf, -1 on read error or EOF.
186
 */
187
int
188
vips_sbuf_getc(VipsSbuf *sbuf)
189
11.3k
{
190
11.3k
  if (sbuf->read_point == sbuf->chars_in_buffer &&
191
11.3k
    vips_sbuf_refill(sbuf) <= 0)
192
2.40k
    return -1;
193
194
11.3k
  g_assert(sbuf->read_point < sbuf->chars_in_buffer);
195
196
8.95k
  return sbuf->input_buffer[sbuf->read_point++];
197
11.3k
}
198
199
/**
200
 * VIPS_SBUF_GETC:
201
 * @sbuf: source to operate on
202
 *
203
 * Fetch the next character from the source.
204
 *
205
 * Returns: the next char from @sbuf, -1 on read error or EOF.
206
 */
207
208
/**
209
 * vips_sbuf_ungetc:
210
 * @sbuf: source to operate on
211
 *
212
 * The opposite of vips_sbuf_getc(): undo the previous getc.
213
 *
214
 * unget more than one character is undefined. Unget at the start of the file
215
 * does nothing.
216
 *
217
 * If you can, use the macro VIPS_SBUF_UNGETC() instead for speed.
218
 */
219
void
220
vips_sbuf_ungetc(VipsSbuf *sbuf)
221
0
{
222
0
  if (sbuf->read_point > 0)
223
0
    sbuf->read_point -= 1;
224
0
}
225
226
/**
227
 * VIPS_SBUF_UNGETC:
228
 * @sbuf: source to operate on
229
 *
230
 * The opposite of vips_sbuf_getc(): undo the previous getc.
231
 *
232
 * Ungetting more than one character is undefined. Unget at the start of the file
233
 * does nothing.
234
 */
235
236
/**
237
 * vips_sbuf_require:
238
 * @sbuf: source to operate on
239
 * @require: make sure we have at least this many chars available
240
 *
241
 * Make sure there are at least @require bytes of readahead available.
242
 *
243
 * Returns: 0 on success, -1 on error or EOF.
244
 */
245
int
246
vips_sbuf_require(VipsSbuf *sbuf, int require)
247
218
{
248
218
  g_assert(require < VIPS_SBUF_BUFFER_SIZE);
249
218
  g_assert(sbuf->chars_in_buffer >= 0);
250
218
  g_assert(sbuf->chars_in_buffer <= VIPS_SBUF_BUFFER_SIZE);
251
218
  g_assert(sbuf->read_point >= 0);
252
218
  g_assert(sbuf->read_point <= sbuf->chars_in_buffer);
253
254
218
  VIPS_DEBUG_MSG("vips_sbuf_require: %d\n", require);
255
256
218
  if (sbuf->read_point + require > sbuf->chars_in_buffer) {
257
    /* Areas can overlap, so we must memmove().
258
     */
259
218
    memmove(sbuf->input_buffer,
260
218
      sbuf->input_buffer + sbuf->read_point,
261
218
      sbuf->chars_in_buffer - sbuf->read_point);
262
218
    sbuf->chars_in_buffer -= sbuf->read_point;
263
218
    sbuf->read_point = 0;
264
265
263
    while (require > sbuf->chars_in_buffer) {
266
220
      unsigned char *to = sbuf->input_buffer +
267
220
        sbuf->chars_in_buffer;
268
220
      int space_available =
269
220
        VIPS_SBUF_BUFFER_SIZE -
270
220
        sbuf->chars_in_buffer;
271
220
      gint64 bytes_read;
272
273
220
      if ((bytes_read = vips_source_read(sbuf->source,
274
220
           to, space_available)) < 0)
275
0
        return -1;
276
220
      if (bytes_read == 0) {
277
175
        vips_error(
278
175
          vips_connection_nick(VIPS_CONNECTION(
279
175
            sbuf->source)),
280
175
          "%s", _("end of file"));
281
175
        return -1;
282
175
      }
283
284
45
      to[bytes_read] = '\0';
285
45
      sbuf->chars_in_buffer += bytes_read;
286
45
    }
287
218
  }
288
289
43
  return 0;
290
218
}
291
292
/**
293
 * VIPS_SBUF_REQUIRE:
294
 * @sbuf: source to operate on
295
 * @require: need this many characters
296
 *
297
 * Make sure at least @require characters are available for
298
 * VIPS_SBUF_PEEK() and VIPS_SBUF_FETCH().
299
 *
300
 * Returns: 0 on success, -1 on read error or EOF.
301
 */
302
303
/**
304
 * VIPS_SBUF_PEEK:
305
 * @sbuf: source to operate on
306
 *
307
 * After a successful VIPS_SBUF_REQUIRE(), you can index this to get
308
 * that many characters of input.
309
 *
310
 * Returns: a pointer to the required characters of input.
311
 */
312
313
/**
314
 * VIPS_SBUF_FETCH:
315
 * @sbuf: source to operate on
316
 *
317
 * After a successful VIPS_SBUF_REQUIRE(), you can call this that many times
318
 * to fetch characters of input.
319
 *
320
 * Returns: the next input character.
321
 */
322
323
/**
324
 * vips_sbuf_get_line:
325
 * @sbuf: source to operate on
326
 *
327
 * Fetch the next line of text from @sbuf and return it. The end of
328
 * line character (or characters, for DOS files) are removed, and the string
329
 * is terminated with a null (`\0` character).
330
 *
331
 * Returns NULL on end of file or read error.
332
 *
333
 * If the line is longer than some arbitrary (but large) limit, it is
334
 * truncated. If you need to be able to read very long lines, use the
335
 * slower vips_sbuf_get_line_copy().
336
 *
337
 * The return value is owned by @sbuf and must not be freed. It
338
 * is valid until the next get call to @sbuf.
339
 *
340
 * Returns: the next line of text, or NULL on EOF or read error.
341
 */
342
const char *
343
vips_sbuf_get_line(VipsSbuf *sbuf)
344
19.1k
{
345
19.1k
  int write_point;
346
19.1k
  int space_remaining;
347
19.1k
  int ch;
348
349
19.1k
  VIPS_DEBUG_MSG("vips_sbuf_get_line:\n");
350
351
19.1k
  write_point = 0;
352
19.1k
  space_remaining = VIPS_SBUF_BUFFER_SIZE;
353
354
2.67M
  while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 &&
355
2.67M
    ch != '\n' &&
356
2.67M
    space_remaining > 0) {
357
2.65M
    sbuf->line[write_point] = ch;
358
2.65M
    write_point += 1;
359
2.65M
    space_remaining -= 1;
360
2.65M
  }
361
19.1k
  sbuf->line[write_point] = '\0';
362
363
  /* If we hit EOF immediately, return EOF.
364
   */
365
19.1k
  if (ch == -1 &&
366
19.1k
    write_point == 0)
367
270
    return NULL;
368
369
  /* If the final char in the buffer is \r, this is probably a DOS file
370
   * and we should remove that too.
371
   *
372
   * There's a chance this could incorrectly remove \r in very long
373
   * lines, but ignore this.
374
   */
375
18.8k
  if (write_point > 0 &&
376
18.8k
    sbuf->line[write_point - 1] == '\r')
377
750
    sbuf->line[write_point - 1] = '\0';
378
  /* If we filled the output line without seeing \n, keep going to the
379
   * next \n.
380
   */
381
18.8k
  if (ch != '\n' &&
382
18.8k
    space_remaining == 0) {
383
4.28M
    while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 &&
384
4.28M
      ch != '\n')
385
4.28M
      ;
386
187
  }
387
388
18.8k
  VIPS_DEBUG_MSG("    %s\n", sbuf->line);
389
390
18.8k
  return (const char *) sbuf->line;
391
19.1k
}
392
393
/**
394
 * vips_sbuf_get_line_copy:
395
 * @sbuf: source to operate on
396
 *
397
 * Fetch the next line of text from @sbuf and return it. The end of
398
 * line character (or characters, for DOS files) are removed, and the string
399
 * is terminated with a null (`\0` character).
400
 *
401
 * The return result must be freed with g_free().
402
 *
403
 * This is slower than vips_sbuf_get_line(), but can work with lines of
404
 * any length.
405
 *
406
 * Returns: the next line of text, or NULL on EOF or read error.
407
 */
408
char *
409
vips_sbuf_get_line_copy(VipsSbuf *sbuf)
410
0
{
411
0
  static const unsigned char null = '\0';
412
413
0
  VIPS_DEBUG_MSG("vips_sbuf_get_line_copy:\n");
414
415
0
  GByteArray *buffer;
416
0
  int ch;
417
0
  char *result;
418
419
0
  buffer = g_byte_array_new();
420
421
0
  while ((ch = VIPS_SBUF_GETC(sbuf)) != -1 &&
422
0
    ch != '\n') {
423
0
    unsigned char c = ch;
424
425
0
    g_byte_array_append(buffer, &c, 1);
426
0
  }
427
428
  /* Immediate EOF.
429
   */
430
0
  if (ch == -1 &&
431
0
    buffer->len == 0) {
432
0
    VIPS_FREEF(g_byte_array_unref, buffer);
433
0
    return NULL;
434
0
  }
435
436
  /* If the character before the \n was \r, this is probably a DOS file
437
   * and we should remove the \r.
438
   */
439
0
  if (ch == '\n' &&
440
0
    buffer->len > 0 &&
441
0
    buffer->data[buffer->len - 1] == '\r')
442
0
    g_byte_array_set_size(buffer, buffer->len - 1);
443
444
0
  g_byte_array_append(buffer, &null, 1);
445
446
0
  result = (char *) g_byte_array_free(buffer, FALSE);
447
448
0
  VIPS_DEBUG_MSG("    %s\n", result);
449
450
0
  return result;
451
0
}
452
453
/**
454
 * vips_sbuf_get_non_whitespace:
455
 * @sbuf: source to operate on
456
 *
457
 * Fetch the next chunk of non-whitespace text from the source, and
458
 * null-terminate it.
459
 *
460
 * After this, the next getc will be the first char of the next block of
461
 * whitespace (or EOF).
462
 *
463
 * If the first getc is whitespace, stop instantly and return the empty
464
 * string.
465
 *
466
 * If the item is longer than some arbitrary (but large) limit, it is
467
 * truncated.
468
 *
469
 * The return value is owned by @sbuf and must not be freed. It
470
 * is valid until the next get call to @sbuf.
471
 *
472
 * Returns: the next block of non-whitespace, or NULL on EOF or read error.
473
 */
474
const char *
475
vips_sbuf_get_non_whitespace(VipsSbuf *sbuf)
476
0
{
477
0
  int ch;
478
0
  int i;
479
480
0
  for (i = 0; i < VIPS_SBUF_BUFFER_SIZE &&
481
0
     !g_ascii_isspace(ch = VIPS_SBUF_GETC(sbuf)) &&
482
0
     ch != EOF;
483
0
     i++)
484
0
    sbuf->line[i] = ch;
485
0
  sbuf->line[i] = '\0';
486
487
  /* If we stopped before seeing any whitespace, skip to the end of the
488
   * block of non-whitespace.
489
   */
490
0
  if (!g_ascii_isspace(ch))
491
0
    while (!g_ascii_isspace(ch = VIPS_SBUF_GETC(sbuf)) &&
492
0
      ch != EOF)
493
0
      ;
494
495
  /* If we finally stopped on whitespace, step back one so the next get
496
   * will be whitespace (or EOF).
497
   */
498
0
  if (g_ascii_isspace(ch))
499
0
    VIPS_SBUF_UNGETC(sbuf);
500
501
0
  return (const char *) sbuf->line;
502
0
}
503
504
/**
505
 * vips_sbuf_skip_whitespace:
506
 * @sbuf: source to operate on
507
 *
508
 * After this, the next getc will be the first char of the next block of
509
 * non-whitespace (or EOF).
510
 *
511
 * Also skip comments, ie. from any '#' character to the end of the line.
512
 *
513
 * Returns: 0 on success, or -1 on EOF.
514
 */
515
int
516
vips_sbuf_skip_whitespace(VipsSbuf *sbuf)
517
0
{
518
0
  int ch;
519
520
0
  do {
521
0
    ch = VIPS_SBUF_GETC(sbuf);
522
523
    /* # skip comments too.
524
     */
525
0
    while (ch == '#') {
526
      /* Probably EOF.
527
       */
528
0
      if (!vips_sbuf_get_line(sbuf))
529
0
        return -1;
530
0
      ch = VIPS_SBUF_GETC(sbuf);
531
0
    }
532
0
  } while (g_ascii_isspace(ch));
533
534
0
  VIPS_SBUF_UNGETC(sbuf);
535
536
0
  return 0;
537
0
}