Coverage Report

Created: 2025-12-03 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/fitz/stream-read.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
27
28
#define MIN_BOMB (100 << 20)
28
29
size_t
30
fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len)
31
14.0k
{
32
14.0k
  size_t count, n;
33
34
14.0k
  count = 0;
35
14.0k
  do
36
24.1k
  {
37
24.1k
    n = fz_available(ctx, stm, len);
38
24.1k
    if (n > len)
39
13.9k
      n = len;
40
24.1k
    if (n == 0)
41
34
      break;
42
43
24.0k
    memcpy(buf, stm->rp, n);
44
24.0k
    stm->rp += n;
45
24.0k
    buf += n;
46
24.0k
    count += n;
47
24.0k
    len -= n;
48
24.0k
  }
49
24.0k
  while (len > 0);
50
51
14.0k
  return count;
52
14.0k
}
53
54
static unsigned char skip_buf[4096];
55
56
size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len)
57
0
{
58
0
  size_t count, l, total = 0;
59
60
0
  while (len)
61
0
  {
62
0
    l = len;
63
0
    if (l > sizeof(skip_buf))
64
0
      l = sizeof(skip_buf);
65
0
    count = fz_read(ctx, stm, skip_buf, l);
66
0
    total += count;
67
0
    if (count < l)
68
0
      break;
69
0
    len -= count;
70
0
  }
71
0
  return total;
72
0
}
73
74
fz_buffer *
75
fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial)
76
13
{
77
13
  return fz_read_best(ctx, stm, initial, NULL, 0);
78
13
}
79
80
fz_buffer *
81
fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case)
82
15
{
83
15
  fz_buffer *buf = NULL;
84
15
  int check_bomb = (initial > 0);
85
15
  size_t n;
86
87
15
  fz_var(buf);
88
89
15
  if (truncated)
90
0
    *truncated = 0;
91
92
15
  if (worst_case == 0)
93
15
    worst_case = initial * 200;
94
15
  if (worst_case < MIN_BOMB)
95
13
    worst_case = MIN_BOMB;
96
97
30
  fz_try(ctx)
98
30
  {
99
15
    if (initial < 1024)
100
12
      initial = 1024;
101
102
15
    buf = fz_new_buffer(ctx, initial+1);
103
104
257
    while (1)
105
257
    {
106
257
      if (buf->len == buf->cap)
107
227
        fz_grow_buffer(ctx, buf);
108
109
257
      if (check_bomb && buf->len > worst_case)
110
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected");
111
112
257
      n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len);
113
257
      if (n == 0)
114
15
        break;
115
116
242
      buf->len += n;
117
242
    }
118
15
  }
119
30
  fz_catch(ctx)
120
0
  {
121
0
    if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
122
0
    {
123
0
      fz_drop_buffer(ctx, buf);
124
0
      fz_rethrow(ctx);
125
0
    }
126
0
    if (truncated)
127
0
    {
128
0
      *truncated = 1;
129
0
      fz_report_error(ctx);
130
0
    }
131
0
    else
132
0
    {
133
0
      fz_drop_buffer(ctx, buf);
134
0
      fz_rethrow(ctx);
135
0
    }
136
0
  }
137
138
15
  return buf;
139
15
}
140
141
char *
142
fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n)
143
0
{
144
0
  char *s = mem;
145
0
  int c = EOF;
146
0
  while (n > 1)
147
0
  {
148
0
    c = fz_read_byte(ctx, stm);
149
0
    if (c == EOF)
150
0
      break;
151
0
    if (c == '\r') {
152
0
      c = fz_peek_byte(ctx, stm);
153
0
      if (c == '\n')
154
0
        fz_read_byte(ctx, stm);
155
0
      break;
156
0
    }
157
0
    if (c == '\n')
158
0
      break;
159
0
    *s++ = c;
160
0
    n--;
161
0
  }
162
0
  if (n)
163
0
    *s = '\0';
164
0
  return (s == mem && c == EOF) ? NULL : mem;
165
0
}
166
167
int64_t
168
fz_tell(fz_context *ctx, fz_stream *stm)
169
324k
{
170
324k
  return stm->pos - (stm->wp - stm->rp);
171
324k
}
172
173
void
174
fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
175
15.8k
{
176
15.8k
  stm->avail = 0; /* Reset bit reading */
177
15.8k
  if (stm->seek)
178
15.8k
  {
179
15.8k
    if (whence == 1)
180
0
    {
181
0
      offset += fz_tell(ctx, stm);
182
0
      whence = 0;
183
0
    }
184
15.8k
    stm->seek(ctx, stm, offset, whence);
185
15.8k
    stm->eof = 0;
186
15.8k
  }
187
0
  else if (whence != 2)
188
0
  {
189
0
    if (whence == 0)
190
0
      offset -= fz_tell(ctx, stm);
191
0
    if (offset < 0)
192
0
      fz_warn(ctx, "cannot seek backwards");
193
    /* dog slow, but rare enough */
194
0
    while (offset-- > 0)
195
0
    {
196
0
      if (fz_read_byte(ctx, stm) == EOF)
197
0
      {
198
0
        fz_warn(ctx, "seek failed");
199
0
        break;
200
0
      }
201
0
    }
202
0
  }
203
0
  else
204
0
    fz_warn(ctx, "cannot seek");
205
15.8k
}
206
207
fz_buffer *
208
fz_read_file(fz_context *ctx, const char *filename)
209
0
{
210
0
  fz_stream *stm;
211
0
  fz_buffer *buf = NULL;
212
213
0
  fz_var(buf);
214
215
0
  stm = fz_open_file(ctx, filename);
216
0
  fz_try(ctx)
217
0
  {
218
0
    buf = fz_read_all(ctx, stm, 0);
219
0
  }
220
0
  fz_always(ctx)
221
0
  {
222
0
    fz_drop_stream(ctx, stm);
223
0
  }
224
0
  fz_catch(ctx)
225
0
  {
226
0
    fz_rethrow(ctx);
227
0
  }
228
229
0
  return buf;
230
0
}
231
232
fz_buffer *
233
fz_try_read_file(fz_context *ctx, const char *filename)
234
0
{
235
0
  fz_stream *stm;
236
0
  fz_buffer *buf = NULL;
237
238
0
  fz_var(buf);
239
240
0
  stm = fz_try_open_file(ctx, filename);
241
0
  if (stm == NULL)
242
0
    return NULL;
243
0
  fz_try(ctx)
244
0
  {
245
0
    buf = fz_read_all(ctx, stm, 0);
246
0
  }
247
0
  fz_always(ctx)
248
0
  {
249
0
    fz_drop_stream(ctx, stm);
250
0
  }
251
0
  fz_catch(ctx)
252
0
  {
253
0
    fz_rethrow(ctx);
254
0
  }
255
256
0
  return buf;
257
0
}
258
259
/*
  Read a 16-bit unsigned integer stored most significant byte first.

  Both bytes are always requested from the stream before the result
  is checked; if either read returned EOF a FZ_ERROR_FORMAT error is
  thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm)
{
  unsigned int value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 2; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value = (value << 8) | (unsigned int)byte;
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
  return (uint16_t)value;
}
267
268
/*
  Read a 24-bit unsigned integer stored most significant byte first.

  All three bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 3; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value = (value << 8) | (uint32_t)byte;
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
  return value;
}
277
278
/*
  Read a 32-bit unsigned integer stored most significant byte first.

  All four bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 4; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value = (value << 8) | (uint32_t)byte;
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
  return value;
}
288
289
/*
  Read a 64-bit unsigned integer stored most significant byte first.

  All eight bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm)
{
  uint64_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 8; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value = (value << 8) | (uint64_t)byte;
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
  return value;
}
304
305
/*
  Read a 16-bit unsigned integer stored least significant byte first.

  Both bytes are always requested from the stream before the result
  is checked; if either read returned EOF a FZ_ERROR_FORMAT error is
  thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
{
  unsigned int value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 2; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value |= (unsigned int)byte << (8 * i);
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
  return (uint16_t)value;
}
313
314
/*
  Read a 24-bit unsigned integer stored least significant byte first.

  All three bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 3; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value |= (uint32_t)byte << (8 * i);
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
  return value;
}
323
324
/*
  Read a 32-bit unsigned integer stored least significant byte first.

  All four bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 4; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value |= (uint32_t)byte << (8 * i);
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
  return value;
}
334
335
/*
  Read a 64-bit unsigned integer stored least significant byte first.

  All eight bytes are always requested from the stream before the
  result is checked; if any read returned EOF a FZ_ERROR_FORMAT error
  is thrown. Assumes fz_read_byte yields 0..255 or EOF.
*/
uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
{
  uint64_t value = 0;
  int saw_eof = 0;
  int i;

  for (i = 0; i < 8; i++)
  {
    int byte = fz_read_byte(ctx, stm);
    if (byte == EOF)
      saw_eof = 1;
    else
      value |= (uint64_t)byte << (8 * i);
  }

  if (saw_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
  return value;
}
350
351
0
/* Read 16 bits via fz_read_uint16 and reinterpret them as a signed value. */
int16_t fz_read_int16(fz_context *ctx, fz_stream *stm)
{
  uint16_t raw = fz_read_uint16(ctx, stm);

  return (int16_t)raw;
}
352
0
/* Read 32 bits via fz_read_uint32 and reinterpret them as a signed value. */
int32_t fz_read_int32(fz_context *ctx, fz_stream *stm)
{
  uint32_t raw = fz_read_uint32(ctx, stm);

  return (int32_t)raw;
}
353
0
/* Read 64 bits via fz_read_uint64 and reinterpret them as a signed value. */
int64_t fz_read_int64(fz_context *ctx, fz_stream *stm)
{
  uint64_t raw = fz_read_uint64(ctx, stm);

  return (int64_t)raw;
}
354
355
0
/* Read 16 bits via fz_read_uint16_le and reinterpret them as a signed value. */
int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm)
{
  uint16_t raw = fz_read_uint16_le(ctx, stm);

  return (int16_t)raw;
}
356
0
/* Read 32 bits via fz_read_uint32_le and reinterpret them as a signed value. */
int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t raw = fz_read_uint32_le(ctx, stm);

  return (int32_t)raw;
}
357
0
/* Read 64 bits via fz_read_uint64_le and reinterpret them as a signed value. */
int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm)
{
  uint64_t raw = fz_read_uint64_le(ctx, stm);

  return (int64_t)raw;
}
358
359
float
360
fz_read_float_le(fz_context *ctx, fz_stream *stm)
361
0
{
362
0
  union {float f;int32_t i;} u;
363
364
0
  u.i = fz_read_int32_le(ctx, stm);
365
0
  return u.f;
366
0
}
367
368
float
369
fz_read_float(fz_context *ctx, fz_stream *stm)
370
0
{
371
0
  union {float f;int32_t i;} u;
372
373
0
  u.i = fz_read_int32(ctx, stm);
374
0
  return u.f;
375
0
}
376
377
/*
  Read a NUL terminated string from the stream into buffer.

  Bytes are copied up to and including the terminating zero byte.
  len is the capacity of buffer in bytes, terminator included.

  Throws FZ_ERROR_FORMAT if the terminator has not been seen once len
  bytes have been stored, or if the stream ends first. Bytes stored
  before the error remain in buffer.
*/
void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
{
  char *out = buffer;
  int remaining = len;

  for (;;)
  {
    int c;

    if (remaining <= 0)
      fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string");

    c = fz_read_byte(ctx, stm);
    if (c == EOF)
      fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string");

    *out++ = c;
    remaining--;

    /* The terminator has been stored, so the string is complete. */
    if (c == 0)
      return;
  }
}
393
394
/*
  Read one UTF-8 encoded character from the stream.

  Returns the decoded code point, EOF if the stream is already at its
  end, or 0xFFFD (the replacement character) when the input is
  malformed:

  - If the stream ends in the middle of a multi-byte sequence, 0xFFFD
    is returned.
  - If a byte that should be a continuation byte (10xxxxxx) is not
    one, that byte is pushed back so the next call can decode it as
    the start of a new character, and 0xFFFD is returned for the
    broken sequence.
  - A continuation byte found where a lead byte is expected is
    consumed and reported as 0xFFFD. It must not be pushed back,
    otherwise every subsequent call would see the same byte again and
    the reader would never advance.

  Lead bytes that match none of the patterns (0xF8..0xFF) are returned
  unchanged, as are plain ASCII bytes. No check is made for overlong
  encodings or for encoded surrogates.
*/
int fz_read_rune(fz_context *ctx, fz_stream *in)
{
  int trail; /* number of continuation bytes still expected */
  int x;
  int c = fz_read_byte(ctx, in);
  if (c == EOF)
    return EOF;

  /* Classify the lead byte and keep only its payload bits. */
  if ((c & 0xF8) == 0xF0)
  {
    trail = 3;
    c &= 7;
  }
  else if ((c & 0xF0) == 0xE0)
  {
    trail = 2;
    c &= 15;
  }
  else if ((c & 0xE0) == 0xC0)
  {
    trail = 1;
    c &= 31;
  }
  else if ((c & 0xC0) == 0x80)
  {
    /* Stray continuation byte: swallow it so that we make progress. */
    return 0xFFFD;
  }
  else
    return c;

  while (trail-- > 0)
  {
    x = fz_read_byte(ctx, in);
    if (x == EOF)
      return 0xFFFD;
    if ((x & 0xC0) != 0x80)
    {
      /* Not a continuation byte; leave it for the next call. */
      fz_unread_byte(ctx, in);
      return 0xFFFD;
    }
    c = (c << 6) | (x & 0x3F);
  }

  return c;
}
484
485
/*
  Read one character from a little-endian UTF-16 stream.

  Returns EOF if the stream is already at its end, otherwise a code
  point. Malformed input is decoded on a best-effort basis rather
  than rejected:

  - A lone first byte followed by end of stream is returned as is.
  - A trailing (low) surrogate with no leading one is decoded as if
    the leading surrogate had been 0.
  - A leading (high) surrogate followed by end of stream, or by a
    unit that is not a trailing surrogate, is decoded as if the
    trailing surrogate had been 0. In the latter case the two bytes
    of the offending unit have already been consumed.
*/
int fz_read_utf16_le(fz_context *ctx, fz_stream *stm)
{
  int lo, hi, lead, trail;

  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
    return EOF;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    return lo; /* Might be wrong, but the best we can do. */

  lead = lo | (hi << 8);

  /* Anything outside the surrogate range is a complete character. */
  if (lead < 0xd800 || lead >= 0xe000)
    return lead;

  /* A trailing surrogate on its own: pretend the leading one was 0. */
  if (lead >= 0xdc00)
    return 0x10000 + lead - 0xdc00;

  /* Only one byte of lookahead is possible, which is not enough to
   * check for a trailing surrogate, so simply read the next unit. */
  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
    return 0x10000 + ((lead - 0xd800) << 10);

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    hi = 0xDC; /* Fudge a low surrogate */

  trail = lo | (hi << 8);

  /* Not a trailing surrogate: its two bytes are lost, and the
   * result is computed as if the trailing surrogate had been 0. */
  if (trail < 0xdc00 || trail >= 0xe000)
    return 0x10000 + ((lead - 0xd800) << 10);

  return 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
}
537
538
/*
  Read one character from a big-endian UTF-16 stream.

  Returns EOF if the stream is already at its end, otherwise a code
  point. Malformed input is decoded on a best-effort basis rather
  than rejected:

  - A lone first byte followed by end of stream is returned as is.
  - A trailing (low) surrogate with no leading one is decoded as if
    the leading surrogate had been 0.
  - A leading (high) surrogate followed by end of stream is decoded
    as if the trailing surrogate had been 0.
  - If the byte after a leading surrogate cannot start a trailing
    surrogate it is pushed back and the trailing surrogate is taken
    to be 0.
  - If the stream ends after the first byte of the trailing
    surrogate, its second byte is taken to be 0.
*/
int fz_read_utf16_be(fz_context *ctx, fz_stream *stm)
{
  int hi, lo, lead, trail;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    return EOF;

  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
    return hi; /* Might be wrong, but the best we can do. */

  lead = (hi << 8) | lo;

  /* Anything outside the surrogate range is a complete character. */
  if (lead < 0xd800 || lead >= 0xe000)
    return lead;

  /* A trailing surrogate on its own: pretend the leading one was 0. */
  if (lead >= 0xdc00)
    return 0x10000 + lead - 0xdc00;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    return 0x10000 + ((lead - 0xd800) << 10);

  if (hi >= 0xdc && hi < 0xe0)
  {
    /* The high byte starts a trailing surrogate; fetch the low byte. */
    lo = fz_read_byte(ctx, stm);
    if (lo == EOF)
      lo = 0;
    trail = (hi << 8) | lo;
  }
  else
  {
    /* Not the start of a trailing surrogate. Put the byte back and
     * pretend it was a 0 surrogate. */
    fz_unread_byte(ctx, stm);
    trail = 0xdc00;
  }

  return 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
}