Coverage Report

Created: 2026-03-31 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/fitz/stream-read.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
27
5
#define MIN_BOMB (100 << 20)
28
29
size_t
30
fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len)
31
13.5k
{
32
13.5k
  size_t count, n;
33
34
13.5k
  count = 0;
35
13.5k
  do
36
13.7k
  {
37
13.7k
    n = fz_available(ctx, stm, len);
38
13.7k
    if (n > len)
39
13.4k
      n = len;
40
13.7k
    if (n == 0)
41
10
      break;
42
43
13.7k
    memcpy(buf, stm->rp, n);
44
13.7k
    stm->rp += n;
45
13.7k
    buf += n;
46
13.7k
    count += n;
47
13.7k
    len -= n;
48
13.7k
  }
49
13.7k
  while (len > 0);
50
51
13.5k
  return count;
52
13.5k
}
53
54
static unsigned char skip_buf[4096];
55
56
/*
  Consume up to len bytes from stm, discarding them into the file-level
  scratch buffer skip_buf.

  Returns the number of bytes actually consumed; this is less than len
  when one of the underlying fz_read calls comes up short.
*/
size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len)
{
  size_t skipped = 0;

  while (len > 0)
  {
    /* Never ask for more than the scratch buffer can hold. */
    size_t want = (len > sizeof(skip_buf)) ? sizeof(skip_buf) : len;
    size_t got = fz_read(ctx, stm, skip_buf, want);

    skipped += got;
    if (got < want)
      break;
    len -= got;
  }

  return skipped;
}
73
74
fz_buffer *
75
fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial)
76
2
{
77
2
  return fz_read_best(ctx, stm, initial, NULL, 0);
78
2
}
79
80
fz_buffer *
81
fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case)
82
3
{
83
3
  fz_buffer *buf = NULL;
84
3
  int check_bomb = (initial > 0);
85
3
  size_t n;
86
87
3
  fz_var(buf);
88
89
3
  if (truncated)
90
0
    *truncated = 0;
91
92
3
  if (worst_case == 0)
93
3
    worst_case = initial * 200;
94
3
  if (worst_case < MIN_BOMB)
95
2
    worst_case = MIN_BOMB;
96
97
6
  fz_try(ctx)
98
6
  {
99
3
    if (initial < 1024)
100
2
      initial = 1024;
101
102
3
    buf = fz_new_buffer(ctx, initial+1);
103
104
42
    while (1)
105
42
    {
106
42
      if (buf->len == buf->cap)
107
36
        fz_grow_buffer(ctx, buf);
108
109
42
      if (check_bomb && buf->len > worst_case)
110
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected");
111
112
42
      n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len);
113
42
      if (n == 0)
114
3
        break;
115
116
39
      buf->len += n;
117
39
    }
118
3
  }
119
6
  fz_catch(ctx)
120
0
  {
121
0
    if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
122
0
    {
123
0
      fz_drop_buffer(ctx, buf);
124
0
      fz_rethrow(ctx);
125
0
    }
126
0
    if (truncated)
127
0
    {
128
0
      *truncated = 1;
129
0
      fz_report_error(ctx);
130
0
    }
131
0
    else
132
0
    {
133
0
      fz_drop_buffer(ctx, buf);
134
0
      fz_rethrow(ctx);
135
0
    }
136
0
  }
137
138
3
  return buf;
139
3
}
140
141
char *
142
fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n)
143
0
{
144
0
  char *s = mem;
145
0
  int c = EOF;
146
0
  while (n > 1)
147
0
  {
148
0
    c = fz_read_byte(ctx, stm);
149
0
    if (c == EOF)
150
0
      break;
151
0
    if (c == '\r') {
152
0
      c = fz_peek_byte(ctx, stm);
153
0
      if (c == '\n')
154
0
        fz_read_byte(ctx, stm);
155
0
      break;
156
0
    }
157
0
    if (c == '\n')
158
0
      break;
159
0
    *s++ = c;
160
0
    n--;
161
0
  }
162
0
  if (n)
163
0
    *s = '\0';
164
0
  return (s == mem && c == EOF) ? NULL : mem;
165
0
}
166
167
int64_t
168
fz_tell(fz_context *ctx, fz_stream *stm)
169
290k
{
170
290k
  return stm->pos - (stm->wp - stm->rp);
171
290k
}
172
173
void
174
fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
175
14.7k
{
176
14.7k
  stm->avail = 0; /* Reset bit reading */
177
14.7k
  if (stm->seek)
178
14.7k
  {
179
14.7k
    if (whence == 1)
180
0
    {
181
0
      offset += fz_tell(ctx, stm);
182
0
      whence = 0;
183
0
    }
184
14.7k
    stm->seek(ctx, stm, offset, whence);
185
14.7k
    stm->eof = 0;
186
14.7k
  }
187
0
  else if (whence != 2)
188
0
  {
189
0
    if (whence == 0)
190
0
      offset -= fz_tell(ctx, stm);
191
0
    if (offset < 0)
192
0
      fz_warn(ctx, "cannot seek backwards");
193
    /* dog slow, but rare enough */
194
0
    while (offset-- > 0)
195
0
    {
196
0
      if (fz_read_byte(ctx, stm) == EOF)
197
0
      {
198
0
        fz_warn(ctx, "seek failed");
199
0
        break;
200
0
      }
201
0
    }
202
0
  }
203
0
  else
204
0
    fz_warn(ctx, "cannot seek");
205
14.7k
}
206
207
fz_buffer *
208
fz_read_file(fz_context *ctx, const char *filename)
209
0
{
210
0
  fz_stream *stm;
211
0
  fz_buffer *buf = NULL;
212
213
0
  fz_var(buf);
214
215
0
  stm = fz_open_file(ctx, filename);
216
0
  fz_try(ctx)
217
0
  {
218
0
    buf = fz_read_all(ctx, stm, 0);
219
0
  }
220
0
  fz_always(ctx)
221
0
  {
222
0
    fz_drop_stream(ctx, stm);
223
0
  }
224
0
  fz_catch(ctx)
225
0
  {
226
0
    fz_rethrow(ctx);
227
0
  }
228
229
0
  return buf;
230
0
}
231
232
fz_buffer *
233
fz_try_read_file(fz_context *ctx, const char *filename)
234
0
{
235
0
  fz_stream *stm;
236
0
  fz_buffer *buf = NULL;
237
238
0
  fz_var(buf);
239
240
0
  stm = fz_try_open_file(ctx, filename);
241
0
  if (stm == NULL)
242
0
    return NULL;
243
0
  fz_try(ctx)
244
0
  {
245
0
    buf = fz_read_all(ctx, stm, 0);
246
0
  }
247
0
  fz_always(ctx)
248
0
  {
249
0
    fz_drop_stream(ctx, stm);
250
0
  }
251
0
  fz_catch(ctx)
252
0
  {
253
0
    fz_rethrow(ctx);
254
0
  }
255
256
0
  return buf;
257
0
}
258
259
/*
  Read a 16 bit unsigned integer, most significant byte first.

  Both bytes are read before EOF is checked; throws FZ_ERROR_FORMAT if
  either read returned EOF.
*/
uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm)
{
  int hi = fz_read_byte(ctx, stm);
  int lo = fz_read_byte(ctx, stm);

  if (hi == EOF || lo == EOF)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");

  return (uint16_t)((hi << 8) | lo);
}
267
268
/*
  Read a 24 bit unsigned integer, most significant byte first.

  All three bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 3; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value = (value << 8) | (uint32_t)(byte & 0xff);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");

  return value;
}
277
278
/*
  Read a 32 bit unsigned integer, most significant byte first.

  All four bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 4; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value = (value << 8) | (uint32_t)(byte & 0xff);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");

  return value;
}
288
289
/*
  Read a 64 bit unsigned integer, most significant byte first.

  All eight bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm)
{
  uint64_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 8; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value = (value << 8) | (uint64_t)(byte & 0xff);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");

  return value;
}
304
305
/*
  Read a 16 bit unsigned integer, least significant byte first.

  Both bytes are read before EOF is checked; throws FZ_ERROR_FORMAT if
  either read returned EOF.
*/
uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
{
  int lo = fz_read_byte(ctx, stm);
  int hi = fz_read_byte(ctx, stm);

  if (lo == EOF || hi == EOF)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");

  return (uint16_t)(lo | (hi << 8));
}
313
314
/*
  Read a 24 bit unsigned integer, least significant byte first.

  All three bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 3; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value |= (uint32_t)(byte & 0xff) << (8 * i);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");

  return value;
}
323
324
/*
  Read a 32 bit unsigned integer, least significant byte first.

  All four bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 4; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value |= (uint32_t)(byte & 0xff) << (8 * i);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");

  return value;
}
334
335
/*
  Read a 64 bit unsigned integer, least significant byte first.

  All eight bytes are read before EOF is checked; throws
  FZ_ERROR_FORMAT if any read returned EOF.
*/
uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
{
  uint64_t value = 0;
  int hit_eof = 0;
  int i;

  for (i = 0; i < 8; i++)
  {
    int byte = fz_read_byte(ctx, stm);

    if (byte == EOF)
      hit_eof = 1;
    value |= (uint64_t)(byte & 0xff) << (8 * i);
  }

  if (hit_eof)
    fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");

  return value;
}
350
351
0
/* Read 16 bits via fz_read_uint16 and hand them back as a signed value. */
int16_t fz_read_int16(fz_context *ctx, fz_stream *stm)
{
  uint16_t raw = fz_read_uint16(ctx, stm);

  return (int16_t)raw;
}
352
0
/* Read 32 bits via fz_read_uint32 and hand them back as a signed value. */
int32_t fz_read_int32(fz_context *ctx, fz_stream *stm)
{
  uint32_t raw = fz_read_uint32(ctx, stm);

  return (int32_t)raw;
}
353
0
/* Read 64 bits via fz_read_uint64 and hand them back as a signed value. */
int64_t fz_read_int64(fz_context *ctx, fz_stream *stm)
{
  uint64_t raw = fz_read_uint64(ctx, stm);

  return (int64_t)raw;
}
354
355
0
/* Read 16 bits via fz_read_uint16_le and hand them back as a signed value. */
int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm)
{
  uint16_t raw = fz_read_uint16_le(ctx, stm);

  return (int16_t)raw;
}
356
0
/* Read 32 bits via fz_read_uint32_le and hand them back as a signed value. */
int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm)
{
  uint32_t raw = fz_read_uint32_le(ctx, stm);

  return (int32_t)raw;
}
357
0
/* Read 64 bits via fz_read_uint64_le and hand them back as a signed value. */
int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm)
{
  uint64_t raw = fz_read_uint64_le(ctx, stm);

  return (int64_t)raw;
}
358
359
float
360
fz_read_float_le(fz_context *ctx, fz_stream *stm)
361
0
{
362
0
  uint32_t u;
363
0
  float x;
364
0
  u = fz_read_uint32_le(ctx, stm);
365
0
  memcpy(&x, &u, sizeof x);
366
0
  return x;
367
0
}
368
369
float
370
fz_read_float(fz_context *ctx, fz_stream *stm)
371
0
{
372
0
  uint32_t u;
373
0
  float x;
374
0
  u = fz_read_uint32(ctx, stm);
375
0
  memcpy(&x, &u, sizeof x);
376
0
  return x;
377
0
}
378
379
/*
  Read a NUL terminated string from stm into buffer, which has room
  for len bytes. The terminating NUL is stored too.

  Throws FZ_ERROR_FORMAT if the buffer fills up before a NUL is seen,
  or if the stream ends first.
*/
void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
{
  for (;;)
  {
    int ch;

    /* Check for space before every byte, including the first. */
    if (len <= 0)
      fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string");

    ch = fz_read_byte(ctx, stm);
    if (ch == EOF)
      fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string");

    *buffer++ = ch;
    len--;

    if (ch == 0)
      return;
  }
}
395
396
/*
  Read one UTF-8 encoded code point from the stream.

  Returns EOF if the stream is already at its end, otherwise the
  decoded code point, or 0xFFFD (the replacement character) for a
  malformed sequence:

  - EOF in the middle of a multi-byte sequence yields 0xFFFD.
  - A byte that should be a continuation byte but is not is pushed
    back, so it is decoded again as the start of the next code point,
    and 0xFFFD is returned for the sequence read so far.
  - A continuation byte found where a lead byte is expected is
    consumed and yields 0xFFFD. It must not be pushed back, or the
    stream would never advance past it.

  Lead bytes matching none of the patterns tested (0x00-0x7F and
  0xF8-0xFF) are returned unchanged. Overlong encodings and
  out-of-range values are not rejected.
*/
int fz_read_rune(fz_context *ctx, fz_stream *in)
{
  int trailing; /* continuation bytes still expected */
  int x;
  int c = fz_read_byte(ctx, in);
  if (c == EOF)
    return EOF;

  if ((c & 0xF8) == 0xF0)
  {
    c &= 7;
    trailing = 3;
  }
  else if ((c & 0xF0) == 0xE0)
  {
    c &= 15;
    trailing = 2;
  }
  else if ((c & 0xE0) == 0xC0)
  {
    c &= 31;
    trailing = 1;
  }
  else if ((c & 0xC0) == 0x80)
  {
    /* Stray continuation byte: already consumed, just flag it. */
    return 0xFFFD;
  }
  else
    return c;

  while (trailing-- > 0)
  {
    x = fz_read_byte(ctx, in);
    if (x == EOF)
      return 0xFFFD;
    if ((x & 0xC0) != 0x80)
    {
      /* Not a continuation byte: leave it for the next call. */
      fz_unread_byte(ctx, in);
      return 0xFFFD;
    }
    c = (c << 6) | (x & 0x3F);
  }

  return c;
}
486
487
/*
  Read one code point from a little-endian UTF-16 stream.

  Returns EOF if the stream is already at its end. Malformed input is
  patched up rather than rejected; see the comments below for what is
  substituted in each case.
*/
int fz_read_utf16_le(fz_context *ctx, fz_stream *stm)
{
  int lo, hi;
  int lead, trail;

  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
    return EOF;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    return lo; /* Half a code unit: possibly wrong, but all there is. */

  lead = lo | (hi<<8);

  /* Anything outside the surrogate range is a complete code point. */
  if (lead < 0xd800 || lead >= 0xe000)
    return lead;

  /* A low surrogate with no high surrogate before it: decode it as
   * if the missing high surrogate had been 0. */
  if (lead >= 0xdc00)
    return 0x10000 + lead - 0xdc00;

  /* Only one byte of lookahead is available, which is not enough to
   * check that a low surrogate follows, so two more bytes are simply
   * read. */
  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
  {
    /* Nothing follows: treat the trailing surrogate as 0. */
    return 0x10000 + ((lead - 0xd800)<<10);
  }
  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    hi = 0xDC; /* Fudge a low surrogate */

  trail = lo | (hi<<8);

  if (trail < 0xdc00 || trail >= 0xe000)
  {
    /* Not a low surrogate. The two bytes have already been consumed
     * and are lost; treat the trailing surrogate as 0. */
    return 0x10000 + ((lead - 0xd800)<<10);
  }

  return 0x10000 + ((lead - 0xd800)<<10) + (trail - 0xdc00);
}
539
540
/*
  Read one code point from a big-endian UTF-16 stream.

  Returns EOF if the stream is already at its end. Malformed input is
  patched up rather than rejected; see the comments below for what is
  substituted in each case.
*/
int fz_read_utf16_be(fz_context *ctx, fz_stream *stm)
{
  int hi, lo;
  int lead, trail;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
    return EOF;

  lo = fz_read_byte(ctx, stm);
  if (lo == EOF)
    return hi; /* Half a code unit: possibly wrong, but all there is. */

  lead = (hi<<8) | lo;

  /* Anything outside the surrogate range is a complete code point. */
  if (lead < 0xd800 || lead >= 0xe000)
    return lead;

  /* A low surrogate with no high surrogate before it: decode it as
   * if the missing high surrogate had been 0. */
  if (lead >= 0xdc00)
    return 0x10000 + lead - 0xdc00;

  hi = fz_read_byte(ctx, stm);
  if (hi == EOF)
  {
    /* Nothing follows: treat the trailing surrogate as 0. */
    return 0x10000 + ((lead - 0xd800)<<10);
  }

  /* Big-endian puts the top byte first, so one byte is enough to tell
   * whether a low surrogate is starting. */
  if (hi >= 0xdc && hi < 0xe0)
  {
    lo = fz_read_byte(ctx, stm);
    if (lo == EOF)
      lo = 0;
    trail = (hi<<8) | lo;
  }
  else
  {
    /* It is not: put the byte back and pretend a 0 surrogate was
     * there. */
    fz_unread_byte(ctx, stm);
    trail = 0xdc00;
  }

  return 0x10000 + ((lead - 0xd800)<<10) + (trail - 0xdc00);
}