/src/mupdf/source/fitz/stream-read.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | |
27 | 28 | #define MIN_BOMB (100 << 20) |
28 | | |
29 | | size_t |
30 | | fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len) |
31 | 14.0k | { |
32 | 14.0k | size_t count, n; |
33 | | |
34 | 14.0k | count = 0; |
35 | 14.0k | do |
36 | 24.1k | { |
37 | 24.1k | n = fz_available(ctx, stm, len); |
38 | 24.1k | if (n > len) |
39 | 13.9k | n = len; |
40 | 24.1k | if (n == 0) |
41 | 34 | break; |
42 | | |
43 | 24.0k | memcpy(buf, stm->rp, n); |
44 | 24.0k | stm->rp += n; |
45 | 24.0k | buf += n; |
46 | 24.0k | count += n; |
47 | 24.0k | len -= n; |
48 | 24.0k | } |
49 | 24.0k | while (len > 0); |
50 | | |
51 | 14.0k | return count; |
52 | 14.0k | } |
53 | | |
54 | | static unsigned char skip_buf[4096]; |
55 | | |
56 | | size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len) |
57 | 0 | { |
58 | 0 | size_t count, l, total = 0; |
59 | |
|
60 | 0 | while (len) |
61 | 0 | { |
62 | 0 | l = len; |
63 | 0 | if (l > sizeof(skip_buf)) |
64 | 0 | l = sizeof(skip_buf); |
65 | 0 | count = fz_read(ctx, stm, skip_buf, l); |
66 | 0 | total += count; |
67 | 0 | if (count < l) |
68 | 0 | break; |
69 | 0 | len -= count; |
70 | 0 | } |
71 | 0 | return total; |
72 | 0 | } |
73 | | |
74 | | fz_buffer * |
75 | | fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial) |
76 | 13 | { |
77 | 13 | return fz_read_best(ctx, stm, initial, NULL, 0); |
78 | 13 | } |
79 | | |
80 | | fz_buffer * |
81 | | fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case) |
82 | 15 | { |
83 | 15 | fz_buffer *buf = NULL; |
84 | 15 | int check_bomb = (initial > 0); |
85 | 15 | size_t n; |
86 | | |
87 | 15 | fz_var(buf); |
88 | | |
89 | 15 | if (truncated) |
90 | 0 | *truncated = 0; |
91 | | |
92 | 15 | if (worst_case == 0) |
93 | 15 | worst_case = initial * 200; |
94 | 15 | if (worst_case < MIN_BOMB) |
95 | 13 | worst_case = MIN_BOMB; |
96 | | |
97 | 30 | fz_try(ctx) |
98 | 30 | { |
99 | 15 | if (initial < 1024) |
100 | 12 | initial = 1024; |
101 | | |
102 | 15 | buf = fz_new_buffer(ctx, initial+1); |
103 | | |
104 | 257 | while (1) |
105 | 257 | { |
106 | 257 | if (buf->len == buf->cap) |
107 | 227 | fz_grow_buffer(ctx, buf); |
108 | | |
109 | 257 | if (check_bomb && buf->len > worst_case) |
110 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected"); |
111 | | |
112 | 257 | n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len); |
113 | 257 | if (n == 0) |
114 | 15 | break; |
115 | | |
116 | 242 | buf->len += n; |
117 | 242 | } |
118 | 15 | } |
119 | 30 | fz_catch(ctx) |
120 | 0 | { |
121 | 0 | if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM) |
122 | 0 | { |
123 | 0 | fz_drop_buffer(ctx, buf); |
124 | 0 | fz_rethrow(ctx); |
125 | 0 | } |
126 | 0 | if (truncated) |
127 | 0 | { |
128 | 0 | *truncated = 1; |
129 | 0 | fz_report_error(ctx); |
130 | 0 | } |
131 | 0 | else |
132 | 0 | { |
133 | 0 | fz_drop_buffer(ctx, buf); |
134 | 0 | fz_rethrow(ctx); |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | 15 | return buf; |
139 | 15 | } |
140 | | |
141 | | char * |
142 | | fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n) |
143 | 0 | { |
144 | 0 | char *s = mem; |
145 | 0 | int c = EOF; |
146 | 0 | while (n > 1) |
147 | 0 | { |
148 | 0 | c = fz_read_byte(ctx, stm); |
149 | 0 | if (c == EOF) |
150 | 0 | break; |
151 | 0 | if (c == '\r') { |
152 | 0 | c = fz_peek_byte(ctx, stm); |
153 | 0 | if (c == '\n') |
154 | 0 | fz_read_byte(ctx, stm); |
155 | 0 | break; |
156 | 0 | } |
157 | 0 | if (c == '\n') |
158 | 0 | break; |
159 | 0 | *s++ = c; |
160 | 0 | n--; |
161 | 0 | } |
162 | 0 | if (n) |
163 | 0 | *s = '\0'; |
164 | 0 | return (s == mem && c == EOF) ? NULL : mem; |
165 | 0 | } |
166 | | |
167 | | int64_t |
168 | | fz_tell(fz_context *ctx, fz_stream *stm) |
169 | 324k | { |
170 | 324k | return stm->pos - (stm->wp - stm->rp); |
171 | 324k | } |
172 | | |
173 | | void |
174 | | fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence) |
175 | 15.8k | { |
176 | 15.8k | stm->avail = 0; /* Reset bit reading */ |
177 | 15.8k | if (stm->seek) |
178 | 15.8k | { |
179 | 15.8k | if (whence == 1) |
180 | 0 | { |
181 | 0 | offset += fz_tell(ctx, stm); |
182 | 0 | whence = 0; |
183 | 0 | } |
184 | 15.8k | stm->seek(ctx, stm, offset, whence); |
185 | 15.8k | stm->eof = 0; |
186 | 15.8k | } |
187 | 0 | else if (whence != 2) |
188 | 0 | { |
189 | 0 | if (whence == 0) |
190 | 0 | offset -= fz_tell(ctx, stm); |
191 | 0 | if (offset < 0) |
192 | 0 | fz_warn(ctx, "cannot seek backwards"); |
193 | | /* dog slow, but rare enough */ |
194 | 0 | while (offset-- > 0) |
195 | 0 | { |
196 | 0 | if (fz_read_byte(ctx, stm) == EOF) |
197 | 0 | { |
198 | 0 | fz_warn(ctx, "seek failed"); |
199 | 0 | break; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | } |
203 | 0 | else |
204 | 0 | fz_warn(ctx, "cannot seek"); |
205 | 15.8k | } |
206 | | |
207 | | fz_buffer * |
208 | | fz_read_file(fz_context *ctx, const char *filename) |
209 | 0 | { |
210 | 0 | fz_stream *stm; |
211 | 0 | fz_buffer *buf = NULL; |
212 | |
|
213 | 0 | fz_var(buf); |
214 | |
|
215 | 0 | stm = fz_open_file(ctx, filename); |
216 | 0 | fz_try(ctx) |
217 | 0 | { |
218 | 0 | buf = fz_read_all(ctx, stm, 0); |
219 | 0 | } |
220 | 0 | fz_always(ctx) |
221 | 0 | { |
222 | 0 | fz_drop_stream(ctx, stm); |
223 | 0 | } |
224 | 0 | fz_catch(ctx) |
225 | 0 | { |
226 | 0 | fz_rethrow(ctx); |
227 | 0 | } |
228 | | |
229 | 0 | return buf; |
230 | 0 | } |
231 | | |
232 | | fz_buffer * |
233 | | fz_try_read_file(fz_context *ctx, const char *filename) |
234 | 0 | { |
235 | 0 | fz_stream *stm; |
236 | 0 | fz_buffer *buf = NULL; |
237 | |
|
238 | 0 | fz_var(buf); |
239 | |
|
240 | 0 | stm = fz_try_open_file(ctx, filename); |
241 | 0 | if (stm == NULL) |
242 | 0 | return NULL; |
243 | 0 | fz_try(ctx) |
244 | 0 | { |
245 | 0 | buf = fz_read_all(ctx, stm, 0); |
246 | 0 | } |
247 | 0 | fz_always(ctx) |
248 | 0 | { |
249 | 0 | fz_drop_stream(ctx, stm); |
250 | 0 | } |
251 | 0 | fz_catch(ctx) |
252 | 0 | { |
253 | 0 | fz_rethrow(ctx); |
254 | 0 | } |
255 | | |
256 | 0 | return buf; |
257 | 0 | } |
258 | | |
259 | | uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm) |
260 | 0 | { |
261 | 0 | int a = fz_read_byte(ctx, stm); |
262 | 0 | int b = fz_read_byte(ctx, stm); |
263 | 0 | if (a == EOF || b == EOF) |
264 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16"); |
265 | 0 | return ((uint16_t)a<<8) | ((uint16_t)b); |
266 | 0 | } |
267 | | |
268 | | uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm) |
269 | 0 | { |
270 | 0 | int a = fz_read_byte(ctx, stm); |
271 | 0 | int b = fz_read_byte(ctx, stm); |
272 | 0 | int c = fz_read_byte(ctx, stm); |
273 | 0 | if (a == EOF || b == EOF || c == EOF) |
274 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24"); |
275 | 0 | return ((uint32_t)a<<16) | ((uint32_t)b<<8) | ((uint32_t)c); |
276 | 0 | } |
277 | | |
278 | | uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm) |
279 | 0 | { |
280 | 0 | int a = fz_read_byte(ctx, stm); |
281 | 0 | int b = fz_read_byte(ctx, stm); |
282 | 0 | int c = fz_read_byte(ctx, stm); |
283 | 0 | int d = fz_read_byte(ctx, stm); |
284 | 0 | if (a == EOF || b == EOF || c == EOF || d == EOF) |
285 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32"); |
286 | 0 | return ((uint32_t)a<<24) | ((uint32_t)b<<16) | ((uint32_t)c<<8) | ((uint32_t)d); |
287 | 0 | } |
288 | | |
289 | | uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm) |
290 | 0 | { |
291 | 0 | int a = fz_read_byte(ctx, stm); |
292 | 0 | int b = fz_read_byte(ctx, stm); |
293 | 0 | int c = fz_read_byte(ctx, stm); |
294 | 0 | int d = fz_read_byte(ctx, stm); |
295 | 0 | int e = fz_read_byte(ctx, stm); |
296 | 0 | int f = fz_read_byte(ctx, stm); |
297 | 0 | int g = fz_read_byte(ctx, stm); |
298 | 0 | int h = fz_read_byte(ctx, stm); |
299 | 0 | if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF) |
300 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64"); |
301 | 0 | return ((uint64_t)a<<56) | ((uint64_t)b<<48) | ((uint64_t)c<<40) | ((uint64_t)d<<32) |
302 | 0 | | ((uint64_t)e<<24) | ((uint64_t)f<<16) | ((uint64_t)g<<8) | ((uint64_t)h); |
303 | 0 | } |
304 | | |
305 | | uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm) |
306 | 20 | { |
307 | 20 | int a = fz_read_byte(ctx, stm); |
308 | 20 | int b = fz_read_byte(ctx, stm); |
309 | 20 | if (a == EOF || b == EOF) |
310 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16"); |
311 | 20 | return ((uint16_t)a) | ((uint16_t)b<<8); |
312 | 20 | } |
313 | | |
314 | | uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm) |
315 | 0 | { |
316 | 0 | int a = fz_read_byte(ctx, stm); |
317 | 0 | int b = fz_read_byte(ctx, stm); |
318 | 0 | int c = fz_read_byte(ctx, stm); |
319 | 0 | if (a == EOF || b == EOF || c == EOF) |
320 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24"); |
321 | 0 | return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16); |
322 | 0 | } |
323 | | |
324 | | uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm) |
325 | 472 | { |
326 | 472 | int a = fz_read_byte(ctx, stm); |
327 | 472 | int b = fz_read_byte(ctx, stm); |
328 | 472 | int c = fz_read_byte(ctx, stm); |
329 | 472 | int d = fz_read_byte(ctx, stm); |
330 | 472 | if (a == EOF || b == EOF || c == EOF || d == EOF) |
331 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32"); |
332 | 472 | return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16) | ((uint32_t)d<<24); |
333 | 472 | } |
334 | | |
335 | | uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm) |
336 | 0 | { |
337 | 0 | int a = fz_read_byte(ctx, stm); |
338 | 0 | int b = fz_read_byte(ctx, stm); |
339 | 0 | int c = fz_read_byte(ctx, stm); |
340 | 0 | int d = fz_read_byte(ctx, stm); |
341 | 0 | int e = fz_read_byte(ctx, stm); |
342 | 0 | int f = fz_read_byte(ctx, stm); |
343 | 0 | int g = fz_read_byte(ctx, stm); |
344 | 0 | int h = fz_read_byte(ctx, stm); |
345 | 0 | if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF) |
346 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64"); |
347 | 0 | return ((uint64_t)a) | ((uint64_t)b<<8) | ((uint64_t)c<<16) | ((uint64_t)d<<24) |
348 | 0 | | ((uint64_t)e<<32) | ((uint64_t)f<<40) | ((uint64_t)g<<48) | ((uint64_t)h<<56); |
349 | 0 | } |
350 | | |
351 | 0 | int16_t fz_read_int16(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16(ctx, stm); } |
352 | 0 | int32_t fz_read_int32(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32(ctx, stm); } |
353 | 0 | int64_t fz_read_int64(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64(ctx, stm); } |
354 | | |
355 | 0 | int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16_le(ctx, stm); } |
356 | 0 | int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32_le(ctx, stm); } |
357 | 0 | int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64_le(ctx, stm); } |
358 | | |
359 | | float |
360 | | fz_read_float_le(fz_context *ctx, fz_stream *stm) |
361 | 0 | { |
362 | 0 | union {float f;int32_t i;} u; |
363 | |
|
364 | 0 | u.i = fz_read_int32_le(ctx, stm); |
365 | 0 | return u.f; |
366 | 0 | } |
367 | | |
368 | | float |
369 | | fz_read_float(fz_context *ctx, fz_stream *stm) |
370 | 0 | { |
371 | 0 | union {float f;int32_t i;} u; |
372 | |
|
373 | 0 | u.i = fz_read_int32(ctx, stm); |
374 | 0 | return u.f; |
375 | 0 | } |
376 | | |
377 | | void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len) |
378 | 0 | { |
379 | 0 | int c; |
380 | 0 | do |
381 | 0 | { |
382 | 0 | if (len <= 0) |
383 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string"); |
384 | | |
385 | 0 | c = fz_read_byte(ctx, stm); |
386 | 0 | if (c == EOF) |
387 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string"); |
388 | 0 | *buffer++ = c; |
389 | 0 | len--; |
390 | 0 | } |
391 | 0 | while (c != 0); |
392 | 0 | } |
393 | | |
394 | | int fz_read_rune(fz_context *ctx, fz_stream *in) |
395 | 0 | { |
396 | 0 | uint8_t d, e, f; |
397 | 0 | int x; |
398 | 0 | int c = fz_read_byte(ctx, in); |
399 | 0 | if (c == EOF) |
400 | 0 | return EOF; |
401 | | |
402 | 0 | if ((c & 0xF8) == 0xF0) |
403 | 0 | { |
404 | 0 | x = fz_read_byte(ctx, in); |
405 | 0 | if (x == EOF) |
406 | 0 | return 0xFFFD; |
407 | 0 | d = (uint8_t)x; |
408 | 0 | c = (c & 7)<<18; |
409 | 0 | if ((d & 0xC0) == 0x80) |
410 | 0 | { |
411 | 0 | x = fz_read_byte(ctx, in); |
412 | 0 | if (x == EOF) |
413 | 0 | return 0xFFFD; |
414 | 0 | e = (uint8_t)x; |
415 | 0 | c += (d & 0x3f)<<12; |
416 | 0 | if ((e & 0xC0) == 0x80) |
417 | 0 | { |
418 | 0 | x = fz_read_byte(ctx, in); |
419 | 0 | if (x == EOF) |
420 | 0 | return 0xFFFD; |
421 | 0 | f = (uint8_t)x; |
422 | 0 | c += (e & 0x3f)<<6; |
423 | 0 | if ((f & 0xC0) == 0x80) |
424 | 0 | { |
425 | 0 | c += f & 0x3f; |
426 | 0 | } |
427 | 0 | else |
428 | 0 | goto bad_byte; |
429 | 0 | } |
430 | 0 | else |
431 | 0 | goto bad_byte; |
432 | 0 | } |
433 | 0 | else |
434 | 0 | goto bad_byte; |
435 | 0 | } |
436 | 0 | else if ((c & 0xF0) == 0xE0) |
437 | 0 | { |
438 | 0 | x = fz_read_byte(ctx, in); |
439 | 0 | if (x == EOF) |
440 | 0 | return 0xFFFD; |
441 | 0 | d = (uint8_t)x; |
442 | 0 | c = (c & 15)<<12; |
443 | 0 | if ((d & 0xC0) == 0x80) |
444 | 0 | { |
445 | 0 | x = fz_read_byte(ctx, in); |
446 | 0 | if (x == EOF) |
447 | 0 | return 0xFFFD; |
448 | 0 | e = (uint8_t)x; |
449 | 0 | c += (d & 0x3f)<<6; |
450 | 0 | if ((e & 0xC0) == 0x80) |
451 | 0 | { |
452 | 0 | c += e & 0x3f; |
453 | 0 | } |
454 | 0 | else |
455 | 0 | goto bad_byte; |
456 | 0 | } |
457 | 0 | else |
458 | 0 | goto bad_byte; |
459 | 0 | } |
460 | 0 | else if ((c & 0xE0) == 0xC0) |
461 | 0 | { |
462 | 0 | x = fz_read_byte(ctx, in); |
463 | 0 | if (x == EOF) |
464 | 0 | return 0xFFFD; |
465 | 0 | d = (uint8_t)x; |
466 | 0 | c = (c & 31)<<6; |
467 | 0 | if ((d & 0xC0) == 0x80) |
468 | 0 | { |
469 | 0 | c += d & 0x3f; |
470 | 0 | } |
471 | 0 | else |
472 | 0 | fz_unread_byte(ctx, in); |
473 | 0 | } |
474 | 0 | else if ((c & 0xc0) == 0x80) |
475 | 0 | { |
476 | 0 | bad_byte: |
477 | 0 | fz_unread_byte(ctx, in); |
478 | 0 | return 0xFFFD; |
479 | 0 | } |
480 | | |
481 | 0 | return c; |
482 | |
|
483 | 0 | } |
484 | | |
485 | | int fz_read_utf16_le(fz_context *ctx, fz_stream *stm) |
486 | 0 | { |
487 | 0 | int c = fz_read_byte(ctx, stm); |
488 | 0 | int d, e; |
489 | |
|
490 | 0 | if (c == EOF) |
491 | 0 | return EOF; |
492 | | |
493 | 0 | d = fz_read_byte(ctx, stm); |
494 | 0 | if (d == EOF) |
495 | 0 | return c; /* Might be wrong, but the best we can do. */ |
496 | | |
497 | 0 | c |= d<<8; |
498 | | |
499 | | /* If it's not a surrogate, we're done. */ |
500 | 0 | if (c < 0xd800 || c >= 0xe000) |
501 | 0 | return c; |
502 | | |
503 | | /* It *ought* to be a leading (high) surrogate. If it's not, |
504 | | * then we're in trouble. */ |
505 | 0 | if (c >= 0xdc00) |
506 | 0 | return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */ |
507 | | |
508 | | /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need |
509 | | * 2 to tell if it was a low surrogate. Just assume it is. */ |
510 | 0 | d = fz_read_byte(ctx, stm); |
511 | 0 | if (d == EOF) |
512 | 0 | { |
513 | | /* Failure! Imagine the trailing surrogate was 0. */ |
514 | 0 | return 0x10000 + ((c - 0xd800)<<10); |
515 | 0 | } |
516 | 0 | e = fz_read_byte(ctx, stm); |
517 | 0 | if (e == EOF) |
518 | 0 | { |
519 | 0 | e = 0xDC; /* Fudge a low surrogate */ |
520 | 0 | } |
521 | |
|
522 | 0 | d |= e<<8; |
523 | |
|
524 | 0 | if (d < 0xdc00 || d >= 0xe000) |
525 | 0 | { |
526 | | /* Bad encoding! This is nasty, because we've eaten 2 bytes from the |
527 | | * stream which ideally we would not have. Serves you right for |
528 | | * having a broken stream. */ |
529 | 0 | return 0x10000 + ((c - 0xd800)<<10); /* Imagine the high surrogate was 0. */ |
530 | 0 | } |
531 | | |
532 | 0 | c -= 0xd800; |
533 | 0 | d -= 0xdc00; |
534 | |
|
535 | 0 | return 0x10000 + (c<<10) + d; |
536 | 0 | } |
537 | | |
538 | | int fz_read_utf16_be(fz_context *ctx, fz_stream *stm) |
539 | 0 | { |
540 | 0 | int c = fz_read_byte(ctx, stm); |
541 | 0 | int d, e; |
542 | |
|
543 | 0 | if (c == EOF) |
544 | 0 | return EOF; |
545 | | |
546 | 0 | d = fz_read_byte(ctx, stm); |
547 | 0 | if (d == EOF) |
548 | 0 | return c; /* Might be wrong, but the best we can do. */ |
549 | | |
550 | 0 | c = (c<<8) | d; |
551 | | |
552 | | /* If it's not a surrogate, we're done. */ |
553 | 0 | if (c < 0xd800 || c >= 0xe000) |
554 | 0 | return c; |
555 | | |
556 | | /* It *ought* to be a leading (high) surrogate. If it's not, |
557 | | * then we're in trouble. */ |
558 | 0 | if (c >= 0xdc00) |
559 | 0 | return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */ |
560 | | |
561 | | /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need |
562 | | * 2 to tell if it was a low surrogate. Just assume it is. */ |
563 | 0 | d = fz_read_byte(ctx, stm); |
564 | 0 | if (d == EOF) |
565 | 0 | { |
566 | | /* Failure! Imagine the trailing surrogate was 0. */ |
567 | 0 | return 0x10000 + ((c - 0xd800)<<10); |
568 | 0 | } |
569 | | |
570 | | /* The next byte ought to be the start of a trailing (low) surrogate. */ |
571 | 0 | if (d < 0xdc || d >= 0xe0) |
572 | 0 | { |
573 | | /* It wasn't. Put the byte back. */ |
574 | 0 | fz_unread_byte(ctx, stm); |
575 | 0 | d = 0xdc00; /* Pretend it was a 0 surrogate. */ |
576 | 0 | } |
577 | 0 | else |
578 | 0 | { |
579 | 0 | e = fz_read_byte(ctx, stm); |
580 | 0 | if (e == EOF) |
581 | 0 | { |
582 | 0 | e = 0; |
583 | 0 | } |
584 | 0 | d = (d<<8) | e; |
585 | 0 | } |
586 | |
|
587 | 0 | c -= 0xd800; |
588 | 0 | d -= 0xdc00; |
589 | |
|
590 | 0 | return 0x10000 + (c<<10) + d; |
591 | 0 | } |