1#
2# The Python Imaging Library.
3# $Id$
4#
5# base class for image file handlers
6#
7# history:
8# 1995-09-09 fl Created
9# 1996-03-11 fl Fixed load mechanism.
10# 1996-04-15 fl Added pcx/xbm decoders.
11# 1996-04-30 fl Added encoders.
12# 1996-12-14 fl Added load helpers
13# 1997-01-11 fl Use encode_to_file where possible
14# 1997-08-27 fl Flush output in _save
15# 1998-03-05 fl Use memory mapping for some modes
16# 1999-02-04 fl Use memory mapping also for "I;16" and "I;16B"
17# 1999-05-31 fl Added image parser
18# 2000-10-12 fl Set readonly flag on memory-mapped images
19# 2002-03-20 fl Use better messages for common decoder errors
20# 2003-04-21 fl Fall back on mmap/map_buffer if map is not available
21# 2003-10-30 fl Added StubImageFile class
22# 2004-02-25 fl Made incremental parser more robust
23#
24# Copyright (c) 1997-2004 by Secret Labs AB
25# Copyright (c) 1995-2004 by Fredrik Lundh
26#
27# See the README file for information on usage and redistribution.
28#
29from __future__ import annotations
30
31import abc
32import io
33import itertools
34import struct
35import sys
36from typing import IO, Any, NamedTuple
37
38from . import Image
39from ._deprecate import deprecate
40from ._util import is_path
41
# Default number of bytes requested per read while decoding (the initial value
# of ImageFile.decodermaxblock) and the lower bound for the encoder buffer
# size computed in _save().
MAXBLOCK = 65536

# Largest number of bytes that _safe_read() requests from a file in one call;
# bigger requests are split into blocks of at most this size.
SAFEBLOCK = 1024 * 1024

LOAD_TRUNCATED_IMAGES = False
"""Whether or not to load truncated image files. User code may change this."""

# Fallback messages keyed by codec error code; consulted by _get_oserror()
# when Image.core does not provide getcodecstatus().
ERRORS = {
    -1: "image buffer overrun error",
    -2: "decoding error",
    -3: "unknown error",
    -8: "bad configuration",
    -9: "out of memory error",
}
"""
Dict of known error codes returned from :meth:`.PyDecoder.decode`,
:meth:`.PyEncoder.encode` :meth:`.PyEncoder.encode_to_pyfd` and
:meth:`.PyEncoder.encode_to_file`.
"""
61
62
63#
64# --------------------------------------------------------------------
65# Helpers
66
67
def _get_oserror(error: int, *, encoder: bool) -> OSError:
    """
    Build (but do not raise) the :py:exc:`OSError` for a codec error code.

    The message text comes from ``Image.core.getcodecstatus`` when the core
    provides it, otherwise from :data:`ERRORS`; unknown codes get a generic
    "encoder/decoder error N" text.

    :param error: Error code reported by the codec.
    :param encoder: ``True`` if the code came from an encoder (the message
        then talks about writing), ``False`` for a decoder (reading).
    :returns: The exception instance, ready to be raised by the caller.
    """
    role = "encoder" if encoder else "decoder"
    activity = "writing" if encoder else "reading"
    try:
        text = Image.core.getcodecstatus(error)
    except AttributeError:
        # core without getcodecstatus: use the Python-side table instead
        text = ERRORS.get(error)
    if not text:
        text = f"{role} error {error}"
    return OSError(text + f" when {activity} image file")
77
78
def raise_oserror(error: int) -> OSError:
    """
    Deprecated helper: raise the :py:exc:`OSError` matching a decoder error code.

    Emits a deprecation notice through ``deprecate`` first, then always raises.

    :param error: Error code returned by a codec's ``decode()`` method.
    :exception OSError: Always.
    """
    advice = (
        "It is only useful for translating error codes returned by a codec's "
        "decode() method, which ImageFile already does automatically."
    )
    deprecate("raise_oserror", 12, action=advice)
    raise _get_oserror(error, encoder=False)
87
88
89def _tilesort(t):
90 # sort on offset
91 return t[2]
92
93
class _Tile(NamedTuple):
    """
    One tile descriptor: which codec handles which region of the image and
    where its data sits in the file.

    The field order matches the 4-tuples unpacked by ``ImageFile.load`` and
    ``_encode_tile``.
    """

    # name handed to Image._getdecoder / Image._getencoder
    codec_name: str
    # region covered by the tile; forwarded to the codec's setimage()
    extents: tuple[int, int, int, int]
    # position in the file that is seeked to before the codec runs
    offset: int
    # codec-specific arguments; load() expands a bare string to (args, 0, 1)
    # when checking whether the tile can be memory-mapped
    args: tuple[Any, ...] | str | None
99
100
101#
102# --------------------------------------------------------------------
103# ImageFile base class
104
105
class ImageFile(Image.Image):
    """
    Base class for image file format handlers.

    The constructor takes ownership of (or adopts) the underlying file and
    calls the subclass ``_open()`` hook to identify the image. Pixel data is
    decoded later by :meth:`load`, driven by the tile list in ``self.tile``.
    """

    def __init__(self, fp=None, filename=None):
        """
        Open and identify an image file.

        :param fp: Either a path (as judged by ``is_path``), which is opened
            here in binary mode, or an already opened stream.
        :param filename: Name to record as ``self.filename`` when ``fp`` is a
            stream. Ignored when ``fp`` is a path.
        :exception SyntaxError: If ``_open()`` fails with one of the errors
            listed below, or finishes without setting a mode and a positive
            size.
        """
        super().__init__()

        self._min_frame = 0

        self.custom_mimetype = None

        self.tile = None
        """ A list of tile descriptors, or ``None`` """

        self.readonly = 1  # until we know better

        self.decoderconfig = ()
        self.decodermaxblock = MAXBLOCK

        if is_path(fp):
            # filename
            self.fp = open(fp, "rb")
            self.filename = fp
            # we opened the file, so we are responsible for closing it
            self._exclusive_fp = True
        else:
            # stream
            self.fp = fp
            self.filename = filename
            # can be overridden
            self._exclusive_fp = None

        try:
            try:
                self._open()
            except (
                IndexError,  # end of data
                TypeError,  # end of data (ord)
                KeyError,  # unsupported mode
                EOFError,  # got header but not the first frame
                struct.error,
            ) as v:
                # report low-level parse failures uniformly as SyntaxError
                raise SyntaxError(v) from v

            if not self.mode or self.size[0] <= 0 or self.size[1] <= 0:
                msg = "not identified by this driver"
                raise SyntaxError(msg)
        except BaseException:
            # close the file only if we have opened it in this constructor
            if self._exclusive_fp:
                self.fp.close()
            raise

    def get_format_mimetype(self) -> str | None:
        """
        Return the MIME type for this image.

        :returns: ``self.custom_mimetype`` if set; otherwise the
            ``Image.MIME`` entry for the upper-cased format name (``None`` if
            there is no entry); ``None`` if the format itself is ``None``.
        """
        if self.custom_mimetype:
            return self.custom_mimetype
        if self.format is not None:
            return Image.MIME.get(self.format.upper())
        return None

    def __setstate__(self, state):
        """Restore pickled state, starting from an empty tile list."""
        self.tile = []
        super().__setstate__(state)

    def verify(self) -> None:
        """
        Check file integrity.

        This base implementation performs no checks of its own: it only
        closes the file if this object opened it, and drops the reference to
        it.
        """

        # raise exception if something's wrong.  must be called
        # directly after open, and closes file when finished.
        if self._exclusive_fp:
            self.fp.close()
        self.fp = None

    def load(self):
        """
        Load image data based on tile list.

        A single "raw" tile whose raw mode equals the image mode is
        memory-mapped when possible; otherwise every tile is decoded in file
        order by the decoder named in its descriptor.

        :returns: The result of ``Image.Image.load(self)``.
        :exception OSError: If ``self.tile`` is ``None``, if the data ends
            early while ``LOAD_TRUNCATED_IMAGES`` is false, or if decoding
            finishes with a negative error code.
        """

        if self.tile is None:
            msg = "cannot load this image"
            raise OSError(msg)

        pixel = Image.Image.load(self)
        if not self.tile:
            # empty tile list: there is nothing to decode
            return pixel

        self.map = None
        # memory mapping is only considered for a named file with one tile
        use_mmap = self.filename and len(self.tile) == 1
        # As of pypy 2.1.0, memory mapping was failing here.
        use_mmap = use_mmap and not hasattr(sys, "pypy_version_info")

        readonly = 0

        # look for read/seek overrides
        try:
            read = self.load_read
            # don't use mmap if there are custom read/seek functions
            use_mmap = False
        except AttributeError:
            read = self.fp.read

        try:
            seek = self.load_seek
            use_mmap = False
        except AttributeError:
            seek = self.fp.seek

        if use_mmap:
            # try memory mapping
            decoder_name, extents, offset, args = self.tile[0]
            if isinstance(args, str):
                # a bare string is shorthand; expand it to the 3-tuple form
                args = (args, 0, 1)
            if (
                decoder_name == "raw"
                and len(args) >= 3
                and args[0] == self.mode
                and args[0] in Image._MAPMODES
            ):
                try:
                    # use mmap, if possible
                    import mmap

                    with open(self.filename) as fp:
                        self.map = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
                    # NOTE(review): args[1] is presumably the row stride in
                    # bytes, making this a bounds check on the mapped file --
                    # confirm against the raw decoder arguments
                    if offset + self.size[1] * args[1] > self.map.size():
                        msg = "buffer is not large enough"
                        raise OSError(msg)
                    self.im = Image.core.map_buffer(
                        self.map, self.size, decoder_name, offset, args
                    )
                    readonly = 1
                    # After trashing self.im,
                    # we might need to reload the palette data.
                    if self.palette:
                        self.palette.dirty = 1
                except (AttributeError, OSError, ImportError):
                    # mapping failed; fall back to the regular decoder loop
                    self.map = None

        self.load_prepare()
        err_code = -3  # initialize to unknown error
        if not self.map:
            # sort tiles in file order
            self.tile.sort(key=_tilesort)

            try:
                # FIXME: This is a hack to handle TIFF's JpegTables tag.
                prefix = self.tile_prefix
            except AttributeError:
                prefix = b""

            # Remove consecutive duplicates that only differ by their offset
            # (the last tile of each run is the one that is kept)
            self.tile = [
                list(tiles)[-1]
                for _, tiles in itertools.groupby(
                    self.tile, lambda tile: (tile[0], tile[1], tile[3])
                )
            ]
            for decoder_name, extents, offset, args in self.tile:
                seek(offset)
                decoder = Image._getdecoder(
                    self.mode, decoder_name, args, self.decoderconfig
                )
                try:
                    decoder.setimage(self.im, extents)
                    if decoder.pulls_fd:
                        # the decoder reads from the file object by itself
                        decoder.setfd(self.fp)
                        err_code = decoder.decode(b"")[1]
                    else:
                        # feed the decoder block by block; b holds the data
                        # that has been read but not yet consumed
                        b = prefix
                        while True:
                            try:
                                s = read(self.decodermaxblock)
                            except (IndexError, struct.error) as e:
                                # truncated png/gif
                                if LOAD_TRUNCATED_IMAGES:
                                    break
                                else:
                                    msg = "image file is truncated"
                                    raise OSError(msg) from e

                            if not s:  # truncated jpeg
                                if LOAD_TRUNCATED_IMAGES:
                                    break
                                else:
                                    msg = (
                                        "image file is truncated "
                                        f"({len(b)} bytes not processed)"
                                    )
                                    raise OSError(msg)

                            b = b + s
                            n, err_code = decoder.decode(b)
                            if n < 0:
                                # negative count: the decoder is finished
                                break
                            b = b[n:]
                finally:
                    # Need to cleanup here to prevent leaks
                    decoder.cleanup()

        self.tile = []
        self.readonly = readonly

        self.load_end()

        if self._exclusive_fp and self._close_exclusive_fp_after_loading:
            self.fp.close()
        self.fp = None

        if not self.map and not LOAD_TRUNCATED_IMAGES and err_code < 0:
            # still raised if decoder fails to return anything
            raise _get_oserror(err_code, encoder=False)

        return Image.Image.load(self)

    def load_prepare(self) -> None:
        """
        Make sure ``self.im`` exists with the current mode and size before
        decoding, and run the base ``load()`` for palette images.
        """
        # create image memory if necessary
        if not self.im or self.im.mode != self.mode or self.im.size != self.size:
            self.im = Image.core.new(self.mode, self.size)
        # create palette (optional)
        if self.mode == "P":
            Image.Image.load(self)

    def load_end(self) -> None:
        """Hook called by :meth:`load` after decoding; does nothing here."""
        # may be overridden
        pass

    # may be defined for contained formats
    # def load_seek(self, pos: int) -> None:
    #     pass

    # may be defined for blocked formats (e.g. PNG)
    # def load_read(self, read_bytes: int) -> bytes:
    #     pass

    def _seek_check(self, frame):
        """
        Validate a frame number before seeking.

        :param frame: Requested frame number.
        :returns: ``True`` if ``frame`` differs from the current ``tell()``
            position, ``False`` otherwise.
        :exception EOFError: If ``frame`` is below ``self._min_frame``, or at
            or beyond ``self.n_frames + self._min_frame`` when that upper
            limit is checked.
        """
        if (
            frame < self._min_frame
            # Only check upper limit on frames if additional seek operations
            # are not required to do so
            or (
                not (hasattr(self, "_n_frames") and self._n_frames is None)
                and frame >= self.n_frames + self._min_frame
            )
        ):
            msg = "attempt to seek outside sequence"
            raise EOFError(msg)

        return self.tell() != frame
350
351
class StubHandler(abc.ABC):
    """
    Interface for the external handler that a stub image plugin delegates to.

    Deriving from :py:class:`abc.ABC` is what makes ``@abc.abstractmethod``
    effective: a subclass must implement :meth:`load` before it can be
    instantiated.
    """

    def open(self, im: StubImageFile) -> None:
        """
        Optional hook that receives the stub image; does nothing by default.

        :param im: The stub image file.
        """
        pass

    @abc.abstractmethod
    def load(self, im: StubImageFile) -> Image.Image:
        """
        Load the real image for a stub.

        ``StubImageFile.load`` calls this, requires a non-``None`` result and
        then takes over that image's class and attributes.

        :param im: The stub image file to load.
        :returns: The loaded image.
        """
        pass
359
360
class StubImageFile(ImageFile):
    """
    Base class for stub image loaders.

    A stub loader can recognise files of a given format but leaves the
    actual loading to external code, which it finds through :meth:`_load`.
    """

    def _open(self) -> None:
        """(Hook) Identify the file; must be provided by subclasses."""
        raise NotImplementedError("StubImageFile subclass must implement _open")

    def load(self):
        """
        Load the image through the external handler.

        On success this object takes over the class and attribute dictionary
        of the image produced by the handler.

        :returns: The result of calling ``load()`` on the handler's image.
        :exception OSError: If :meth:`_load` returns ``None``.
        """
        handler = self._load()
        if handler is None:
            raise OSError(f"cannot find loader for this {self.format} file")
        loaded = handler.load(self)
        assert loaded is not None
        # become the other object (!)
        self.__class__ = loaded.__class__
        self.__dict__ = loaded.__dict__
        return loaded.load()

    def _load(self) -> StubHandler | None:
        """(Hook) Find actual image loader."""
        raise NotImplementedError("StubImageFile subclass must implement _load")
389
390
class Parser:
    """
    Incremental image parser.  This class implements the standard
    feed/close consumer interface.

    Data passed to :meth:`feed` is buffered until the image can be identified.
    If the image has a single tile and no custom read/seek methods, later data
    is passed straight to a decoder; otherwise everything is kept and the
    image is reopened from the full buffer in :meth:`close`.
    """

    incremental = None
    # image identified so far, or None
    image: Image.Image | None = None
    # bytes received but not yet consumed; None before the first feed()
    # and again once decoding has finished
    data = None
    # active decoder when incremental decoding is possible, else None
    decoder = None
    # number of bytes still to skip before the tile data begins
    offset = 0
    # set to 1 once the decoder has reported the end of the stream
    finished = 0

    def reset(self) -> None:
        """
        (Consumer) Reset the parser.  Note that you can only call this
        method immediately after you've created a parser; parser
        instances cannot be reused.
        """
        assert self.data is None, "cannot reuse parsers"

    def feed(self, data):
        """
        (Consumer) Feed data to the parser.

        :param data: A bytes buffer with the next part of the file.
        :exception OSError: If the parser failed to parse the image file.
        """
        # collect data

        if self.finished:
            return

        if self.data is None:
            self.data = data
        else:
            self.data = self.data + data

        # parse what we have
        if self.decoder:
            if self.offset > 0:
                # skip header
                skip = min(len(self.data), self.offset)
                self.data = self.data[skip:]
                self.offset = self.offset - skip
                if self.offset > 0 or not self.data:
                    return

            n, e = self.decoder.decode(self.data)

            if n < 0:
                # end of stream
                self.data = None
                self.finished = 1
                if e < 0:
                    # decoding error
                    self.image = None
                    raise _get_oserror(e, encoder=False)
                else:
                    # end of image
                    return
            # keep only the bytes the decoder has not consumed yet
            self.data = self.data[n:]

        elif self.image:
            # if we end up here with no decoder, this file cannot
            # be incrementally parsed.  wait until we've gotten all
            # available data
            pass

        else:
            # attempt to open this file
            try:
                with io.BytesIO(self.data) as fp:
                    im = Image.open(fp)
            except OSError:
                pass  # not enough data
            else:
                flag = hasattr(im, "load_seek") or hasattr(im, "load_read")
                if flag or len(im.tile) != 1:
                    # custom load code, or multiple tiles: no incremental
                    # decoding.  (This used to assign to a misspelled
                    # "decode" attribute.)
                    self.decoder = None
                else:
                    # initialize decoder
                    im.load_prepare()
                    d, e, o, a = im.tile[0]
                    im.tile = []
                    self.decoder = Image._getdecoder(im.mode, d, a, im.decoderconfig)
                    self.decoder.setimage(im.im, e)

                    # calculate decoder offset
                    self.offset = o
                    if self.offset <= len(self.data):
                        self.data = self.data[self.offset :]
                        self.offset = 0

                self.image = im

    def __enter__(self):
        """Return the parser itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *args: object) -> None:
        """Close the parser on leaving the ``with`` block."""
        self.close()

    def close(self):
        """
        (Consumer) Close the stream.

        :returns: An image object.
        :exception OSError: If the parser failed to parse the image file either
                            because it cannot be identified or cannot be
                            decoded.
        """
        # finish decoding
        if self.decoder:
            # get rid of what's left in the buffers
            self.feed(b"")
            self.data = self.decoder = None
            if not self.finished:
                msg = "image was incomplete"
                raise OSError(msg)
        if not self.image:
            msg = "cannot parse this image"
            raise OSError(msg)
        if self.data:
            # incremental parsing not possible; reopen the file
            # now that we have all data
            with io.BytesIO(self.data) as fp:
                try:
                    self.image = Image.open(fp)
                finally:
                    self.image.load()
        return self.image
523
524
525# --------------------------------------------------------------------
526
527
def _save(im, fp, tile, bufsize=0) -> None:
    """
    Encode an image to a file according to a tile list.

    The image is loaded, the tiles are sorted by offset (in place), and each
    tile is encoded through ``_encode_tile``: first with the file's
    descriptor, and again without it if the file has no usable ``fileno()``
    or the first attempt raises one of the errors caught below.

    :param im: Image object.
    :param fp: File object.
    :param tile: Tile list.
    :param bufsize: Optional buffer size
    """

    im.load()
    if not hasattr(im, "encoderconfig"):
        im.encoderconfig = ()
    tile.sort(key=_tilesort)
    # FIXME: make MAXBLOCK a configuration parameter
    # It would be great if we could have the encoder specify what it needs
    # But, it would need at least the image size in most cases. RawEncode is
    # a tricky case.
    row_bytes = im.size[0] * 4  # see RawEncode.c
    bufsize = max(MAXBLOCK, bufsize, row_bytes)
    try:
        descriptor = fp.fileno()
        fp.flush()
        _encode_tile(im, fp, tile, bufsize, descriptor)
    except (AttributeError, io.UnsupportedOperation) as exc:
        # no real descriptor available: write through the file object
        _encode_tile(im, fp, tile, bufsize, None, exc)
    if hasattr(fp, "flush"):
        fp.flush()
554
555
def _encode_tile(im, fp, tile: list[_Tile], bufsize, fh, exc=None):
    """
    Run the encoder of every tile and write its output.

    :param im: Image object; its ``mode``, ``im`` and ``encoderconfig`` are
        handed to the encoder.
    :param fp: File object; positioned at each tile's offset when that
        offset is positive.
    :param tile: Tile list.
    :param bufsize: Buffer size passed to the encoder.
    :param fh: Value passed to ``encode_to_file`` when ``exc`` is not set.
    :param exc: Exception that ruled out descriptor-based writing, if any.
        When set, encoded data is written with ``fp.write`` and any error
        raised here is chained to it.
    :exception OSError: If an encoder reports a negative error code.
    """
    for codec_name, region, position, params in tile:
        if position > 0:
            fp.seek(position)
        encoder = Image._getencoder(im.mode, codec_name, params, im.encoderconfig)
        try:
            encoder.setimage(im.im, region)
            if encoder.pushes_fd:
                # the encoder writes to the file object on its own
                encoder.setfd(fp)
                errcode = encoder.encode_to_pyfd()[1]
            elif exc:
                # compress to Python file-compatible object
                errcode = 0
                while not errcode:
                    errcode, chunk = encoder.encode(bufsize)[1:]
                    fp.write(chunk)
            else:
                # slight speedup: compress to real file object
                errcode = encoder.encode_to_file(fh, bufsize)
            if errcode < 0:
                raise _get_oserror(errcode, encoder=True) from exc
        finally:
            encoder.cleanup()
581
582
583def _safe_read(fp, size):
584 """
585 Reads large blocks in a safe way. Unlike fp.read(n), this function
586 doesn't trust the user. If the requested size is larger than
587 SAFEBLOCK, the file is read block by block.
588
589 :param fp: File handle. Must implement a <b>read</b> method.
590 :param size: Number of bytes to read.
591 :returns: A string containing <i>size</i> bytes of data.
592
593 Raises an OSError if the file is truncated and the read cannot be completed
594
595 """
596 if size <= 0:
597 return b""
598 if size <= SAFEBLOCK:
599 data = fp.read(size)
600 if len(data) < size:
601 msg = "Truncated File Read"
602 raise OSError(msg)
603 return data
604 data = []
605 remaining_size = size
606 while remaining_size > 0:
607 block = fp.read(min(remaining_size, SAFEBLOCK))
608 if not block:
609 break
610 data.append(block)
611 remaining_size -= len(block)
612 if sum(len(d) for d in data) < size:
613 msg = "Truncated File Read"
614 raise OSError(msg)
615 return b"".join(data)
616
617
class PyCodecState:
    """Size and offset of the region a Python codec is working on."""

    def __init__(self) -> None:
        # everything starts at zero; PyCodec.setimage fills in real values
        self.xsize = self.ysize = 0
        self.xoff = self.yoff = 0

    def extents(self) -> tuple[int, int, int, int]:
        """Return the region as ``(x0, y0, x1, y1)``."""
        left, top = self.xoff, self.yoff
        return left, top, left + self.xsize, top + self.ysize
627
628
class PyCodec:
    """State and setup methods shared by Python decoders and encoders."""

    fd: IO[bytes] | None

    def __init__(self, mode, *args):
        self.im = None
        self.state = PyCodecState()
        self.fd = None
        self.mode = mode
        self.init(args)

    def init(self, args):
        """
        Override to perform codec specific initialization

        :param args: Array of args items from the tile entry
        :returns: None
        """
        self.args = args

    def cleanup(self) -> None:
        """
        Override to perform codec specific cleanup

        :returns: None
        """
        pass

    def setfd(self, fd):
        """
        Called from ImageFile to set the Python file-like object

        :param fd: A Python file-like object
        :returns: None
        """
        self.fd = fd

    def setimage(self, im, extents: tuple[int, int, int, int] | None = None) -> None:
        """
        Called from ImageFile to set the core output image for the codec

        :param im: A core image object
        :param extents: a 4 tuple of (x0, y0, x1, y1) defining the rectangle
            for this tile
        :returns: None
        :exception ValueError: If the resulting tile has a non-positive
            width or height, or extends outside the image.
        """

        # following c code
        self.im = im

        x0, y0, x1, y1 = extents if extents else (0, 0, 0, 0)

        state = self.state
        if x0 == 0 and x1 == 0:
            # no horizontal range given: use the size of the whole image
            state.xsize, state.ysize = self.im.size
        else:
            state.xoff, state.yoff = x0, y0
            state.xsize, state.ysize = x1 - x0, y1 - y0

        if state.xsize <= 0 or state.ysize <= 0:
            raise ValueError("Size cannot be negative")

        if (
            state.xsize + state.xoff > self.im.size[0]
            or state.ysize + state.yoff > self.im.size[1]
        ):
            raise ValueError("Tile cannot extend outside image")
701
702
class PyDecoder(PyCodec):
    """
    Python implementation of a format decoder. Override this class and
    add the decoding logic in the :meth:`decode` method.

    See :ref:`Writing Your Own File Codec in Python<file-codecs-py>`
    """

    _pulls_fd = False

    @property
    def pulls_fd(self) -> bool:
        """Whether the decoder reads from the file object by itself."""
        return self._pulls_fd

    def decode(self, buffer: bytes) -> tuple[int, int]:
        """
        Override to perform the decoding process.

        :param buffer: A bytes object with the data to be decoded.
        :returns: A tuple of ``(bytes consumed, errcode)``.
            If finished with decoding return -1 for the bytes consumed.
            Err codes are from :data:`.ImageFile.ERRORS`.
        """
        raise NotImplementedError("unavailable in base decoder")

    def set_as_raw(self, data: bytes, rawmode=None) -> None:
        """
        Convenience method to set the internal image from a stream of raw data

        :param data: Bytes to be set
        :param rawmode: The rawmode to be used for the decoder.
            If not specified, it will default to the mode of the image
        :returns: None
        :exception ValueError: If the raw decoder does not report that it
            finished, or reports a non-zero error code.
        """

        raw_decoder = Image._getdecoder(self.mode, "raw", rawmode or self.mode)
        assert self.im is not None
        raw_decoder.setimage(self.im, self.state.extents())
        outcome = raw_decoder.decode(data)

        # a non-negative byte count means the decoder still expects more data
        if outcome[0] >= 0:
            raise ValueError("not enough image data")
        if outcome[1] != 0:
            raise ValueError("cannot decode image data")
752
753
class PyEncoder(PyCodec):
    """
    Python implementation of a format encoder. Override this class and
    add the encoding logic in the :meth:`encode` method.

    See :ref:`Writing Your Own File Codec in Python<file-codecs-py>`
    """

    _pushes_fd = False

    @property
    def pushes_fd(self) -> bool:
        """Whether the encoder writes to the file object by itself."""
        return self._pushes_fd

    def encode(self, bufsize: int) -> tuple[int, int, bytes]:
        """
        Override to perform the encoding process.

        :param bufsize: Buffer size.
        :returns: A tuple of ``(bytes encoded, errcode, bytes)``.
            If finished with encoding return 1 for the error code.
            Err codes are from :data:`.ImageFile.ERRORS`.
        """
        raise NotImplementedError("unavailable in base encoder")

    def encode_to_pyfd(self) -> tuple[int, int]:
        """
        If ``pushes_fd`` is ``True``, then this method will be used,
        and ``encode()`` will only be called once.

        :returns: A tuple of ``(bytes consumed, errcode)``.
            Err codes are from :data:`.ImageFile.ERRORS`.
        """
        if not self.pushes_fd:
            return 0, -8  # bad configuration
        consumed, errcode, payload = self.encode(0)
        if payload:
            assert self.fd is not None
            self.fd.write(payload)
        return consumed, errcode

    def encode_to_file(self, fh, bufsize):
        """
        Call ``encode()`` repeatedly until it reports a non-zero error code,
        writing to ``fh`` after every call that reports a positive count.

        :param fh: File handle. It must provide a ``write`` method.
        :param bufsize: Buffer size.

        :returns: If finished successfully, return 0.
            Otherwise, return an error code. Err codes are from
            :data:`.ImageFile.ERRORS`.
        """
        # NOTE(review): _save() passes the integer from fp.fileno() down to
        # this method, and an int has no write(); the resulting AttributeError
        # is what sends _save() to its file-object fallback. Also confirm
        # that writing buf[status:] rather than the encoded bytes is intended.
        while True:
            status, errcode, buf = self.encode(bufsize)
            if status > 0:
                fh.write(buf[status:])
            if errcode != 0:
                return errcode