1# -*- coding: utf-8 -*-
2# imageio is distributed under the terms of the (new) BSD License.
3
4"""
5Definition of the Request object, which acts as a kind of bridge between
what the user wants and what the plugins can do.
7"""
8
9import os
10from io import BytesIO
11import zipfile
12import tempfile
13import shutil
14import enum
15import warnings
16
17from ..core import urlopen, get_remote_file
18
19from pathlib import Path
20from urllib.parse import urlparse
21from typing import Optional
22
# URI types: how ``Request._parse_uri`` classifies the resource it was given.
URI_BYTES = 1  # in-memory bytes, or a special "<...>" URI such as "<bytes>"
URI_FILE = 2  # an already opened file object supplied by the caller
URI_FILENAME = 3  # a path on the local filesystem
URI_ZIPPED = 4  # a file located inside a zip archive ("archive.zip/inner")
URI_HTTP = 5  # an "http://" or "https://" address
URI_FTP = 6  # an "ftp://" or "ftps://" address
30
31
class IOMode(str, enum.Enum):
    """Available IO modes

    This is a helper enum for ``Request.Mode`` which is a composite of a
    ``Request.ImageMode`` and ``Request.IOMode``. The IOMode tells the
    plugin if the resource should be read from or written to. Available
    values are

    - read ("r"): Read from the specified resource
    - write ("w"): Write to the specified resource

    """

    read = "r"
    write = "w"
46
47
class ImageMode(str, enum.Enum):
    """Available Image modes

    This is a helper enum for ``Request.Mode`` which is a composite of a
    ``Request.ImageMode`` and ``Request.IOMode``. The image mode tells the
    plugin the desired (and expected) image shape. Available values are

    - single_image ("i"): Return a single image extending in two spatial
      dimensions
    - multi_image ("I"): Return a list of images extending in two spatial
      dimensions
    - single_volume ("v"): Return an image extending into multiple dimensions.
      E.g. three spatial dimensions for image stacks, or two spatial and one
      time dimension for videos
    - multi_volume ("V"): Return a list of images extending into multiple
      dimensions.
    - any_mode ("?"): Return an image in any format (the plugin decides the
      appropriate action).

    """

    single_image = "i"
    multi_image = "I"
    single_volume = "v"
    multi_volume = "V"
    any_mode = "?"
74
75
@enum.unique
class Mode(str, enum.Enum):
    """Combined IO and image mode of a request.

    Every member is a two-character string: the first character is the
    ``Request.IOMode`` (read or write) and the second character is the
    ``Request.ImageMode`` (the desired and expected image shape). See the
    documentation of those two enums for what the individual characters mean.

    All combinations of ``Request.IOMode`` and ``Request.ImageMode`` exist:

    - read_single_image ("ri")
    - read_multi_image ("rI")
    - read_single_volume ("rv")
    - read_multi_volume ("rV")
    - read_any ("r?")
    - write_single_image ("wi")
    - write_multi_image ("wI")
    - write_single_volume ("wv")
    - write_multi_volume ("wV")
    - write_any ("w?")

    Examples
    --------
    >>> Request.Mode("rI")  # a list of simple images should be read from the resource
    >>> Request.Mode("wv")  # a single volume should be written to the resource

    """

    read_single_image = "ri"
    read_multi_image = "rI"
    read_single_volume = "rv"
    read_multi_volume = "rV"
    read_any = "r?"
    write_single_image = "wi"
    write_multi_image = "wI"
    write_single_volume = "wv"
    write_multi_volume = "wV"
    write_any = "w?"

    @classmethod
    def _missing_(cls, value):
        """Make ``Mode("r")`` and ``Mode("w")`` work.

        Enum calls the sunder method ``_missing_`` when the constructor
        cannot find a member for ``value``. The single-character modes "r"
        and "w" (from the v3 API) are mapped onto their legacy counterparts
        "r?" and "w?"; anything else is rejected with a ``ValueError``.

        More info on _missing_:
        https://docs.python.org/3/library/enum.html#supported-sunder-names
        """
        if value == "r":
            return cls("r?")
        if value == "w":
            return cls("w?")
        raise ValueError(f"{value} is no valid Mode.")

    @property
    def io_mode(self) -> IOMode:
        """The ``IOMode`` encoded in the first character of the value."""
        io_char = self.value[0]
        return IOMode(io_char)

    @property
    def image_mode(self) -> ImageMode:
        """The ``ImageMode`` encoded in the second character of the value."""
        image_char = self.value[1]
        return ImageMode(image_char)

    def __getitem__(self, key):
        """Index access kept for backwards compatibility with the old
        non-enum (plain string) modes: item 0 is the IO mode, item 1 the
        image mode; any other key raises ``IndexError``.
        """
        if key == 0:
            return self.io_mode
        if key == 1:
            return self.image_mode
        raise IndexError(f"Mode has no item {key}")
154
155
# URI prefixes that denote special read sources instead of files. They are
# matched with ``str.startswith`` in ``Request._parse_uri`` (read requests only).
# NOTE(review): "<video" has no closing bracket, presumably so that numbered
# sources such as "<video0>" match as well - confirm against the plugins.
SPECIAL_READ_URIS = "<video", "<screen>", "<clipboard>"

# The user can use this string in a write call to get the data back as bytes.
RETURN_BYTES = "<bytes>"

# Example images that will be auto-downloaded. The keys are the names accepted
# after the "imageio:" prefix (``Request._parse_uri`` lowercases the requested
# name before looking it up here); the values are human-readable descriptions.
EXAMPLE_IMAGES = {
    "astronaut.png": "Image of the astronaut Eileen Collins",
    "camera.png": "A grayscale image of a photographer",
    "checkerboard.png": "Black and white image of a chekerboard",
    "wood.jpg": "A (repeatable) texture of wooden planks",
    "bricks.jpg": "A (repeatable) texture of stone bricks",
    "clock.png": "Photo of a clock with motion blur (Stefan van der Walt)",
    "coffee.png": "Image of a cup of coffee (Rachel Michetti)",
    "chelsea.png": "Image of Stefan's cat",
    "wikkie.png": "Image of Almar's cat",
    "coins.png": "Image showing greek coins from Pompeii",
    "horse.png": "Image showing the silhouette of a horse (Andreas Preuss)",
    "hubble_deep_field.png": "Photograph taken by Hubble telescope (NASA)",
    "immunohistochemistry.png": "Immunohistochemical (IHC) staining",
    "moon.png": "Image showing a portion of the surface of the moon",
    "page.png": "A scanned page of text",
    "text.png": "A photograph of handdrawn text",
    "bacterial_colony.tif": "Multi-page TIFF image of a bacterial colony",
    "chelsea.zip": "The chelsea.png in a zipfile (for testing)",
    "chelsea.bsdf": "The chelsea.png in a BSDF file(for testing)",
    "newtonscradle.gif": "Animated GIF of a newton's cradle",
    "cockatoo.mp4": "Video file of a cockatoo",
    "cockatoo_yuv420.mp4": "Video file of a cockatoo with yuv420 pixel format",
    "stent.npz": "Volumetric image showing a stented abdominal aorta",
    "meadow_cube.jpg": "A cubemap image of a meadow, e.g. to render a skybox.",
}
188
189
class Request(object):
    """ImageResource handling utility.

    Represents a request for reading or saving an image resource. This
    object wraps information to that request and acts as an interface
    for the plugins to several resources; it allows the user to read
    from filenames, files, http, zipfiles, raw bytes, etc., but offers
    a simple interface to the plugins via ``get_file()`` and
    ``get_local_filename()``.

    For each read/write operation a single Request instance is used and passed
    to the can_read/can_write method of a format, and subsequently to
    the Reader/Writer class. This allows rudimentary passing of
    information between different formats and between a format and
    associated reader/writer.

    Parameters
    ----------
    uri : {str, bytes, memoryview, pathlib.Path, file}
        The resource to load the image from (or to save it to).
    mode : str
        The first character is "r" or "w", indicating a read or write
        request. The second character is used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
    extension : str, optional
        Keyword-only. A file extension (starting with ``.``) that overrides
        the extension derived from the URI.
    format_hint : str, optional
        Keyword-only and deprecated (use ``extension``). Must start with
        ``.``; it is used as the extension if none could be determined.
    **kwargs
        Additional keyword arguments; stored unchanged and available via
        the ``kwargs`` property.

    Raises
    ------
    ValueError
        If ``mode`` is not a valid ``Mode``, if ``extension`` or
        ``format_hint`` does not start with ``.``, or if an unknown
        standard image is requested.
    RuntimeError
        If a write to one of the standard images is requested.
    IOError
        If the URI cannot be understood, or a write to http/ftp is requested.
    FileNotFoundError
        If the file to read, or the directory to write to, does not exist.

    """

    def __init__(
        self, uri, mode, *, extension=None, format_hint: Optional[str] = None, **kwargs
    ):
        # General
        self.raw_uri = uri
        self._uri_type = None
        self._filename = None
        self._extension = None
        self._format_hint = None
        self._kwargs = kwargs
        self._result = None  # Some write actions may have a result

        # To handle the user-side
        self._filename_zip = None  # not None if a zipfile is used
        self._bytes = None  # Incoming bytes
        self._zipfile = None  # To store a zipfile instance (if used)

        # To handle the plugin side
        self._file = None  # To store the file instance
        self._file_is_local = False  # whether the data needs to be copied at end
        self._filename_local = None  # not None if using tempfile on this FS
        self._firstbytes = None  # For easy header parsing

        # Check mode
        try:
            self._mode = Mode(mode)
        except ValueError:
            raise ValueError(f"Invalid Request.Mode: {mode}")

        # Parse what was given
        self._parse_uri(uri)

        # Set extension
        if extension is not None:
            # Slice instead of index so that an empty string is reported as
            # an invalid extension rather than raising IndexError.
            if extension[:1] != ".":
                raise ValueError(
                    "`extension` should be a file extension starting with a `.`,"
                    f" but is `{extension}`."
                )
            self._extension = extension
        elif self._filename is not None:
            if self._uri_type in (URI_FILENAME, URI_ZIPPED):
                path = self._filename
            else:
                # Strip query/fragment parts of URLs before taking the suffix
                path = urlparse(self._filename).path
            ext = Path(path).suffix.lower()
            self._extension = ext if ext != "" else None

        if format_hint is not None:
            warnings.warn(
                "The usage of `format_hint` is deprecated and will be removed "
                "in ImageIO v3. Use `extension` instead.",
                DeprecationWarning,
            )
            if format_hint[:1] != ".":
                raise ValueError(
                    "`format_hint` should be a file extension starting with a `.`,"
                    f" but is `{format_hint}`."
                )

        # Goes through the property setter, which also fills in the
        # extension when none has been determined so far.
        self.format_hint = format_hint

    def _parse_uri(self, uri):
        """Try to figure out what we were given.

        Classifies ``uri`` into one of the ``URI_*`` types and fills in
        ``_filename`` (and, depending on the type, ``_bytes``, ``_file`` or
        ``_filename_zip``). Local filenames are made absolute and checked
        for existence (read) or for an existing parent directory (write).
        """
        is_read_request = self.mode.io_mode is IOMode.read
        is_write_request = self.mode.io_mode is IOMode.write

        if isinstance(uri, str):
            # Explicit
            if uri.startswith("imageio:"):
                if is_write_request:
                    raise RuntimeError("Cannot write to the standard images.")
                fn = uri.split(":", 1)[-1].lower()
                # A standard image may refer to a file inside a standard zip
                fn, _, zip_part = fn.partition(".zip/")
                if zip_part:
                    fn += ".zip"
                if fn not in EXAMPLE_IMAGES:
                    raise ValueError("Unknown standard image %r." % fn)
                self._uri_type = URI_FILENAME
                self._filename = get_remote_file("images/" + fn, auto=True)
                if zip_part:
                    self._filename += "/" + zip_part
            elif uri.startswith(("http://", "https://")):
                self._uri_type = URI_HTTP
                self._filename = uri
            elif uri.startswith(("ftp://", "ftps://")):
                self._uri_type = URI_FTP
                self._filename = uri
            elif uri.startswith("file://"):
                self._uri_type = URI_FILENAME
                self._filename = uri[7:]
            elif uri.startswith(SPECIAL_READ_URIS) and is_read_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            elif uri.startswith(RETURN_BYTES) and is_write_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            else:
                self._uri_type = URI_FILENAME
                self._filename = uri

        elif isinstance(uri, memoryview) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri.tobytes()
        elif isinstance(uri, bytes) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri
        elif isinstance(uri, Path):
            self._uri_type = URI_FILENAME
            self._filename = str(uri)
        # Files
        elif is_read_request:
            if hasattr(uri, "read") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be read from here
        elif is_write_request:
            if hasattr(uri, "write") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be written here

        # Expand user dir
        if self._uri_type == URI_FILENAME and self._filename.startswith("~"):
            self._filename = os.path.expanduser(self._filename)

        # Check if a zipfile
        if self._uri_type == URI_FILENAME:
            # Search for zip extension followed by a path separator
            for needle in [".zip/", ".zip\\"]:
                zip_i = self._filename.lower().find(needle)
                if zip_i > 0:
                    zip_i += 4  # position right after ".zip"
                    zip_path = self._filename[:zip_i]
                    if os.path.isdir(zip_path):
                        pass  # is an existing dir (see #548)
                    elif is_write_request or os.path.isfile(zip_path):
                        self._uri_type = URI_ZIPPED
                        self._filename_zip = (
                            zip_path,
                            self._filename[zip_i:].lstrip("/\\"),
                        )
                        break

        # Check if we could read it
        if self._uri_type is None:
            uri_r = repr(uri)
            if len(uri_r) > 60:
                uri_r = uri_r[:57] + "..."
            raise IOError("Cannot understand given URI: %s." % uri_r)

        # Check if this is supported
        noWriting = [URI_HTTP, URI_FTP]
        if is_write_request and self._uri_type in noWriting:
            raise IOError("imageio does not support writing to http/ftp.")

        # Deprecated way to load standard images, give a sensible error message
        if is_read_request and self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if (not os.path.exists(fn)) and (fn in EXAMPLE_IMAGES):
                raise IOError(
                    "No such file: %r. This file looks like one of "
                    "the standard images, but from imageio 2.1, "
                    "standard images have to be specified using "
                    '"imageio:%s".' % (fn, fn)
                )

        # Make filename absolute
        if self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            if self._filename_zip:
                self._filename_zip = (
                    os.path.abspath(self._filename_zip[0]),
                    self._filename_zip[1],
                )
            else:
                self._filename = os.path.abspath(self._filename)

        # Check whether file name is valid
        if self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if is_read_request:
                # Reading: check that the file exists (but is allowed a dir)
                if not os.path.exists(fn):
                    raise FileNotFoundError("No such file: '%s'" % fn)
            else:
                # Writing: check that the directory to write to does exist
                dn = os.path.dirname(fn)
                if not os.path.exists(dn):
                    raise FileNotFoundError("The directory %r does not exist" % dn)

    @property
    def filename(self):
        """Name of the ImageResource.

        The uri for which reading/saving was requested. This
        can be a filename, an http address, or other resource
        identifier. Do not rely on the filename to obtain the data,
        but use ``get_file()`` or ``get_local_filename()`` instead.
        """
        return self._filename

    @property
    def extension(self) -> Optional[str]:
        """The (lowercase) extension of the requested filename.
        Suffixes in URLs are stripped. Can be None if the request is
        not based on a filename.
        """
        return self._extension

    @property
    def format_hint(self) -> Optional[str]:
        """The (deprecated) format hint given by the user, or None."""
        return self._format_hint

    @format_hint.setter
    def format_hint(self, format: str) -> None:
        self._format_hint = format
        # The hint doubles as the extension if nothing better is known
        if self._extension is None:
            self._extension = format

    @property
    def mode(self):
        """The mode of the request. The first character is "r" or "w",
        indicating a read or write request. The second character is
        used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
        """
        return self._mode

    @property
    def kwargs(self):
        """The dict of keyword arguments supplied by the user."""
        return self._kwargs

    # For obtaining data

    def get_file(self):
        """Get a file object for the resource associated with this request.

        If this is a reading request, the file is in read mode,
        otherwise in write mode. This method is not thread safe. Plugins
        should not close the file when done.

        This is the preferred way to read/write the data. But if a
        format cannot handle file-like objects, they should use
        ``get_local_filename()``.
        """
        want_to_write = self.mode.io_mode is IOMode.write

        # Is there already a file?
        # Either _uri_type == URI_FILE, or we already opened the file,
        # e.g. by using firstbytes
        if self._file is not None:
            return self._file

        if self._uri_type == URI_BYTES:
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                self._file = BytesIO(self._bytes)

        elif self._uri_type == URI_FILENAME:
            if want_to_write:
                self._file = open(self.filename, "wb")
            else:
                self._file = open(self.filename, "rb")

        elif self._uri_type == URI_ZIPPED:
            # Get the correct filename
            filename, name = self._filename_zip
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                # Open zipfile and open new file object for specific file
                self._zipfile = zipfile.ZipFile(filename, "r")
                self._file = self._zipfile.open(name, "r")
                self._file = SeekableFileObject(self._file)

        # Membership in a tuple of both types; ``[URI_HTTP or URI_FTP]``
        # would collapse to ``[URI_HTTP]`` and silently skip FTP resources.
        elif self._uri_type in (URI_HTTP, URI_FTP):
            assert not want_to_write  # This should have been tested in init
            timeout = os.getenv("IMAGEIO_REQUEST_TIMEOUT")
            if timeout is None or not timeout.isdigit():
                timeout = 5
            self._file = urlopen(self.filename, timeout=float(timeout))
            self._file = SeekableFileObject(self._file)

        return self._file

    def get_local_filename(self):
        """Get a filename on the local filesystem for this request.

        If the filename is an existing file on this filesystem, return
        that. Otherwise a temporary file is created on the local file
        system which can be used by the format to read from or write to.
        The temporary file is removed again in ``finish()``.
        """
        if self._uri_type == URI_FILENAME:
            return self._filename

        # Get filename
        if self.extension is not None:
            ext = self.extension
        else:
            ext = os.path.splitext(self._filename)[1]
        fd, self._filename_local = tempfile.mkstemp(ext, "imageio_")
        os.close(fd)
        # Write stuff to it?
        if self.mode.io_mode == IOMode.read:
            with open(self._filename_local, "wb") as file:
                shutil.copyfileobj(self.get_file(), file)
        return self._filename_local

    def finish(self) -> None:
        """Wrap up this request.

        Finishes any pending reads or writes, closes any open files and frees
        any resources allocated by this request.
        """

        if self.mode.io_mode == IOMode.write:
            # See if we "own" the data and must put it somewhere
            data = None
            if self._filename_local:
                data = Path(self._filename_local).read_bytes()
            elif self._file_is_local:
                self._file_is_local = False
                data = self._file.getvalue()

            # Put the data in the right place
            if data is not None:
                if self._uri_type == URI_BYTES:
                    self._result = data  # Picked up by imread function
                elif self._uri_type == URI_FILE:
                    self._file.write(data)
                elif self._uri_type == URI_ZIPPED:
                    # Context manager so the archive is closed even if
                    # writing the entry fails.
                    with zipfile.ZipFile(self._filename_zip[0], "a") as zf:
                        zf.writestr(self._filename_zip[1], data)
                # elif self._uri_type == URI_FILENAME: -> is always direct
                # elif self._uri_type == URI_FTP/HTTP: -> write not supported

        # Close open files that we know of (and are responsible for)
        if self._file and self._uri_type != URI_FILE:
            self._file.close()
            self._file = None
        if self._zipfile:
            self._zipfile.close()
            self._zipfile = None

        # Remove temp file
        if self._filename_local:
            try:
                os.remove(self._filename_local)
            except Exception:  # pragma: no cover
                warnings.warn(
                    "Failed to delete the temporary file at "
                    f"`{self._filename_local}`. Please report this issue."
                )
            self._filename_local = None

        # Detach so gc can clean even if a reference of self lingers
        self._bytes = None

    def get_result(self):
        """For internal use. In some situations a write action can have
        a result (bytes data). That is obtained with this function.

        The stored result is cleared, so a second call returns None.
        """
        # Is there a reason to disallow reading multiple times?
        res = self._result
        self._result = None
        return res

    @property
    def firstbytes(self):
        """The first 256 bytes of the file. These can be used to
        parse the header to determine the file-format.
        """
        if self._firstbytes is None:
            self._read_first_bytes()
        return self._firstbytes

    def _read_first_bytes(self, N=256):
        """Store the first ``N`` bytes of the resource in ``_firstbytes``.

        The file position is restored afterwards. If that is not possible
        the file is dropped so that ``get_file()`` opens a fresh one; for a
        user-supplied file object this is an error (``IOError``).
        """
        if self._bytes is not None:
            self._firstbytes = self._bytes[:N]
        else:
            # Prepare
            try:
                f = self.get_file()
            except IOError:
                if os.path.isdir(self.filename):  # A directory, e.g. for DICOM
                    self._firstbytes = bytes()
                    return
                raise
            try:
                i = f.tell()
            except Exception:
                i = None
            # Read
            self._firstbytes = read_n_bytes(f, N)
            # Set back
            try:
                if i is None:
                    raise Exception("cannot seek with None")
                f.seek(i)
            except Exception:
                # Prevent get_file() from reusing the file
                self._file = None
                # If the given URI was a file object, we have a problem,
                if self._uri_type == URI_FILE:
                    raise IOError("Cannot seek back after getting firstbytes!")
639
640
def read_n_bytes(f, N):
    """read_n_bytes(file, n)

    Read up to n bytes from the given file. Reading is repeated until n
    bytes were collected or the file returns no more data, so fewer bytes
    are returned if the file is shorter than n.
    """
    collected = b""
    remaining = N
    while remaining > 0:
        chunk = f.read(remaining)
        if not chunk:
            # Nothing more to read
            break
        collected = collected + chunk
        remaining = N - len(collected)
    return collected
654
655
class SeekableFileObject:
    """A readonly wrapper file object that adds support for seeking, even if
    the wrapped file object does not. This allows us to stream from http and
    still use Pillow.

    Everything read from the wrapped file is kept in an internal buffer, so
    seeking backwards is served from that buffer and seeking forwards reads
    (and buffers) the data in between.

    Parameters
    ----------
    f : file-like
        The wrapped object; only its ``read()`` and ``close()`` methods are
        used.
    """

    # Number of bytes requested from the wrapped file per readline() step
    _READLINE_CHUNK = 1024

    def __init__(self, f):
        self.f = f
        self._i = 0  # >=0 but can exceed buffer
        self._buffer = b""
        self._have_all = False
        self.closed = False

    def read(self, n=None):
        """Read and return up to ``n`` bytes from the current position.

        ``n`` of None or a negative number reads everything that is left.
        """
        # Fix up n
        if n is None:
            pass
        else:
            n = int(n)
            if n < 0:
                n = None

        # Can and must we read more?
        if not self._have_all:
            more = b""
            if n is None:
                more = self.f.read()
                self._have_all = True
            else:
                want_i = self._i + n
                want_more = want_i - len(self._buffer)
                if want_more > 0:
                    more = self.f.read(want_more)
                    if len(more) < want_more:
                        self._have_all = True
            self._buffer += more

        # Read data from buffer and update pointer
        if n is None:
            res = self._buffer[self._i :]
        else:
            res = self._buffer[self._i : self._i + n]
        self._i += len(res)

        return res

    def readline(self, size=-1):
        """Read and return one line (including the trailing newline).

        At most ``size`` bytes are returned when ``size`` is non-negative.
        An empty bytes object is returned at the end of the data. Data is
        pulled from the wrapped file in chunks and kept in the buffer so
        that seeking keeps working afterwards.
        """
        limit = None
        if size is not None:
            size = int(size)
            if size >= 0:
                limit = size

        while True:
            newline_at = self._buffer.find(b"\n", self._i)
            if newline_at >= 0 or self._have_all:
                break
            if limit is not None and len(self._buffer) - self._i >= limit:
                break  # enough buffered to satisfy the size limit
            more = self.f.read(self._READLINE_CHUNK)
            if not more:
                self._have_all = True
            self._buffer += more

        end = newline_at + 1 if newline_at >= 0 else len(self._buffer)
        if limit is not None:
            end = min(end, self._i + limit)
        line = self._buffer[self._i : end]
        self._i += len(line)
        return line

    def tell(self):
        """Return the current position."""
        return self._i

    def seek(self, i, mode=0):
        """Move to a new position and return it.

        ``mode`` follows the usual whence convention: 0 is absolute, 1 is
        relative to the current position and 2 is relative to the end.
        Raises ``ValueError`` for a negative absolute position or an
        unknown ``mode``.
        """
        # Mimic BytesIO behavior

        # Get the absolute new position
        i = int(i)
        if mode == 0:
            if i < 0:
                raise ValueError("negative seek value " + str(i))
            real_i = i
        elif mode == 1:
            real_i = max(0, self._i + i)  # negative ok here
        elif mode == 2:
            if not self._have_all:
                self.read()
            real_i = max(0, len(self._buffer) + i)
        else:
            # Report the offending whence value, not the offset
            raise ValueError("invalid whence (%s, should be 0, 1 or 2)" % mode)

        # Read some?
        if real_i <= len(self._buffer):
            pass  # no need to read
        elif not self._have_all:
            assert real_i > self._i  # if we don't have all, _i cannot be > _buffer
            self.read(real_i - self._i)  # sets self._i

        self._i = real_i
        return self._i

    def close(self):
        """Mark this wrapper as closed and close the wrapped file."""
        self.closed = True
        self.f.close()

    def isatty(self):
        return False

    def seekable(self):
        return True
745
746
class InitializationError(Exception):
    """Raised when a plugin cannot initialize from the given request.

    This is an _internal_ error. Plugins raise it when they fail to handle
    a request, which lets callers tell an incompatibility between a plugin
    and a request apart from an actual error/bug inside a plugin.

    """