1# -*- coding: utf-8 -*-
2# imageio is distributed under the terms of the (new) BSD License.
3
4"""
Definition of the Request object, which acts as a kind of bridge between
what the user wants and what the plugins can provide.
7"""
8
9import os
10from io import BytesIO
11import zipfile
12import tempfile
13import shutil
14import enum
15import warnings
16
17from ..core import urlopen, get_remote_file
18
19from pathlib import Path
20from urllib.parse import urlparse
21from typing import Optional
22
# URI types: integer tags that ``Request._parse_uri`` stores in
# ``Request._uri_type`` to record what kind of resource was supplied.
URI_BYTES = 1  # in-memory bytes, or a special "<...>" pseudo-URI
URI_FILE = 2  # an already-open file-like object
URI_FILENAME = 3  # a path on the local filesystem
URI_ZIPPED = 4  # a member inside a local zip archive
URI_HTTP = 5  # an http:// or https:// URL
URI_FTP = 6  # an ftp:// or ftps:// URL
30
31
class IOMode(str, enum.Enum):
    """Direction of the I/O operation.

    ``IOMode`` is one half of the composite ``Mode`` (the other half being
    ``ImageMode``). It tells the plugin whether the resource is to be read
    from or written to:

    - read ("r"): read from the specified resource
    - write ("w"): write to the specified resource

    Members are also ``str`` instances, so they compare equal to their
    one-character value.
    """

    read = "r"
    write = "w"
46
47
class ImageMode(str, enum.Enum):
    """Kind of image data that is requested.

    ``ImageMode`` is one half of the composite ``Mode`` (the other half being
    ``IOMode``). It tells the plugin the desired (and expected) image shape:

    - single_image ("i"): a single image extending in two spatial dimensions
    - multi_image ("I"): a list of images extending in two spatial dimensions
    - single_volume ("v"): an image extending into multiple dimensions, e.g.
      three spatial dimensions for image stacks, or two spatial and one time
      dimension for videos
    - multi_volume ("V"): a list of images extending into multiple dimensions
    - any_mode ("?"): an image in any format (the plugin decides the
      appropriate action)

    Members are also ``str`` instances, so they compare equal to their
    one-character value.
    """

    single_image = "i"
    multi_image = "I"
    single_volume = "v"
    multi_volume = "V"
    any_mode = "?"
74
75
@enum.unique
class Mode(str, enum.Enum):
    """How a resource is to be accessed and what kind of data is expected.

    A ``Mode`` is a two-character string: the first character is an
    ``IOMode`` value (read or write) and the second is an ``ImageMode`` value
    (the desired image shape). See those enums for what the individual
    characters mean.

    All combinations are available:

    - read_single_image ("ri")
    - read_multi_image ("rI")
    - read_single_volume ("rv")
    - read_multi_volume ("rV")
    - read_any ("r?")
    - write_single_image ("wi")
    - write_multi_image ("wI")
    - write_single_volume ("wv")
    - write_multi_volume ("wV")
    - write_any ("w?")

    The one-character strings "r" and "w" are accepted by the constructor as
    well and map to ``read_any`` and ``write_any`` respectively.

    Examples
    --------
    >>> Mode("rI")  # a list of simple images should be read from the resource
    >>> Mode("wv")  # a single volume should be written to the resource

    """

    read_single_image = "ri"
    read_multi_image = "rI"
    read_single_volume = "rv"
    read_multi_volume = "rV"
    read_any = "r?"
    write_single_image = "wi"
    write_multi_image = "wI"
    write_single_volume = "wv"
    write_multi_volume = "wV"
    write_any = "w?"

    @classmethod
    def _missing_(cls, value):
        """Resolve values that are not a member value: "r" and "w".

        ``_missing_`` is the enum hook that is invoked when the constructor
        cannot find a member for ``value``. It is used here to translate the
        short modes "r" and "w" (from the v3 API) into their legacy
        equivalents "r?" and "w?". Anything else raises ``ValueError``.

        More info on _missing_:
        https://docs.python.org/3/library/enum.html#supported-sunder-names
        """
        if value == "r":
            return cls.read_any
        if value == "w":
            return cls.write_any
        raise ValueError(f"{value} is no valid Mode.")

    @property
    def io_mode(self) -> IOMode:
        """The ``IOMode`` encoded in the first character."""
        io_char = self.value[0]
        return IOMode(io_char)

    @property
    def image_mode(self) -> ImageMode:
        """The ``ImageMode`` encoded in the second character."""
        image_char = self.value[1]
        return ImageMode(image_char)

    def __getitem__(self, key):
        """Index access kept for code written against the old string modes.

        ``mode[0]`` yields the ``IOMode`` and ``mode[1]`` the ``ImageMode``;
        any other key raises ``IndexError``.
        """
        if key == 0:
            return self.io_mode
        if key == 1:
            return self.image_mode
        raise IndexError(f"Mode has no item {key}")
154
155
# Prefixes of the special pseudo-URIs accepted for read requests. The tuple is
# handed to ``str.startswith`` in ``Request._parse_uri``; "<video" has no
# closing bracket, so any URI that merely begins with it matches
# (presumably to allow forms such as "<video0>" -- TODO confirm).
SPECIAL_READ_URIS = "<video", "<screen>", "<clipboard>"

# The user can use this string in a write call to get the data back as bytes.
RETURN_BYTES = "<bytes>"

# Example images that will be auto-downloaded.
# Keys are the (lowercase) file names accepted after the "imageio:" prefix;
# values are human-readable descriptions of the image.
EXAMPLE_IMAGES = {
    "astronaut.png": "Image of the astronaut Eileen Collins",
    "camera.png": "A grayscale image of a photographer",
    "checkerboard.png": "Black and white image of a chekerboard",
    "wood.jpg": "A (repeatable) texture of wooden planks",
    "bricks.jpg": "A (repeatable) texture of stone bricks",
    "clock.png": "Photo of a clock with motion blur (Stefan van der Walt)",
    "coffee.png": "Image of a cup of coffee (Rachel Michetti)",
    "chelsea.png": "Image of Stefan's cat",
    "wikkie.png": "Image of Almar's cat",
    "coins.png": "Image showing greek coins from Pompeii",
    "horse.png": "Image showing the silhouette of a horse (Andreas Preuss)",
    "hubble_deep_field.png": "Photograph taken by Hubble telescope (NASA)",
    "immunohistochemistry.png": "Immunohistochemical (IHC) staining",
    "moon.png": "Image showing a portion of the surface of the moon",
    "page.png": "A scanned page of text",
    "text.png": "A photograph of handdrawn text",
    "chelsea.zip": "The chelsea.png in a zipfile (for testing)",
    "chelsea.bsdf": "The chelsea.png in a BSDF file(for testing)",
    "newtonscradle.gif": "Animated GIF of a newton's cradle",
    "cockatoo.mp4": "Video file of a cockatoo",
    "stent.npz": "Volumetric image showing a stented abdominal aorta",
    "meadow_cube.jpg": "A cubemap image of a meadow, e.g. to render a skybox.",
}
186
187
class Request(object):
    """ImageResource handling utility.

    Represents a request for reading or saving an image resource. This
    object wraps information to that request and acts as an interface
    for the plugins to several resources; it allows the user to read
    from filenames, files, http, zipfiles, raw bytes, etc., but offer
    a simple interface to the plugins via ``get_file()`` and
    ``get_local_filename()``.

    For each read/write operation a single Request instance is used and passed
    to the can_read/can_write method of a format, and subsequently to
    the Reader/Writer class. This allows rudimentary passing of
    information between different formats and between a format and
    associated reader/writer.

    Parameters
    ----------
    uri : {str, bytes, memoryview, pathlib.Path, file}
        The resource to load the image from (or save it to).
    mode : str
        The first character is "r" or "w", indicating a read or write
        request. The second character is used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
    extension : str, optional
        File extension (starting with a ".") to use instead of the one
        derived from the filename.
    format_hint : str, optional
        Deprecated; use ``extension`` instead. Must start with a ".".
    **kwargs
        Stored as-is and exposed through the ``kwargs`` property.

    Raises
    ------
    ValueError
        If ``mode`` is not a valid ``Mode``, or if ``extension`` or
        ``format_hint`` does not start with a ".".

    """

    def __init__(self, uri, mode, *, extension=None, format_hint: str = None, **kwargs):
        # General
        self.raw_uri = uri
        self._uri_type = None
        self._filename = None
        self._extension = None
        self._format_hint = None
        self._kwargs = kwargs
        self._result = None  # Some write actions may have a result

        # To handle the user-side
        self._filename_zip = None  # not None if a zipfile is used
        self._bytes = None  # Incoming bytes
        self._zipfile = None  # To store a zipfile instance (if used)

        # To handle the plugin side
        self._file = None  # To store the file instance
        self._file_is_local = False  # whether the data needs to be copied at end
        self._filename_local = None  # not None if using tempfile on this FS
        self._firstbytes = None  # For easy header parsing

        # Check mode
        try:
            self._mode = Mode(mode)
        except ValueError as err:
            raise ValueError(f"Invalid Request.Mode: {mode}") from err

        # Parse what was given
        self._parse_uri(uri)

        # Set extension
        if extension is not None:
            # Slice instead of index so that an empty string is rejected with
            # the intended ValueError rather than an IndexError.
            if extension[:1] != ".":
                raise ValueError(
                    "`extension` should be a file extension starting with a `.`,"
                    f" but is `{extension}`."
                )
            self._extension = extension
        elif self._filename is not None:
            if self._uri_type in (URI_FILENAME, URI_ZIPPED):
                path = self._filename
            else:
                # Strip query string etc. from URLs before taking the suffix
                path = urlparse(self._filename).path
            ext = Path(path).suffix.lower()
            self._extension = ext if ext != "" else None

        if format_hint is not None:
            warnings.warn(
                "The usage of `format_hint` is deprecated and will be removed "
                "in ImageIO v3. Use `extension` instead.",
                DeprecationWarning,
            )

        if format_hint is not None and format_hint[:1] != ".":
            raise ValueError(
                "`format_hint` should be a file extension starting with a `.`,"
                f" but is `{format_hint}`."
            )

        # Goes through the property setter, which also fills in the
        # extension if none could be determined above.
        self.format_hint = format_hint

    def _parse_uri(self, uri):
        """Try to figure out what we were given.

        Sets ``_uri_type`` and ``_filename`` (and, depending on the kind of
        resource, ``_bytes``, ``_file`` or ``_filename_zip``).

        Raises
        ------
        RuntimeError
            If a write to one of the standard ("imageio:") images is requested.
        ValueError
            If an unknown standard image is requested.
        IOError
            If the uri cannot be interpreted, if writing to http/ftp is
            requested, or if a standard image is requested without the
            "imageio:" prefix.
        FileNotFoundError
            If the file to read, or the directory to write to, does not exist.
        """
        is_read_request = self.mode.io_mode is IOMode.read
        is_write_request = self.mode.io_mode is IOMode.write

        if isinstance(uri, str):
            # Explicit
            if uri.startswith("imageio:"):
                if is_write_request:
                    raise RuntimeError("Cannot write to the standard images.")
                fn = uri.split(":", 1)[-1].lower()
                # Allow addressing a member of a standard zip file
                fn, _, zip_part = fn.partition(".zip/")
                if zip_part:
                    fn += ".zip"
                if fn not in EXAMPLE_IMAGES:
                    raise ValueError("Unknown standard image %r." % fn)
                self._uri_type = URI_FILENAME
                self._filename = get_remote_file("images/" + fn, auto=True)
                if zip_part:
                    self._filename += "/" + zip_part
            elif uri.startswith(("http://", "https://")):
                self._uri_type = URI_HTTP
                self._filename = uri
            elif uri.startswith(("ftp://", "ftps://")):
                self._uri_type = URI_FTP
                self._filename = uri
            elif uri.startswith("file://"):
                self._uri_type = URI_FILENAME
                self._filename = uri[7:]
            elif uri.startswith(SPECIAL_READ_URIS) and is_read_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            elif uri.startswith(RETURN_BYTES) and is_write_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            else:
                self._uri_type = URI_FILENAME
                self._filename = uri

        elif isinstance(uri, memoryview) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri.tobytes()
        elif isinstance(uri, bytes) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri
        elif isinstance(uri, Path):
            self._uri_type = URI_FILENAME
            self._filename = str(uri)
        # Files
        elif is_read_request:
            if hasattr(uri, "read") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be read from here
        elif is_write_request:
            if hasattr(uri, "write") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be written here

        # Expand user dir
        if self._uri_type == URI_FILENAME and self._filename.startswith("~"):
            self._filename = os.path.expanduser(self._filename)

        # Check if a zipfile
        if self._uri_type == URI_FILENAME:
            # Search for zip extension followed by a path separator
            for needle in [".zip/", ".zip\\"]:
                zip_i = self._filename.lower().find(needle)
                if zip_i > 0:
                    zip_i += 4  # position just past ".zip"
                    zip_path = self._filename[:zip_i]
                    if os.path.isdir(zip_path):
                        pass  # is an existing dir (see #548)
                    elif is_write_request or os.path.isfile(zip_path):
                        self._uri_type = URI_ZIPPED
                        self._filename_zip = (
                            zip_path,
                            self._filename[zip_i:].lstrip("/\\"),
                        )
                        break

        # Check if we could read it
        if self._uri_type is None:
            uri_r = repr(uri)
            if len(uri_r) > 60:
                uri_r = uri_r[:57] + "..."
            raise IOError("Cannot understand given URI: %s." % uri_r)

        # Check if this is supported
        if is_write_request and self._uri_type in (URI_HTTP, URI_FTP):
            raise IOError("imageio does not support writing to http/ftp.")

        # Deprecated way to load standard images, give a sensible error message
        if is_read_request and self._uri_type in (URI_FILENAME, URI_ZIPPED):
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if (not os.path.exists(fn)) and (fn in EXAMPLE_IMAGES):
                raise IOError(
                    "No such file: %r. This file looks like one of "
                    "the standard images, but from imageio 2.1, "
                    "standard images have to be specified using "
                    '"imageio:%s".' % (fn, fn)
                )

        # Make filename absolute
        if self._uri_type in (URI_FILENAME, URI_ZIPPED):
            if self._filename_zip:
                self._filename_zip = (
                    os.path.abspath(self._filename_zip[0]),
                    self._filename_zip[1],
                )
            else:
                self._filename = os.path.abspath(self._filename)

        # Check whether file name is valid
        if self._uri_type in (URI_FILENAME, URI_ZIPPED):
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if is_read_request:
                # Reading: check that the file exists (but is allowed a dir)
                if not os.path.exists(fn):
                    raise FileNotFoundError("No such file: '%s'" % fn)
            else:
                # Writing: check that the directory to write to does exist
                dn = os.path.dirname(fn)
                if not os.path.exists(dn):
                    raise FileNotFoundError("The directory %r does not exist" % dn)

    @property
    def filename(self):
        """Name of the ImageResource.

        The uri for which reading/saving was requested. This
        can be a filename, an http address, or other resource
        identifier. Do not rely on the filename to obtain the data,
        but use ``get_file()`` or ``get_local_filename()`` instead.
        """
        return self._filename

    @property
    def extension(self) -> Optional[str]:
        """The (lowercase) extension of the requested filename.
        Suffixes in url's are stripped. Can be None if the request is
        not based on a filename.
        """
        return self._extension

    @property
    def format_hint(self) -> Optional[str]:
        """The (deprecated) format hint supplied by the user, or None."""
        return self._format_hint

    @format_hint.setter
    def format_hint(self, format: str) -> None:
        self._format_hint = format
        # The hint doubles as the extension when none is known yet
        if self._extension is None:
            self._extension = format

    @property
    def mode(self):
        """The mode of the request. The first character is "r" or "w",
        indicating a read or write request. The second character is
        used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
        """
        return self._mode

    @property
    def kwargs(self):
        """The dict of keyword arguments supplied by the user."""
        return self._kwargs

    # For obtaining data

    def get_file(self):
        """get_file()
        Get a file object for the resource associated with this request.
        If this is a reading request, the file is in read mode,
        otherwise in write mode. This method is not thread safe. Plugins
        should not close the file when done.

        This is the preferred way to read/write the data. But if a
        format cannot handle file-like objects, they should use
        ``get_local_filename()``.
        """
        want_to_write = self.mode.io_mode is IOMode.write

        # Is there already a file?
        # Either _uri_type == URI_FILE, or we already opened the file,
        # e.g. by using firstbytes
        if self._file is not None:
            return self._file

        if self._uri_type == URI_BYTES:
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                self._file = BytesIO(self._bytes)

        elif self._uri_type == URI_FILENAME:
            if want_to_write:
                self._file = open(self.filename, "wb")
            else:
                self._file = open(self.filename, "rb")

        elif self._uri_type == URI_ZIPPED:
            # Get the correct filename
            filename, name = self._filename_zip
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                # Open zipfile and open new file object for specific file
                self._zipfile = zipfile.ZipFile(filename, "r")
                self._file = self._zipfile.open(name, "r")
                self._file = SeekableFileObject(self._file)

        # NOTE: this must be a membership test against both types; the
        # expression ``URI_HTTP or URI_FTP`` would evaluate to URI_HTTP only
        # and leave FTP resources unopened.
        elif self._uri_type in (URI_HTTP, URI_FTP):
            assert not want_to_write  # This should have been tested in init
            timeout = os.getenv("IMAGEIO_REQUEST_TIMEOUT")
            if timeout is None or not timeout.isdigit():
                timeout = 5
            self._file = urlopen(self.filename, timeout=float(timeout))
            self._file = SeekableFileObject(self._file)

        return self._file

    def get_local_filename(self):
        """get_local_filename()
        If the filename is an existing file on this filesystem, return
        that. Otherwise a temporary file is created on the local file
        system which can be used by the format to read from or write to.

        The temporary file is created once per request; repeated calls
        return the same path. It is removed again in ``finish()``.
        """

        if self._uri_type == URI_FILENAME:
            return self._filename

        # Reuse an existing temp file: creating a second one would leak the
        # first and, for reads, copy from an already-consumed file object.
        if self._filename_local is not None:
            return self._filename_local

        # Get filename
        if self.extension is not None:
            ext = self.extension
        else:
            ext = os.path.splitext(self._filename)[1]
        fd, self._filename_local = tempfile.mkstemp(ext, "imageio_")
        os.close(fd)
        # Write stuff to it?
        if self.mode.io_mode == IOMode.read:
            with open(self._filename_local, "wb") as file:
                shutil.copyfileobj(self.get_file(), file)
        return self._filename_local

    def finish(self) -> None:
        """Wrap up this request.

        Finishes any pending reads or writes, closes any open files and frees
        any resources allocated by this request.
        """

        if self.mode.io_mode == IOMode.write:
            # See if we "own" the data and must put it somewhere
            data = None
            if self._filename_local:
                data = Path(self._filename_local).read_bytes()
            elif self._file_is_local:
                self._file_is_local = False
                data = self._file.getvalue()

            # Put the data in the right place
            if data is not None:
                if self._uri_type == URI_BYTES:
                    self._result = data  # Picked up by imread function
                elif self._uri_type == URI_FILE:
                    self._file.write(data)
                elif self._uri_type == URI_ZIPPED:
                    # Context manager closes the archive even if writing fails
                    with zipfile.ZipFile(self._filename_zip[0], "a") as zf:
                        zf.writestr(self._filename_zip[1], data)
                # elif self._uri_type == URI_FILENAME: -> is always direct
                # elif self._uri_type == URI_FTP/HTTP: -> write not supported

        # Close open files that we know of (and are responsible for)
        if self._file and self._uri_type != URI_FILE:
            self._file.close()
            self._file = None
        if self._zipfile:
            self._zipfile.close()
            self._zipfile = None

        # Remove temp file
        if self._filename_local:
            try:
                os.remove(self._filename_local)
            except Exception:  # pragma: no cover
                # Best effort: failing to clean up must not break the request
                warnings.warn(
                    "Failed to delete the temporary file at "
                    f"`{self._filename_local}`. Please report this issue."
                )
            self._filename_local = None

        # Detach so gc can clean even if a reference of self lingers
        self._bytes = None

    def get_result(self):
        """For internal use. In some situations a write action can have
        a result (bytes data). That is obtained with this function.

        The result is handed out once; subsequent calls return None.
        """
        # Is there a reason to disallow reading multiple times?
        self._result, res = None, self._result
        return res

    @property
    def firstbytes(self):
        """The first 256 bytes of the file. These can be used to
        parse the header to determine the file-format.
        """
        if self._firstbytes is None:
            self._read_first_bytes()
        return self._firstbytes

    def _read_first_bytes(self, N=256):
        """Fill ``_firstbytes`` with (at most) the first ``N`` bytes.

        The data is taken from the incoming bytes if there are any, otherwise
        from the file object, whose position is restored afterwards.

        Raises
        ------
        IOError
            If the file cannot be opened (and the resource is not a
            directory), or if the user-supplied file object cannot be
            rewound after reading.
        """
        if self._bytes is not None:
            self._firstbytes = self._bytes[:N]
        else:
            # Prepare
            try:
                f = self.get_file()
            except IOError:
                if os.path.isdir(self.filename):  # A directory, e.g. for DICOM
                    self._firstbytes = bytes()
                    return
                raise
            try:
                i = f.tell()
            except Exception:
                i = None
            # Read
            self._firstbytes = read_n_bytes(f, N)
            # Set back
            try:
                if i is None:
                    raise Exception("cannot seek with None")
                f.seek(i)
            except Exception:
                # Prevent get_file() from reusing the file
                self._file = None
                # If the given URI was a file object, we have a problem,
                if self._uri_type == URI_FILE:
                    raise IOError("Cannot seek back after getting firstbytes!")
637
638
def read_n_bytes(f, N):
    """read_n_bytes(file, n)

    Read up to ``N`` bytes from the file object ``f``. ``f.read`` is called
    repeatedly until ``N`` bytes were collected or it returns nothing, so
    fewer bytes are returned if the file runs out early.
    """
    pieces = []
    remaining = N
    while remaining > 0:
        piece = f.read(remaining)
        if not piece:
            # Nothing more to get from this file
            break
        pieces.append(piece)
        remaining -= len(piece)
    return b"".join(pieces)
652
653
class SeekableFileObject:
    """A read-only wrapper file object that adds support for seeking, even if
    the wrapped file object does not. This allows us to stream from http and
    still use Pillow.

    Everything read from the wrapped file is kept in an internal buffer, so
    seeking backwards is served from memory and seeking forwards reads (and
    buffers) as much as is needed.

    Parameters
    ----------
    f : file-like
        The object to wrap. Only its ``read`` and ``close`` methods are used.
    """

    def __init__(self, f):
        self.f = f
        self._i = 0  # >=0 but can exceed buffer
        self._buffer = b""
        self._have_all = False  # True once the wrapped file is exhausted
        self.closed = False

    def read(self, n=None):
        """Read and return up to ``n`` bytes from the current position.

        If ``n`` is None or negative, everything up to the end is returned.
        """
        # Fix up n
        if n is None:
            pass
        else:
            n = int(n)
            if n < 0:
                n = None

        # Can and must we read more?
        if not self._have_all:
            more = b""
            if n is None:
                more = self.f.read()
                self._have_all = True
            else:
                want_i = self._i + n
                want_more = want_i - len(self._buffer)
                if want_more > 0:
                    more = self.f.read(want_more)
                    if len(more) < want_more:
                        self._have_all = True
            self._buffer += more

        # Read data from buffer and update pointer
        if n is None:
            res = self._buffer[self._i :]
        else:
            res = self._buffer[self._i : self._i + n]
        self._i += len(res)

        return res

    def readline(self, size=-1):
        """Read and return one line (including the trailing newline, if any).

        If ``size`` is given and non-negative, at most ``size`` bytes are
        returned. An empty bytes object is returned at the end of the data.
        """
        limit = None if size is None or size < 0 else int(size)
        start = self._i
        chunks = []
        total = 0
        chunk_size = 1024
        while limit is None or total < limit:
            want = chunk_size if limit is None else min(chunk_size, limit - total)
            chunk = self.read(want)
            if not chunk:
                break
            newline_at = chunk.find(b"\n")
            if newline_at >= 0:
                chunk = chunk[: newline_at + 1]
            chunks.append(chunk)
            total += len(chunk)
            if newline_at >= 0:
                break
        line = b"".join(chunks)
        # read() may have moved past the newline; the surplus stays in the
        # buffer, so simply place the pointer right after the line.
        self._i = start + len(line)
        return line

    def tell(self):
        """Return the current position."""
        return self._i

    def seek(self, i, mode=0):
        """Move to a new position and return it.

        ``mode`` is the usual whence value: 0 (absolute), 1 (relative to the
        current position) or 2 (relative to the end). Raises ``ValueError``
        for a negative absolute position or an invalid ``mode``.
        """
        # Mimic BytesIO behavior

        # Get the absolute new position
        i = int(i)
        if mode == 0:
            if i < 0:
                raise ValueError("negative seek value " + str(i))
            real_i = i
        elif mode == 1:
            real_i = max(0, self._i + i)  # negative ok here
        elif mode == 2:
            if not self._have_all:
                self.read()
            real_i = max(0, len(self._buffer) + i)
        else:
            # Report the offending whence value, not the offset
            raise ValueError("invalid whence (%s, should be 0, 1 or 2)" % mode)

        # Read some?
        if real_i <= len(self._buffer):
            pass  # no need to read
        elif not self._have_all:
            assert real_i > self._i  # if we don't have all, _i cannot be > _buffer
            self.read(real_i - self._i)  # sets self._i

        self._i = real_i
        return self._i

    def close(self):
        """Mark this object as closed and close the wrapped file."""
        self.closed = True
        self.f.close()

    def isatty(self):
        return False

    def seekable(self):
        return True
743
744
class InitializationError(Exception):
    """Raised when a plugin cannot initialize from the given request.

    This is an *internal* error used by plugins that fail to handle a given
    request. It lets callers tell "this plugin is not compatible with this
    request" apart from an actual error or bug inside the plugin.
    """