1# -*- coding: utf-8 -*-
2# imageio is distributed under the terms of the (new) BSD License.
3
4"""
Definition of the Request object, which acts as a kind of bridge between
what the user wants and what the plugins can provide.
7"""
8
9import os
10from io import BytesIO
11import zipfile
12import tempfile
13import shutil
14import enum
15import warnings
16
17from ..core import urlopen, get_remote_file
18
19from pathlib import Path
20from urllib.parse import urlparse
21from typing import Optional
22
# URI types: internal tags that record which kind of resource a request's
# URI was recognised as.
URI_BYTES = 1  # in-memory bytes, or one of the special "<...>" pseudo-URIs
URI_FILE = 2  # an already-open file-like object supplied by the caller
URI_FILENAME = 3  # a path on the local filesystem
URI_ZIPPED = 4  # a member inside a zip archive on the local filesystem
URI_HTTP = 5  # an http:// or https:// address
URI_FTP = 6  # an ftp:// or ftps:// address
30
31
class IOMode(str, enum.Enum):
    """Available IO modes

    This is a helper enum for ``Request.Mode`` which is a composite of a
    ``Request.ImageMode`` and ``Request.IOMode``. The IOMode tells the
    plugin whether the resource should be read from or written to.
    Available values are

    - read ("r"): Read from the specified resource
    - write ("w"): Write to the specified resource

    """

    read = "r"
    write = "w"
46
47
class ImageMode(str, enum.Enum):
    """Available Image modes

    This is a helper enum for ``Request.Mode`` which is a composite of a
    ``Request.ImageMode`` and ``Request.IOMode``. The image mode tells the
    plugin the desired (and expected) image shape. Available values are

    - single_image ("i"): Return a single image extending in two spatial
      dimensions
    - multi_image ("I"): Return a list of images extending in two spatial
      dimensions
    - single_volume ("v"): Return an image extending into multiple dimensions.
      E.g. three spatial dimensions for image stacks, or two spatial and one
      time dimension for videos
    - multi_volume ("V"): Return a list of images extending into multiple
      dimensions.
    - any_mode ("?"): Return an image in any format (the plugin decides the
      appropriate action).

    """

    single_image = "i"
    multi_image = "I"
    single_volume = "v"
    multi_volume = "V"
    any_mode = "?"
74
75
@enum.unique
class Mode(str, enum.Enum):
    """How a resource is to be accessed and what shape of data is expected.

    Each member is a two-character string. The first character is the
    ``Request.IOMode`` (read or write) and the second character is the
    ``Request.ImageMode`` (the kind of image data). See those two enums for
    the meaning of the individual characters.

    Every combination of the two is available:

    - read_single_image ("ri")
    - read_multi_image ("rI")
    - read_single_volume ("rv")
    - read_multi_volume ("rV")
    - read_any ("r?")
    - write_single_image ("wi")
    - write_multi_image ("wI")
    - write_single_volume ("wv")
    - write_multi_volume ("wV")
    - write_any ("w?")

    Examples
    --------
    >>> Request.Mode("rI")  # a list of simple images should be read from the resource
    >>> Request.Mode("wv")  # a single volume should be written to the resource

    """

    read_single_image = "ri"
    read_multi_image = "rI"
    read_single_volume = "rv"
    read_multi_volume = "rV"
    read_any = "r?"
    write_single_image = "wi"
    write_multi_image = "wI"
    write_single_volume = "wv"
    write_multi_volume = "wV"
    write_any = "w?"

    @classmethod
    def _missing_(cls, value):
        """Resolve the bare modes "r" and "w".

        ``enum`` invokes this hook when a value cannot be looked up directly.
        The single-character modes "r" and "w" are mapped onto the
        "don't care" members "r?" and "w?"; anything else is rejected.

        More info on _missing_:
        https://docs.python.org/3/library/enum.html#supported-sunder-names

        Raises
        ------
        ValueError
            If ``value`` is neither "r" nor "w".
        """
        if value == "r":
            return cls.read_any
        if value == "w":
            return cls.write_any
        raise ValueError(f"{value} is no valid Mode.")

    @property
    def io_mode(self) -> IOMode:
        """The read/write half of the mode (its first character)."""
        io_char = self.value[0]
        return IOMode(io_char)

    @property
    def image_mode(self) -> ImageMode:
        """The image-kind half of the mode (its second character)."""
        image_char = self.value[1]
        return ImageMode(image_char)

    def __getitem__(self, key):
        """Index like the old two-character string modes.

        Index 0 yields the IO mode and index 1 the image mode; any other
        key raises ``IndexError``.
        """
        if key == 0:
            return self.io_mode
        if key == 1:
            return self.image_mode
        raise IndexError(f"Mode has no item {key}")
154
155
# URI prefixes that denote special (non-file) read sources. They are matched
# with ``str.startswith``, so "<video" (no closing bracket) matches any URI
# that begins with it -- presumably to allow a device index such as
# "<video0>"; confirm against the plugins that consume these URIs.
SPECIAL_READ_URIS = "<video", "<screen>", "<clipboard>"

# The user can use this string in a write call to get the data back as bytes.
RETURN_BYTES = "<bytes>"

# Example images that will be auto-downloaded.
# Maps the file name (as used after the "imageio:" prefix) to a short,
# human-readable description.
EXAMPLE_IMAGES = {
    "astronaut.png": "Image of the astronaut Eileen Collins",
    "camera.png": "A grayscale image of a photographer",
    "checkerboard.png": "Black and white image of a checkerboard",
    "wood.jpg": "A (repeatable) texture of wooden planks",
    "bricks.jpg": "A (repeatable) texture of stone bricks",
    "clock.png": "Photo of a clock with motion blur (Stefan van der Walt)",
    "coffee.png": "Image of a cup of coffee (Rachel Michetti)",
    "chelsea.png": "Image of Stefan's cat",
    "wikkie.png": "Image of Almar's cat",
    "coins.png": "Image showing greek coins from Pompeii",
    "horse.png": "Image showing the silhouette of a horse (Andreas Preuss)",
    "hubble_deep_field.png": "Photograph taken by Hubble telescope (NASA)",
    "immunohistochemistry.png": "Immunohistochemical (IHC) staining",
    "moon.png": "Image showing a portion of the surface of the moon",
    "page.png": "A scanned page of text",
    "text.png": "A photograph of handdrawn text",
    "chelsea.zip": "The chelsea.png in a zipfile (for testing)",
    "chelsea.bsdf": "The chelsea.png in a BSDF file (for testing)",
    "newtonscradle.gif": "Animated GIF of a newton's cradle",
    "cockatoo.mp4": "Video file of a cockatoo",
    "cockatoo_yuv420.mp4": "Video file of a cockatoo with yuv420 pixel format",
    "stent.npz": "Volumetric image showing a stented abdominal aorta",
    "meadow_cube.jpg": "A cubemap image of a meadow, e.g. to render a skybox.",
}
187
188
class Request(object):
    """ImageResource handling utility.

    Represents a request for reading or saving an image resource. This
    object wraps information to that request and acts as an interface
    for the plugins to several resources; it allows the user to read
    from filenames, files, http, zipfiles, raw bytes, etc., but offer
    a simple interface to the plugins via ``get_file()`` and
    ``get_local_filename()``.

    For each read/write operation a single Request instance is used and passed
    to the can_read/can_write method of a format, and subsequently to
    the Reader/Writer class. This allows rudimentary passing of
    information between different formats and between a format and
    associated reader/writer.

    Parameters
    ----------
    uri : {str, bytes, memoryview, pathlib.Path, file}
        The resource to load the image from (or to write it to).
    mode : str
        The first character is "r" or "w", indicating a read or write
        request. The second character is used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
    extension : str, optional
        File extension, starting with a ".", that overrides the extension
        derived from the URI.
    format_hint : str, optional
        Deprecated; use ``extension`` instead. Must start with a ".". It is
        also used as the extension when none could be determined otherwise.
    **kwargs
        Stored unchanged and exposed through the ``kwargs`` property.

    Raises
    ------
    ValueError
        If ``mode`` is not a valid ``Mode``, or if ``extension`` or
        ``format_hint`` does not start with a ".".
    IOError, FileNotFoundError, RuntimeError
        If the URI cannot be understood or used; see ``_parse_uri``.

    """

    def __init__(self, uri, mode, *, extension=None, format_hint: str = None, **kwargs):
        # General
        self.raw_uri = uri
        self._uri_type = None
        self._filename = None
        self._extension = None
        self._format_hint = None
        self._kwargs = kwargs
        self._result = None  # Some write actions may have a result

        # To handle the user-side
        self._filename_zip = None  # not None if a zipfile is used
        self._bytes = None  # Incoming bytes
        self._zipfile = None  # To store a zipfile instance (if used)

        # To handle the plugin side
        self._file = None  # To store the file instance
        self._file_is_local = False  # whether the data needs to be copied at end
        self._filename_local = None  # not None if using tempfile on this FS
        self._firstbytes = None  # For easy header parsing

        # Check mode
        try:
            self._mode = Mode(mode)
        except ValueError as err:
            raise ValueError(f"Invalid Request.Mode: {mode}") from err

        # Parse what was given
        self._parse_uri(uri)

        # Set extension
        if extension is not None:
            # Slice instead of indexing so that an empty string is reported
            # as an invalid extension rather than raising IndexError.
            if extension[:1] != ".":
                raise ValueError(
                    "`extension` should be a file extension starting with a `.`,"
                    f" but is `{extension}`."
                )
            self._extension = extension
        elif self._filename is not None:
            if self._uri_type in (URI_FILENAME, URI_ZIPPED):
                path = self._filename
            else:
                # For URLs only the path component can carry the extension;
                # the query string and fragment are dropped.
                path = urlparse(self._filename).path
            ext = Path(path).suffix.lower()
            self._extension = ext if ext != "" else None

        if format_hint is not None:
            warnings.warn(
                "The usage of `format_hint` is deprecated and will be removed "
                "in ImageIO v3. Use `extension` instead.",
                DeprecationWarning,
            )

        if format_hint is not None and format_hint[:1] != ".":
            raise ValueError(
                "`format_hint` should be a file extension starting with a `.`,"
                f" but is `{format_hint}`."
            )

        # Goes through the property setter, which also fills in the
        # extension if none has been determined so far.
        self.format_hint = format_hint

    def _parse_uri(self, uri):
        """Try to figure out what we were given.

        Classifies ``uri`` into one of the ``URI_*`` types and fills in
        ``_uri_type``, ``_filename`` and, depending on the type, ``_bytes``,
        ``_file`` or ``_filename_zip``.

        Raises
        ------
        RuntimeError
            When attempting to write to an "imageio:" standard image.
        ValueError
            When an "imageio:" URI names an unknown standard image.
        IOError
            When the URI cannot be understood, when writing to http/ftp is
            requested, or when a missing file looks like a standard image
            that was requested without the "imageio:" prefix.
        FileNotFoundError
            When reading a file that does not exist, or writing into a
            directory that does not exist.
        """
        is_read_request = self.mode.io_mode is IOMode.read
        is_write_request = self.mode.io_mode is IOMode.write

        if isinstance(uri, str):
            # Explicit
            if uri.startswith("imageio:"):
                if is_write_request:
                    raise RuntimeError("Cannot write to the standard images.")
                fn = uri.split(":", 1)[-1].lower()
                # Support addressing a member inside a standard zip archive.
                fn, _, zip_part = fn.partition(".zip/")
                if zip_part:
                    fn += ".zip"
                if fn not in EXAMPLE_IMAGES:
                    raise ValueError("Unknown standard image %r." % fn)
                self._uri_type = URI_FILENAME
                self._filename = get_remote_file("images/" + fn, auto=True)
                if zip_part:
                    self._filename += "/" + zip_part
            elif uri.startswith("http://") or uri.startswith("https://"):
                self._uri_type = URI_HTTP
                self._filename = uri
            elif uri.startswith("ftp://") or uri.startswith("ftps://"):
                self._uri_type = URI_FTP
                self._filename = uri
            elif uri.startswith("file://"):
                self._uri_type = URI_FILENAME
                self._filename = uri[7:]  # strip the "file://" scheme
            elif uri.startswith(SPECIAL_READ_URIS) and is_read_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            elif uri.startswith(RETURN_BYTES) and is_write_request:
                self._uri_type = URI_BYTES
                self._filename = uri
            else:
                self._uri_type = URI_FILENAME
                self._filename = uri

        elif isinstance(uri, memoryview) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri.tobytes()
        elif isinstance(uri, bytes) and is_read_request:
            self._uri_type = URI_BYTES
            self._filename = "<bytes>"
            self._bytes = uri
        elif isinstance(uri, Path):
            self._uri_type = URI_FILENAME
            self._filename = str(uri)
        # Files
        elif is_read_request:
            if hasattr(uri, "read") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be read from here
        elif is_write_request:
            if hasattr(uri, "write") and hasattr(uri, "close"):
                self._uri_type = URI_FILE
                self._filename = "<file>"
                self._file = uri  # Data must be written here

        # Expand user dir
        if self._uri_type == URI_FILENAME and self._filename.startswith("~"):
            self._filename = os.path.expanduser(self._filename)

        # Check if a zipfile
        if self._uri_type == URI_FILENAME:
            # Search for zip extension followed by a path separator
            for needle in [".zip/", ".zip\\"]:
                zip_i = self._filename.lower().find(needle)
                if zip_i > 0:
                    zip_i += 4  # index just past ".zip"
                    zip_path = self._filename[:zip_i]
                    if os.path.isdir(zip_path):
                        pass  # is an existing dir (see #548)
                    elif is_write_request or os.path.isfile(zip_path):
                        self._uri_type = URI_ZIPPED
                        self._filename_zip = (
                            zip_path,
                            self._filename[zip_i:].lstrip("/\\"),
                        )
                        break

        # Check if we could read it
        if self._uri_type is None:
            uri_r = repr(uri)
            if len(uri_r) > 60:
                uri_r = uri_r[:57] + "..."
            raise IOError("Cannot understand given URI: %s." % uri_r)

        # Check if this is supported
        if is_write_request and self._uri_type in (URI_HTTP, URI_FTP):
            raise IOError("imageio does not support writing to http/ftp.")

        # Deprecated way to load standard images, give a sensible error message
        if is_read_request and self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if (not os.path.exists(fn)) and (fn in EXAMPLE_IMAGES):
                raise IOError(
                    "No such file: %r. This file looks like one of "
                    "the standard images, but from imageio 2.1, "
                    "standard images have to be specified using "
                    '"imageio:%s".' % (fn, fn)
                )

        # Make filename absolute
        if self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            if self._filename_zip:
                self._filename_zip = (
                    os.path.abspath(self._filename_zip[0]),
                    self._filename_zip[1],
                )
            else:
                self._filename = os.path.abspath(self._filename)

        # Check whether file name is valid
        if self._uri_type in [URI_FILENAME, URI_ZIPPED]:
            fn = self._filename
            if self._filename_zip:
                fn = self._filename_zip[0]
            if is_read_request:
                # Reading: check that the file exists (but is allowed a dir)
                if not os.path.exists(fn):
                    raise FileNotFoundError("No such file: '%s'" % fn)
            else:
                # Writing: check that the directory to write to does exist
                dn = os.path.dirname(fn)
                if not os.path.exists(dn):
                    raise FileNotFoundError("The directory %r does not exist" % dn)

    @property
    def filename(self):
        """Name of the ImageResource.

        The uri for which reading/saving was requested. This
        can be a filename, an http address, or other resource
        identifier. Do not rely on the filename to obtain the data,
        but use ``get_file()`` or ``get_local_filename()`` instead.
        """
        return self._filename

    @property
    def extension(self) -> Optional[str]:
        """The (lowercase) extension of the requested filename.
        Suffixes in url's are stripped. Can be None if the request is
        not based on a filename.
        """
        return self._extension

    @property
    def format_hint(self) -> Optional[str]:
        """The (deprecated) format hint given for this request, if any."""
        return self._format_hint

    @format_hint.setter
    def format_hint(self, format: str) -> None:
        self._format_hint = format
        # The hint doubles as the extension when none is known yet.
        if self._extension is None:
            self._extension = format

    @property
    def mode(self):
        """The mode of the request. The first character is "r" or "w",
        indicating a read or write request. The second character is
        used to indicate the kind of data:
        "i" for an image, "I" for multiple images, "v" for a volume,
        "V" for multiple volumes, "?" for don't care.
        """
        return self._mode

    @property
    def kwargs(self):
        """The dict of keyword arguments supplied by the user."""
        return self._kwargs

    # For obtaining data

    def get_file(self):
        """get_file()
        Get a file object for the resource associated with this request.
        If this is a reading request, the file is in read mode,
        otherwise in write mode. This method is not thread safe. Plugins
        should not close the file when done.

        This is the preferred way to read/write the data. But if a
        format cannot handle file-like objects, they should use
        ``get_local_filename()``.
        """
        want_to_write = self.mode.io_mode is IOMode.write

        # Is there already a file?
        # Either _uri_type == URI_FILE, or we already opened the file,
        # e.g. by using firstbytes
        if self._file is not None:
            return self._file

        if self._uri_type == URI_BYTES:
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                self._file = BytesIO(self._bytes)

        elif self._uri_type == URI_FILENAME:
            if want_to_write:
                self._file = open(self.filename, "wb")
            else:
                self._file = open(self.filename, "rb")

        elif self._uri_type == URI_ZIPPED:
            # Get the correct filename
            filename, name = self._filename_zip
            if want_to_write:
                # Create new file object, we catch the bytes in finish()
                self._file = BytesIO()
                self._file_is_local = True
            else:
                # Open zipfile and open new file object for specific file
                self._zipfile = zipfile.ZipFile(filename, "r")
                self._file = self._zipfile.open(name, "r")
                self._file = SeekableFileObject(self._file)

        # NOTE: this must be a membership test against both types. The
        # expression ``[URI_HTTP or URI_FTP]`` evaluates to ``[URI_HTTP]``,
        # which made FTP requests fall through and return None.
        elif self._uri_type in (URI_HTTP, URI_FTP):
            assert not want_to_write  # This should have been tested in init
            timeout = os.getenv("IMAGEIO_REQUEST_TIMEOUT")
            if timeout is None or not timeout.isdigit():
                timeout = 5
            self._file = urlopen(self.filename, timeout=float(timeout))
            # Wrap so that plugins can seek in the streamed response.
            self._file = SeekableFileObject(self._file)

        return self._file

    def get_local_filename(self):
        """get_local_filename()
        If the filename is an existing file on this filesystem, return
        that. Otherwise a temporary file is created on the local file
        system which can be used by the format to read from or write to.
        """

        if self._uri_type == URI_FILENAME:
            return self._filename
        else:
            # Get filename
            if self.extension is not None:
                ext = self.extension
            else:
                ext = os.path.splitext(self._filename)[1]
            fd, self._filename_local = tempfile.mkstemp(ext, "imageio_")
            # Only the name is needed; the file is reopened by path below
            # (when reading) or by the plugin (when writing).
            os.close(fd)
            # Write stuff to it?
            if self.mode.io_mode == IOMode.read:
                with open(self._filename_local, "wb") as file:
                    shutil.copyfileobj(self.get_file(), file)
            return self._filename_local

    def finish(self) -> None:
        """Wrap up this request.

        Finishes any pending reads or writes, closes any open files and frees
        any resources allocated by this request.
        """

        if self.mode.io_mode == IOMode.write:
            # See if we "own" the data and must put it somewhere
            data = None
            if self._filename_local:
                data = Path(self._filename_local).read_bytes()
            elif self._file_is_local:
                self._file_is_local = False
                data = self._file.getvalue()

            # Put the data in the right place
            if data is not None:
                if self._uri_type == URI_BYTES:
                    self._result = data  # Picked up by imread function
                elif self._uri_type == URI_FILE:
                    self._file.write(data)
                elif self._uri_type == URI_ZIPPED:
                    # The context manager closes the archive even if
                    # writing the member fails.
                    with zipfile.ZipFile(self._filename_zip[0], "a") as zf:
                        zf.writestr(self._filename_zip[1], data)
                # elif self._uri_type == URI_FILENAME: -> is always direct
                # elif self._uri_type == URI_FTP/HTTP: -> write not supported

        # Close open files that we know of (and are responsible for).
        # A file object handed in by the user (URI_FILE) is left open.
        if self._file and self._uri_type != URI_FILE:
            self._file.close()
            self._file = None
        if self._zipfile:
            self._zipfile.close()
            self._zipfile = None

        # Remove temp file
        if self._filename_local:
            try:
                os.remove(self._filename_local)
            except Exception:  # pragma: no cover
                warnings.warn(
                    "Failed to delete the temporary file at "
                    f"`{self._filename_local}`. Please report this issue."
                )
            self._filename_local = None

        # Detach so gc can clean even if a reference of self lingers
        self._bytes = None

    def get_result(self):
        """For internal use. In some situations a write action can have
        a result (bytes data). That is obtained with this function.

        The stored result is cleared, so a second call returns None.
        """
        # Is there a reason to disallow reading multiple times?
        self._result, res = None, self._result
        return res

    @property
    def firstbytes(self):
        """The first 256 bytes of the file. These can be used to
        parse the header to determine the file-format.
        """
        if self._firstbytes is None:
            self._read_first_bytes()
        return self._firstbytes

    def _read_first_bytes(self, N=256):
        """Read up to ``N`` leading bytes into ``self._firstbytes``.

        The file position is restored afterwards when possible. If it cannot
        be restored, the cached file object is dropped so that ``get_file()``
        opens a fresh one; for a user-supplied file object that is not
        possible and an ``IOError`` is raised instead.
        """
        if self._bytes is not None:
            self._firstbytes = self._bytes[:N]
            return

        # Prepare
        try:
            f = self.get_file()
        except IOError:
            if os.path.isdir(self.filename):  # A directory, e.g. for DICOM
                self._firstbytes = bytes()
                return
            raise
        try:
            i = f.tell()
        except Exception:
            i = None
        # Read
        self._firstbytes = read_n_bytes(f, N)
        # Set back
        restored = False
        if i is not None:
            try:
                f.seek(i)
                restored = True
            except Exception:
                pass  # handled below, same as an unknown position
        if not restored:
            # Prevent get_file() from reusing the file
            self._file = None
            # If the given URI was a file object, we have a problem,
            if self._uri_type == URI_FILE:
                raise IOError("Cannot seek back after getting firstbytes!")
638
639
def read_n_bytes(f, N):
    """read_n_bytes(file, n)

    Collect bytes from ``f`` by calling ``f.read`` repeatedly until at least
    ``N`` bytes have been gathered or a read returns nothing. The result is
    shorter than ``N`` when the file runs out of data first.
    """
    collected = b""
    missing = N
    while missing > 0:
        chunk = f.read(missing)
        if not chunk:
            # Nothing more to read
            break
        collected += chunk
        missing = N - len(collected)
    return collected
653
654
class SeekableFileObject:
    """A readonly wrapper file object that adds support for seeking, even if
    the wrapped file object does not. This allows us to stream from http and
    still use Pillow.

    Everything read from the wrapped object is kept in an in-memory buffer;
    seeking only moves a position within (or past) that buffer.

    Parameters
    ----------
    f : file-like
        The object to wrap. Only its ``read`` and ``close`` methods are used.
    """

    # Number of bytes requested from the wrapped file per step in readline().
    _READLINE_CHUNK = 1024

    def __init__(self, f):
        self.f = f
        self._i = 0  # >=0 but can exceed buffer
        self._buffer = b""
        self._have_all = False
        self.closed = False

    def read(self, n=None):
        """Read up to ``n`` bytes from the current position.

        ``None`` or a negative ``n`` reads everything up to the end.
        """
        # Fix up n
        if n is None:
            pass
        else:
            n = int(n)
            if n < 0:
                n = None

        # Can and must we read more?
        if not self._have_all:
            more = b""
            if n is None:
                more = self.f.read()
                self._have_all = True
            else:
                want_i = self._i + n
                want_more = want_i - len(self._buffer)
                if want_more > 0:
                    more = self.f.read(want_more)
                    if len(more) < want_more:
                        self._have_all = True
            self._buffer += more

        # Read data from buffer and update pointer
        if n is None:
            res = self._buffer[self._i :]
        else:
            res = self._buffer[self._i : self._i + n]
        self._i += len(res)

        return res

    def readline(self, size=-1):
        """Read and return one line, including its trailing newline.

        The line goes through the internal buffer so that ``tell()`` and
        ``seek()`` stay consistent. At most ``size`` bytes are returned when
        ``size`` is non-negative. Returns ``b""`` at the end of the data.
        """
        limit = None if size is None or int(size) < 0 else int(size)
        start = self._i
        scan_from = start  # avoid re-scanning bytes already searched

        while True:
            newline_at = self._buffer.find(b"\n", scan_from)
            if newline_at >= 0:
                end = newline_at + 1
                break
            if self._have_all:
                end = len(self._buffer)
                break
            if limit is not None and len(self._buffer) - start >= limit:
                end = start + limit
                break
            scan_from = max(start, len(self._buffer))
            more = self.f.read(self._READLINE_CHUNK)
            if more:
                self._buffer += more
            else:
                self._have_all = True

        if limit is not None:
            end = min(end, start + limit)
        # The position may lie beyond the buffered data after a seek.
        end = max(end, start)

        line = self._buffer[start:end]
        self._i = start + len(line)
        return line

    def tell(self):
        """Return the current position."""
        return self._i

    def seek(self, i, mode=0):
        """Move to a new position and return it.

        ``mode`` follows the usual whence convention: 0 is absolute, 1 is
        relative to the current position and 2 is relative to the end (which
        requires reading the wrapped file completely).

        Raises
        ------
        ValueError
            For a negative absolute offset or an invalid ``mode``.
        """
        # Mimic BytesIO behavior

        # Get the absolute new position
        i = int(i)
        if mode == 0:
            if i < 0:
                raise ValueError("negative seek value " + str(i))
            real_i = i
        elif mode == 1:
            real_i = max(0, self._i + i)  # negative ok here
        elif mode == 2:
            if not self._have_all:
                self.read()
            real_i = max(0, len(self._buffer) + i)
        else:
            # Report the offending whence value, not the offset.
            raise ValueError("invalid whence (%s, should be 0, 1 or 2)" % mode)

        # Read some?
        if real_i <= len(self._buffer):
            pass  # no need to read
        elif not self._have_all:
            assert real_i > self._i  # if we don't have all, _i cannot be > _buffer
            self.read(real_i - self._i)  # sets self._i

        self._i = real_i
        return self._i

    def close(self):
        """Mark this wrapper closed and close the wrapped file."""
        self.closed = True
        self.f.close()

    def isatty(self):
        return False

    def seekable(self):
        return True
744
745
class InitializationError(Exception):
    """Signal that a plugin cannot initialize from the request it was given.

    This exception is meant for internal use. Plugins raise it when they are
    unable to handle a request, so that an incompatibility between a plugin
    and a request can be told apart from a genuine error or bug inside the
    plugin.
    """