1# -*- coding: utf-8 -*-
2# Based on code from the vispy project
3# Distributed under the (new) BSD License. See LICENSE.txt for more info.
4
5"""Data downloading and reading functions
6"""
7
8from math import log
9import os
10from os import path as op
11import sys
12import shutil
13import time
14
15from . import appdata_dir, resource_dirs
16from . import StdoutProgressIndicator, urlopen
17
18
class InternetNotAllowedError(IOError):
    """Raised when a download is required but internet access is disabled.

    ``get_remote_file()`` raises this when the ``IMAGEIO_NO_INTERNET``
    environment variable is set. Plugins that need remote resources can
    simply call ``get_remote_file()``, but should catch this error and
    silently ignore it.
    """
25
26
class NeedDownloadError(IOError):
    """Raised when a remote file is missing locally and may not be fetched.

    Signals that the requested resource is not available on this machine
    and must be downloaded explicitly by the user (automatic download was
    not permitted for the request).
    """
31
32
def _copy_into_directory(filename, directory, nfname):
    """Copy ``filename`` to ``directory/nfname`` and return the new path."""
    target = op.join(directory, nfname)
    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(op.abspath(op.dirname(target)), exist_ok=True)
    shutil.copy(filename, target)
    return target


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created. If the file is found in another resource
        directory, it is copied into this directory.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date: the local copy is reused only if
        its timestamp (``os.path.getctime``, interpreted as UTC) is not
        older than that date.
    auto : bool
        Whether to auto-download the file if it's not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the ``IMAGEIO_NO_INTERNET``
        environment variable is set to "1", "true" or "yes".
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    IOError
        If the download itself fails (propagated from ``_fetch_file``).
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native

    # Directories to search; the given (or default) dir has preference.
    # A new list is built so the one returned by resource_dirs() is not
    # mutated.
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = [directory] + list(resource_dirs())

    # Try to find the resource locally
    for candidate_dir in dirs:
        filename = op.join(candidate_dir, nfname)
        if not op.isfile(filename):
            continue
        if force_download:
            if not isinstance(force_download, str):
                # Unconditional re-download: local copies are irrelevant
                continue
            ntime = time.strptime(force_download, "%Y-%m-%d")
            ftime = time.gmtime(op.getctime(filename))
            if ftime < ntime:
                print("File older than %s, updating..." % force_download)
                break
        # The local copy is usable; make sure it ends up in the directory
        # that the caller explicitly asked for.
        if given_directory and given_directory != candidate_dir:
            return _copy_into_directory(filename, given_directory, nfname)
        return filename

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError(
            "Resource %r is not available locally and automatic "
            "download is disabled." % fname
        )

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    os.makedirs(op.abspath(op.dirname(filename)), exist_ok=True)

    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On CI, we retry a few times before the final attempt below,
        # whose error (if any) is allowed to propagate.
        for _ in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
    _fetch_file(url, filename)  # pragma: no cover
    return filename  # pragma: no cover
130
131
def _fetch_file(url, file_name, print_destination=True):
    """Download ``url`` and store it as ``file_name``

    The data is first written to ``file_name + ".part"`` and moved into
    place only after the download completed, so a failed download never
    replaces an existing file. Up to four attempts are made.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If True, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If all attempts fail. The error of the last attempt is attached
        as ``__cause__``.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    initial_size = 0
    errors = []
    for tries in range(4):
        remote_file = None
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be).
            # The with-block guarantees the temp file is closed prior to
            # the move, also when the download raises.
            with open(temp_file_name, "wb") as local_file:
                _chunk_read(remote_file, local_file, initial_size=initial_size)
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Any failure counts as a failed attempt; remember it so the
            # final error can report the underlying cause.
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            # Release the connection on every attempt. Best effort: a
            # failure to close must not mask the download's own outcome.
            if remote_file is not None:
                try:
                    remote_file.close()
                except Exception:
                    pass
    else:
        # Only reached when no attempt hit ``break``; every attempt then
        # recorded an exception, so ``errors`` is non-empty here.
        raise IOError(
            "Unable to download %r. Perhaps there is no internet "
            "connection? If there is, please report this problem."
            % os.path.basename(file_name)
        ) from errors[-1]
189
190
def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show advancement

    Can also be used when resuming downloads over http.

    Parameters
    ----------
    response: urllib.response.addinfourl
        Response to the download request in order to get file size. Its
        headers must contain ``Content-Length``.
    local_file: file
        Hard disk file where data should be written.
    chunk_size: integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size: int, optional
        If resuming, indicate the initial size of the file.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    # Content-Length is only the amount left to download when resuming,
    # not the size of the entire file, hence adding the initial size.
    total_size = int(response.headers["Content-Length"].strip())
    total_size += initial_size

    progress = StdoutProgressIndicator("Downloading")
    progress.start("", "bytes", total_size)

    while True:
        chunk = response.read(chunk_size)
        if not chunk:  # an empty read marks the end of the stream
            break
        _chunk_write(chunk, local_file, progress)
    progress.finish("Done")
226
227
228def _chunk_write(chunk, local_file, progress):
229 """Write a chunk to file and update the progress bar"""
230 local_file.write(chunk)
231 progress.increase_progress(len(chunk))
232 time.sleep(0) # Give other threads a chance, e.g. those that handle stdout pipes
233
234
235def _sizeof_fmt(num):
236 """Turn number of bytes into human-readable str"""
237 units = ["bytes", "kB", "MB", "GB", "TB", "PB"]
238 decimals = [0, 0, 1, 2, 2, 2]
239 """Human friendly file size"""
240 if num > 1:
241 exponent = min(int(log(num, 1024)), len(units) - 1)
242 quotient = float(num) / 1024**exponent
243 unit = units[exponent]
244 num_decimals = decimals[exponent]
245 format_string = "{0:.%sf} {1}" % num_decimals
246 return format_string.format(quotient, unit)
247 return "0 bytes" if num == 0 else "1 byte"