Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 19%
481 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1"""
2NetCDF reader/writer module.
4This module is used to read and create NetCDF files. NetCDF files are
5accessed through the `netcdf_file` object. Data written to and from NetCDF
6files are contained in `netcdf_variable` objects. Attributes are given
7as member variables of the `netcdf_file` and `netcdf_variable` objects.
9This module implements the Scientific.IO.NetCDF API to read and create
10NetCDF files. The same API is also used in the PyNIO and pynetcdf
11modules, allowing these modules to be used interchangeably when working
12with NetCDF files.
14Only NetCDF3 is supported here; for NetCDF4 see
15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,
16which has a similar API.
18"""
20# TODO:
21# * properly implement ``_FillValue``.
22# * fix character variables.
23# * implement PAGESIZE for Python 2.6?
25# The Scientific.IO.NetCDF API allows attributes to be added directly to
26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
27# between user-set attributes and instance attributes, user-set attributes
28# are automatically stored in the ``_attributes`` attribute by overloading
29#``__setattr__``. This is the reason why the code sometimes uses
30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
31# otherwise the key would be inserted into userspace attributes.
34__all__ = ['netcdf_file', 'netcdf_variable']
37import warnings
38import weakref
39from operator import mul
40from platform import python_implementation
42import mmap as mm
44import numpy as np
45from numpy import frombuffer, dtype, empty, array, asarray
46from numpy import little_endian as LITTLE_ENDIAN
47from functools import reduce
50IS_PYPY = python_implementation() == 'PyPy'
52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'
53ZERO = b'\x00\x00\x00\x00'
54NC_BYTE = b'\x00\x00\x00\x01'
55NC_CHAR = b'\x00\x00\x00\x02'
56NC_SHORT = b'\x00\x00\x00\x03'
57NC_INT = b'\x00\x00\x00\x04'
58NC_FLOAT = b'\x00\x00\x00\x05'
59NC_DOUBLE = b'\x00\x00\x00\x06'
60NC_DIMENSION = b'\x00\x00\x00\n'
61NC_VARIABLE = b'\x00\x00\x00\x0b'
62NC_ATTRIBUTE = b'\x00\x00\x00\x0c'
63FILL_BYTE = b'\x81'
64FILL_CHAR = b'\x00'
65FILL_SHORT = b'\x80\x01'
66FILL_INT = b'\x80\x00\x00\x01'
67FILL_FLOAT = b'\x7C\xF0\x00\x00'
68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00'
70TYPEMAP = {NC_BYTE: ('b', 1),
71 NC_CHAR: ('c', 1),
72 NC_SHORT: ('h', 2),
73 NC_INT: ('i', 4),
74 NC_FLOAT: ('f', 4),
75 NC_DOUBLE: ('d', 8)}
77FILLMAP = {NC_BYTE: FILL_BYTE,
78 NC_CHAR: FILL_CHAR,
79 NC_SHORT: FILL_SHORT,
80 NC_INT: FILL_INT,
81 NC_FLOAT: FILL_FLOAT,
82 NC_DOUBLE: FILL_DOUBLE}
84REVERSE = {('b', 1): NC_BYTE,
85 ('B', 1): NC_CHAR,
86 ('c', 1): NC_CHAR,
87 ('h', 2): NC_SHORT,
88 ('i', 4): NC_INT,
89 ('f', 4): NC_FLOAT,
90 ('d', 8): NC_DOUBLE,
92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
93 # used when getting the types from generic attributes.
94 ('l', 4): NC_INT,
95 ('S', 1): NC_CHAR}
98class netcdf_file:
99 """
100 A file object for NetCDF data.
102 A `netcdf_file` object has two standard attributes: `dimensions` and
103 `variables`. The values of both are dictionaries, mapping dimension
104 names to their associated lengths and variable names to variables,
105 respectively. Application programs should never modify these
106 dictionaries.
108 All other attributes correspond to global attributes defined in the
109 NetCDF file. Global file attributes are created by assigning to an
110 attribute of the `netcdf_file` object.
112 Parameters
113 ----------
114 filename : string or file-like
115 string -> filename
116 mode : {'r', 'w', 'a'}, optional
117 read-write-append mode, default is 'r'
118 mmap : None or bool, optional
119 Whether to mmap `filename` when reading. Default is True
120 when `filename` is a file name, False when `filename` is a
121 file-like object. Note that when mmap is in use, data arrays
122 returned refer directly to the mmapped data on disk, and the
123 file cannot be closed as long as references to it exist.
124 version : {1, 2}, optional
125 version of netcdf to read / write, where 1 means *Classic
126 format* and 2 means *64-bit offset format*. Default is 1. See
127 `here <https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_introduction.html#select_format>`__
128 for more info.
129 maskandscale : bool, optional
130 Whether to automatically scale and/or mask data based on attributes.
131 Default is False.
133 Notes
134 -----
135 The major advantage of this module over other modules is that it doesn't
136 require the code to be linked to the NetCDF libraries. This module is
137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.
139 NetCDF files are a self-describing binary data format. The file contains
140 metadata that describes the dimensions and variables in the file. More
141 details about NetCDF files can be found `here
142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There
143 are three main sections to a NetCDF data structure:
145 1. Dimensions
146 2. Variables
147 3. Attributes
149 The dimensions section records the name and length of each dimension used
150 by the variables. The variables would then indicate which dimensions it
151 uses and any attributes such as data units, along with containing the data
152 values for the variable. It is good practice to include a
153 variable that is the same name as a dimension to provide the values for
154 that axes. Lastly, the attributes section would contain additional
155 information such as the name of the file creator or the instrument used to
156 collect the data.
158 When writing data to a NetCDF file, there is often the need to indicate the
159 'record dimension'. A record dimension is the unbounded dimension for a
160 variable. For example, a temperature variable may have dimensions of
161 latitude, longitude and time. If one wants to add more temperature data to
162 the NetCDF file as time progresses, then the temperature variable should
163 have the time dimension flagged as the record dimension.
165 In addition, the NetCDF file header contains the position of the data in
166 the file, so access can be done in an efficient manner without loading
167 unnecessary data into memory. It uses the ``mmap`` module to create
168 Numpy arrays mapped to the data on disk, for the same purpose.
170 Note that when `netcdf_file` is used to open a file with mmap=True
171 (default for read-only), arrays returned by it refer to data
172 directly on the disk. The file should not be closed, and cannot be cleanly
173 closed when asked, if such arrays are alive. You may want to copy data arrays
174 obtained from mmapped Netcdf file if they are to be processed after the file
175 is closed, see the example below.
177 Examples
178 --------
179 To create a NetCDF file:
181 >>> from scipy.io import netcdf_file
182 >>> import numpy as np
183 >>> f = netcdf_file('simple.nc', 'w')
184 >>> f.history = 'Created for a test'
185 >>> f.createDimension('time', 10)
186 >>> time = f.createVariable('time', 'i', ('time',))
187 >>> time[:] = np.arange(10)
188 >>> time.units = 'days since 2008-01-01'
189 >>> f.close()
191 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice
192 of the time variable allows for the data to be set in the object, rather
193 than letting ``arange(10)`` overwrite the ``time`` variable.
195 To read the NetCDF file we just created:
197 >>> from scipy.io import netcdf_file
198 >>> f = netcdf_file('simple.nc', 'r')
199 >>> print(f.history)
200 b'Created for a test'
201 >>> time = f.variables['time']
202 >>> print(time.units)
203 b'days since 2008-01-01'
204 >>> print(time.shape)
205 (10,)
206 >>> print(time[-1])
207 9
209 NetCDF files, when opened read-only, return arrays that refer
210 directly to memory-mapped data on disk:
212 >>> data = time[:]
214 If the data is to be processed after the file is closed, it needs
215 to be copied to main memory:
217 >>> data = time[:].copy()
218 >>> del time
219 >>> f.close()
220 >>> data.mean()
221 4.5
223 A NetCDF file can also be used as context manager:
225 >>> from scipy.io import netcdf_file
226 >>> with netcdf_file('simple.nc', 'r') as f:
227 ... print(f.history)
228 b'Created for a test'
230 """
231 def __init__(self, filename, mode='r', mmap=None, version=1,
232 maskandscale=False):
233 """Initialize netcdf_file from fileobj (str or file-like)."""
234 if mode not in 'rwa':
235 raise ValueError("Mode must be either 'r', 'w' or 'a'.")
237 if hasattr(filename, 'seek'): # file-like
238 self.fp = filename
239 self.filename = 'None'
240 if mmap is None:
241 mmap = False
242 elif mmap and not hasattr(filename, 'fileno'):
243 raise ValueError('Cannot use file object for mmap')
244 else: # maybe it's a string
245 self.filename = filename
246 omode = 'r+' if mode == 'a' else mode
247 self.fp = open(self.filename, '%sb' % omode)
248 if mmap is None:
249 # Mmapped files on PyPy cannot be usually closed
250 # before the GC runs, so it's better to use mmap=False
251 # as the default.
252 mmap = (not IS_PYPY)
254 if mode != 'r':
255 # Cannot read write-only files
256 mmap = False
258 self.use_mmap = mmap
259 self.mode = mode
260 self.version_byte = version
261 self.maskandscale = maskandscale
263 self.dimensions = {}
264 self.variables = {}
266 self._dims = []
267 self._recs = 0
268 self._recsize = 0
270 self._mm = None
271 self._mm_buf = None
272 if self.use_mmap:
273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)
276 self._attributes = {}
278 if mode in 'ra':
279 self._read()
281 def __setattr__(self, attr, value):
282 # Store user defined attributes in a separate dict,
283 # so we can save them to file later.
284 try:
285 self._attributes[attr] = value
286 except AttributeError:
287 pass
288 self.__dict__[attr] = value
290 def close(self):
291 """Closes the NetCDF file."""
292 if hasattr(self, 'fp') and not self.fp.closed:
293 try:
294 self.flush()
295 finally:
296 self.variables = {}
297 if self._mm_buf is not None:
298 ref = weakref.ref(self._mm_buf)
299 self._mm_buf = None
300 if ref() is None:
301 # self._mm_buf is gc'd, and we can close the mmap
302 self._mm.close()
303 else:
304 # we cannot close self._mm, since self._mm_buf is
305 # alive and there may still be arrays referring to it
306 warnings.warn((
307 "Cannot close a netcdf_file opened with mmap=True, when "
308 "netcdf_variables or arrays referring to its data still exist. "
309 "All data arrays obtained from such files refer directly to "
310 "data on disk, and must be copied before the file can be cleanly "
311 "closed. (See netcdf_file docstring for more information on mmap.)"
312 ), category=RuntimeWarning)
313 self._mm = None
314 self.fp.close()
315 __del__ = close
317 def __enter__(self):
318 return self
320 def __exit__(self, type, value, traceback):
321 self.close()
323 def createDimension(self, name, length):
324 """
325 Adds a dimension to the Dimension section of the NetCDF data structure.
327 Note that this function merely adds a new dimension that the variables can
328 reference. The values for the dimension, if desired, should be added as
329 a variable using `createVariable`, referring to this dimension.
331 Parameters
332 ----------
333 name : str
334 Name of the dimension (Eg, 'lat' or 'time').
335 length : int
336 Length of the dimension.
338 See Also
339 --------
340 createVariable
342 """
343 if length is None and self._dims:
344 raise ValueError("Only first dimension may be unlimited!")
346 self.dimensions[name] = length
347 self._dims.append(name)
349 def createVariable(self, name, type, dimensions):
350 """
351 Create an empty variable for the `netcdf_file` object, specifying its data
352 type and the dimensions it uses.
354 Parameters
355 ----------
356 name : str
357 Name of the new variable.
358 type : dtype or str
359 Data type of the variable.
360 dimensions : sequence of str
361 List of the dimension names used by the variable, in the desired order.
363 Returns
364 -------
365 variable : netcdf_variable
366 The newly created ``netcdf_variable`` object.
367 This object has also been added to the `netcdf_file` object as well.
369 See Also
370 --------
371 createDimension
373 Notes
374 -----
375 Any dimensions to be used by the variable should already exist in the
376 NetCDF data structure or should be created by `createDimension` prior to
377 creating the NetCDF variable.
379 """
380 shape = tuple([self.dimensions[dim] for dim in dimensions])
381 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy
383 type = dtype(type)
384 typecode, size = type.char, type.itemsize
385 if (typecode, size) not in REVERSE:
386 raise ValueError("NetCDF 3 does not support type %s" % type)
388 data = empty(shape_, dtype=type.newbyteorder("B")) # convert to big endian always for NetCDF 3
389 self.variables[name] = netcdf_variable(
390 data, typecode, size, shape, dimensions,
391 maskandscale=self.maskandscale)
392 return self.variables[name]
394 def flush(self):
395 """
396 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.
398 See Also
399 --------
400 sync : Identical function
402 """
403 if hasattr(self, 'mode') and self.mode in 'wa':
404 self._write()
405 sync = flush
407 def _write(self):
408 self.fp.seek(0)
409 self.fp.write(b'CDF')
410 self.fp.write(array(self.version_byte, '>b').tobytes())
412 # Write headers and data.
413 self._write_numrecs()
414 self._write_dim_array()
415 self._write_gatt_array()
416 self._write_var_array()
418 def _write_numrecs(self):
419 # Get highest record count from all record variables.
420 for var in self.variables.values():
421 if var.isrec and len(var.data) > self._recs:
422 self.__dict__['_recs'] = len(var.data)
423 self._pack_int(self._recs)
425 def _write_dim_array(self):
426 if self.dimensions:
427 self.fp.write(NC_DIMENSION)
428 self._pack_int(len(self.dimensions))
429 for name in self._dims:
430 self._pack_string(name)
431 length = self.dimensions[name]
432 self._pack_int(length or 0) # replace None with 0 for record dimension
433 else:
434 self.fp.write(ABSENT)
436 def _write_gatt_array(self):
437 self._write_att_array(self._attributes)
439 def _write_att_array(self, attributes):
440 if attributes:
441 self.fp.write(NC_ATTRIBUTE)
442 self._pack_int(len(attributes))
443 for name, values in attributes.items():
444 self._pack_string(name)
445 self._write_att_values(values)
446 else:
447 self.fp.write(ABSENT)
449 def _write_var_array(self):
450 if self.variables:
451 self.fp.write(NC_VARIABLE)
452 self._pack_int(len(self.variables))
454 # Sort variable names non-recs first, then recs.
455 def sortkey(n):
456 v = self.variables[n]
457 if v.isrec:
458 return (-1,)
459 return v._shape
460 variables = sorted(self.variables, key=sortkey, reverse=True)
462 # Set the metadata for all variables.
463 for name in variables:
464 self._write_var_metadata(name)
465 # Now that we have the metadata, we know the vsize of
466 # each record variable, so we can calculate recsize.
467 self.__dict__['_recsize'] = sum([
468 var._vsize for var in self.variables.values()
469 if var.isrec])
470 # Set the data for all variables.
471 for name in variables:
472 self._write_var_data(name)
473 else:
474 self.fp.write(ABSENT)
476 def _write_var_metadata(self, name):
477 var = self.variables[name]
479 self._pack_string(name)
480 self._pack_int(len(var.dimensions))
481 for dimname in var.dimensions:
482 dimid = self._dims.index(dimname)
483 self._pack_int(dimid)
485 self._write_att_array(var._attributes)
487 nc_type = REVERSE[var.typecode(), var.itemsize()]
488 self.fp.write(nc_type)
490 if not var.isrec:
491 vsize = var.data.size * var.data.itemsize
492 vsize += -vsize % 4
493 else: # record variable
494 try:
495 vsize = var.data[0].size * var.data.itemsize
496 except IndexError:
497 vsize = 0
498 rec_vars = len([v for v in self.variables.values()
499 if v.isrec])
500 if rec_vars > 1:
501 vsize += -vsize % 4
502 self.variables[name].__dict__['_vsize'] = vsize
503 self._pack_int(vsize)
505 # Pack a bogus begin, and set the real value later.
506 self.variables[name].__dict__['_begin'] = self.fp.tell()
507 self._pack_begin(0)
509 def _write_var_data(self, name):
510 var = self.variables[name]
512 # Set begin in file header.
513 the_beguine = self.fp.tell()
514 self.fp.seek(var._begin)
515 self._pack_begin(the_beguine)
516 self.fp.seek(the_beguine)
518 # Write data.
519 if not var.isrec:
520 self.fp.write(var.data.tobytes())
521 count = var.data.size * var.data.itemsize
522 self._write_var_padding(var, var._vsize - count)
523 else: # record variable
524 # Handle rec vars with shape[0] < nrecs.
525 if self._recs > len(var.data):
526 shape = (self._recs,) + var.data.shape[1:]
527 # Resize in-place does not always work since
528 # the array might not be single-segment
529 try:
530 var.data.resize(shape)
531 except ValueError:
532 var.__dict__['data'] = np.resize(var.data, shape).astype(var.data.dtype)
534 pos0 = pos = self.fp.tell()
535 for rec in var.data:
536 # Apparently scalars cannot be converted to big endian. If we
537 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
538 # will remain as ``=i4``.
539 if not rec.shape and (rec.dtype.byteorder == '<' or
540 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
541 rec = rec.byteswap()
542 self.fp.write(rec.tobytes())
543 # Padding
544 count = rec.size * rec.itemsize
545 self._write_var_padding(var, var._vsize - count)
546 pos += self._recsize
547 self.fp.seek(pos)
548 self.fp.seek(pos0 + var._vsize)
550 def _write_var_padding(self, var, size):
551 encoded_fill_value = var._get_encoded_fill_value()
552 num_fills = size // len(encoded_fill_value)
553 self.fp.write(encoded_fill_value * num_fills)
555 def _write_att_values(self, values):
556 if hasattr(values, 'dtype'):
557 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]
558 else:
559 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)]
561 # bytes index into scalars in py3k. Check for "string" types
562 if isinstance(values, (str, bytes)):
563 sample = values
564 else:
565 try:
566 sample = values[0] # subscriptable?
567 except TypeError:
568 sample = values # scalar
570 for class_, nc_type in types:
571 if isinstance(sample, class_):
572 break
574 typecode, size = TYPEMAP[nc_type]
575 dtype_ = '>%s' % typecode
576 # asarray() dies with bytes and '>c' in py3k. Change to 'S'
577 dtype_ = 'S' if dtype_ == '>c' else dtype_
579 values = asarray(values, dtype=dtype_)
581 self.fp.write(nc_type)
583 if values.dtype.char == 'S':
584 nelems = values.itemsize
585 else:
586 nelems = values.size
587 self._pack_int(nelems)
589 if not values.shape and (values.dtype.byteorder == '<' or
590 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
591 values = values.byteswap()
592 self.fp.write(values.tobytes())
593 count = values.size * values.itemsize
594 self.fp.write(b'\x00' * (-count % 4)) # pad
596 def _read(self):
597 # Check magic bytes and version
598 magic = self.fp.read(3)
599 if not magic == b'CDF':
600 raise TypeError("Error: %s is not a valid NetCDF 3 file" %
601 self.filename)
602 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]
604 # Read file headers and set data.
605 self._read_numrecs()
606 self._read_dim_array()
607 self._read_gatt_array()
608 self._read_var_array()
610 def _read_numrecs(self):
611 self.__dict__['_recs'] = self._unpack_int()
613 def _read_dim_array(self):
614 header = self.fp.read(4)
615 if header not in [ZERO, NC_DIMENSION]:
616 raise ValueError("Unexpected header.")
617 count = self._unpack_int()
619 for dim in range(count):
620 name = self._unpack_string().decode('latin1')
621 length = self._unpack_int() or None # None for record dimension
622 self.dimensions[name] = length
623 self._dims.append(name) # preserve order
625 def _read_gatt_array(self):
626 for k, v in self._read_att_array().items():
627 self.__setattr__(k, v)
629 def _read_att_array(self):
630 header = self.fp.read(4)
631 if header not in [ZERO, NC_ATTRIBUTE]:
632 raise ValueError("Unexpected header.")
633 count = self._unpack_int()
635 attributes = {}
636 for attr in range(count):
637 name = self._unpack_string().decode('latin1')
638 attributes[name] = self._read_att_values()
639 return attributes
641 def _read_var_array(self):
642 header = self.fp.read(4)
643 if header not in [ZERO, NC_VARIABLE]:
644 raise ValueError("Unexpected header.")
646 begin = 0
647 dtypes = {'names': [], 'formats': []}
648 rec_vars = []
649 count = self._unpack_int()
650 for var in range(count):
651 (name, dimensions, shape, attributes,
652 typecode, size, dtype_, begin_, vsize) = self._read_var()
653 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html
654 # Note that vsize is the product of the dimension lengths
655 # (omitting the record dimension) and the number of bytes
656 # per value (determined from the type), increased to the
657 # next multiple of 4, for each variable. If a record
658 # variable, this is the amount of space per record. The
659 # netCDF "record size" is calculated as the sum of the
660 # vsize's of all the record variables.
661 #
662 # The vsize field is actually redundant, because its value
663 # may be computed from other information in the header. The
664 # 32-bit vsize field is not large enough to contain the size
665 # of variables that require more than 2^32 - 4 bytes, so
666 # 2^32 - 1 is used in the vsize field for such variables.
667 if shape and shape[0] is None: # record variable
668 rec_vars.append(name)
669 # The netCDF "record size" is calculated as the sum of
670 # the vsize's of all the record variables.
671 self.__dict__['_recsize'] += vsize
672 if begin == 0:
673 begin = begin_
674 dtypes['names'].append(name)
675 dtypes['formats'].append(str(shape[1:]) + dtype_)
677 # Handle padding with a virtual variable.
678 if typecode in 'bch':
679 actual_size = reduce(mul, (1,) + shape[1:]) * size
680 padding = -actual_size % 4
681 if padding:
682 dtypes['names'].append('_padding_%d' % var)
683 dtypes['formats'].append('(%d,)>b' % padding)
685 # Data will be set later.
686 data = None
687 else: # not a record variable
688 # Calculate size to avoid problems with vsize (above)
689 a_size = reduce(mul, shape, 1) * size
690 if self.use_mmap:
691 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)
692 data.shape = shape
693 else:
694 pos = self.fp.tell()
695 self.fp.seek(begin_)
696 data = frombuffer(self.fp.read(a_size), dtype=dtype_
697 ).copy()
698 data.shape = shape
699 self.fp.seek(pos)
701 # Add variable.
702 self.variables[name] = netcdf_variable(
703 data, typecode, size, shape, dimensions, attributes,
704 maskandscale=self.maskandscale)
706 if rec_vars:
707 # Remove padding when only one record variable.
708 if len(rec_vars) == 1:
709 dtypes['names'] = dtypes['names'][:1]
710 dtypes['formats'] = dtypes['formats'][:1]
712 # Build rec array.
713 if self.use_mmap:
714 rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes)
715 rec_array.shape = (self._recs,)
716 else:
717 pos = self.fp.tell()
718 self.fp.seek(begin)
719 rec_array = frombuffer(self.fp.read(self._recs*self._recsize),
720 dtype=dtypes).copy()
721 rec_array.shape = (self._recs,)
722 self.fp.seek(pos)
724 for var in rec_vars:
725 self.variables[var].__dict__['data'] = rec_array[var]
727 def _read_var(self):
728 name = self._unpack_string().decode('latin1')
729 dimensions = []
730 shape = []
731 dims = self._unpack_int()
733 for i in range(dims):
734 dimid = self._unpack_int()
735 dimname = self._dims[dimid]
736 dimensions.append(dimname)
737 dim = self.dimensions[dimname]
738 shape.append(dim)
739 dimensions = tuple(dimensions)
740 shape = tuple(shape)
742 attributes = self._read_att_array()
743 nc_type = self.fp.read(4)
744 vsize = self._unpack_int()
745 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
747 typecode, size = TYPEMAP[nc_type]
748 dtype_ = '>%s' % typecode
750 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
752 def _read_att_values(self):
753 nc_type = self.fp.read(4)
754 n = self._unpack_int()
756 typecode, size = TYPEMAP[nc_type]
758 count = n*size
759 values = self.fp.read(int(count))
760 self.fp.read(-count % 4) # read padding
762 if typecode != 'c':
763 values = frombuffer(values, dtype='>%s' % typecode).copy()
764 if values.shape == (1,):
765 values = values[0]
766 else:
767 values = values.rstrip(b'\x00')
768 return values
770 def _pack_begin(self, begin):
771 if self.version_byte == 1:
772 self._pack_int(begin)
773 elif self.version_byte == 2:
774 self._pack_int64(begin)
776 def _pack_int(self, value):
777 self.fp.write(array(value, '>i').tobytes())
778 _pack_int32 = _pack_int
780 def _unpack_int(self):
781 return int(frombuffer(self.fp.read(4), '>i')[0])
782 _unpack_int32 = _unpack_int
784 def _pack_int64(self, value):
785 self.fp.write(array(value, '>q').tobytes())
787 def _unpack_int64(self):
788 return frombuffer(self.fp.read(8), '>q')[0]
790 def _pack_string(self, s):
791 count = len(s)
792 self._pack_int(count)
793 self.fp.write(s.encode('latin1'))
794 self.fp.write(b'\x00' * (-count % 4)) # pad
796 def _unpack_string(self):
797 count = self._unpack_int()
798 s = self.fp.read(count).rstrip(b'\x00')
799 self.fp.read(-count % 4) # read padding
800 return s
803class netcdf_variable:
804 """
805 A data object for netcdf files.
807 `netcdf_variable` objects are constructed by calling the method
808 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
809 objects behave much like array objects defined in numpy, except that their
810 data resides in a file. Data is read by indexing and written by assigning
811 to an indexed subset; the entire array can be accessed by the index ``[:]``
812 or (for scalars) by using the methods `getValue` and `assignValue`.
813 `netcdf_variable` objects also have attribute `shape` with the same meaning
814 as for arrays, but the shape cannot be modified. There is another read-only
815 attribute `dimensions`, whose value is the tuple of dimension names.
817 All other attributes correspond to variable attributes defined in
818 the NetCDF file. Variable attributes are created by assigning to an
819 attribute of the `netcdf_variable` object.
821 Parameters
822 ----------
823 data : array_like
824 The data array that holds the values for the variable.
825 Typically, this is initialized as empty, but with the proper shape.
826 typecode : dtype character code
827 Desired data-type for the data array.
828 size : int
829 Desired element size for the data array.
830 shape : sequence of ints
831 The shape of the array. This should match the lengths of the
832 variable's dimensions.
833 dimensions : sequence of strings
834 The names of the dimensions used by the variable. Must be in the
835 same order of the dimension lengths given by `shape`.
836 attributes : dict, optional
837 Attribute values (any type) keyed by string names. These attributes
838 become attributes for the netcdf_variable object.
839 maskandscale : bool, optional
840 Whether to automatically scale and/or mask data based on attributes.
841 Default is False.
844 Attributes
845 ----------
846 dimensions : list of str
847 List of names of dimensions used by the variable object.
848 isrec, shape
849 Properties
851 See also
852 --------
853 isrec, shape
855 """
856 def __init__(self, data, typecode, size, shape, dimensions,
857 attributes=None,
858 maskandscale=False):
859 self.data = data
860 self._typecode = typecode
861 self._size = size
862 self._shape = shape
863 self.dimensions = dimensions
864 self.maskandscale = maskandscale
866 self._attributes = attributes or {}
867 for k, v in self._attributes.items():
868 self.__dict__[k] = v
870 def __setattr__(self, attr, value):
871 # Store user defined attributes in a separate dict,
872 # so we can save them to file later.
873 try:
874 self._attributes[attr] = value
875 except AttributeError:
876 pass
877 self.__dict__[attr] = value
879 def isrec(self):
880 """Returns whether the variable has a record dimension or not.
882 A record dimension is a dimension along which additional data could be
883 easily appended in the netcdf data structure without much rewriting of
884 the data file. This attribute is a read-only property of the
885 `netcdf_variable`.
887 """
888 return bool(self.data.shape) and not self._shape[0]
889 isrec = property(isrec)
891 def shape(self):
892 """Returns the shape tuple of the data variable.
894 This is a read-only attribute and can not be modified in the
895 same manner of other numpy arrays.
896 """
897 return self.data.shape
898 shape = property(shape)
900 def getValue(self):
901 """
902 Retrieve a scalar value from a `netcdf_variable` of length one.
904 Raises
905 ------
906 ValueError
907 If the netcdf variable is an array of length greater than one,
908 this exception will be raised.
910 """
911 return self.data.item()
913 def assignValue(self, value):
914 """
915 Assign a scalar value to a `netcdf_variable` of length one.
917 Parameters
918 ----------
919 value : scalar
920 Scalar value (of compatible type) to assign to a length-one netcdf
921 variable. This value will be written to file.
923 Raises
924 ------
925 ValueError
926 If the input is not a scalar, or if the destination is not a length-one
927 netcdf variable.
929 """
930 if not self.data.flags.writeable:
931 # Work-around for a bug in NumPy. Calling itemset() on a read-only
932 # memory-mapped array causes a seg. fault.
933 # See NumPy ticket #1622, and SciPy ticket #1202.
934 # This check for `writeable` can be removed when the oldest version
935 # of NumPy still supported by scipy contains the fix for #1622.
936 raise RuntimeError("variable is not writeable")
938 self.data[:] = value
940 def typecode(self):
941 """
942 Return the typecode of the variable.
944 Returns
945 -------
946 typecode : char
947 The character typecode of the variable (e.g., 'i' for int).
949 """
950 return self._typecode
952 def itemsize(self):
953 """
954 Return the itemsize of the variable.
956 Returns
957 -------
958 itemsize : int
959 The element size of the variable (e.g., 8 for float64).
961 """
962 return self._size
964 def __getitem__(self, index):
965 if not self.maskandscale:
966 return self.data[index]
968 data = self.data[index].copy()
969 missing_value = self._get_missing_value()
970 data = self._apply_missing_value(data, missing_value)
971 scale_factor = self._attributes.get('scale_factor')
972 add_offset = self._attributes.get('add_offset')
973 if add_offset is not None or scale_factor is not None:
974 data = data.astype(np.float64)
975 if scale_factor is not None:
976 data = data * scale_factor
977 if add_offset is not None:
978 data += add_offset
980 return data
982 def __setitem__(self, index, data):
983 if self.maskandscale:
984 missing_value = (
985 self._get_missing_value() or
986 getattr(data, 'fill_value', 999999))
987 self._attributes.setdefault('missing_value', missing_value)
988 self._attributes.setdefault('_FillValue', missing_value)
989 data = ((data - self._attributes.get('add_offset', 0.0)) /
990 self._attributes.get('scale_factor', 1.0))
991 data = np.ma.asarray(data).filled(missing_value)
992 if self._typecode not in 'fd' and data.dtype.kind == 'f':
993 data = np.round(data)
995 # Expand data for record vars?
996 if self.isrec:
997 if isinstance(index, tuple):
998 rec_index = index[0]
999 else:
1000 rec_index = index
1001 if isinstance(rec_index, slice):
1002 recs = (rec_index.start or 0) + len(data)
1003 else:
1004 recs = rec_index + 1
1005 if recs > len(self.data):
1006 shape = (recs,) + self._shape[1:]
1007 # Resize in-place does not always work since
1008 # the array might not be single-segment
1009 try:
1010 self.data.resize(shape)
1011 except ValueError:
1012 self.__dict__['data'] = np.resize(self.data, shape).astype(self.data.dtype)
1013 self.data[index] = data
1015 def _default_encoded_fill_value(self):
1016 """
1017 The default encoded fill-value for this Variable's data type.
1018 """
1019 nc_type = REVERSE[self.typecode(), self.itemsize()]
1020 return FILLMAP[nc_type]
1022 def _get_encoded_fill_value(self):
1023 """
1024 Returns the encoded fill value for this variable as bytes.
1026 This is taken from either the _FillValue attribute, or the default fill
1027 value for this variable's data type.
1028 """
1029 if '_FillValue' in self._attributes:
1030 fill_value = np.array(self._attributes['_FillValue'],
1031 dtype=self.data.dtype).tobytes()
1032 if len(fill_value) == self.itemsize():
1033 return fill_value
1034 else:
1035 return self._default_encoded_fill_value()
1036 else:
1037 return self._default_encoded_fill_value()
1039 def _get_missing_value(self):
1040 """
1041 Returns the value denoting "no data" for this variable.
1043 If this variable does not have a missing/fill value, returns None.
1045 If both _FillValue and missing_value are given, give precedence to
1046 _FillValue. The netCDF standard gives special meaning to _FillValue;
1047 missing_value is just used for compatibility with old datasets.
1048 """
1050 if '_FillValue' in self._attributes:
1051 missing_value = self._attributes['_FillValue']
1052 elif 'missing_value' in self._attributes:
1053 missing_value = self._attributes['missing_value']
1054 else:
1055 missing_value = None
1057 return missing_value
1059 @staticmethod
1060 def _apply_missing_value(data, missing_value):
1061 """
1062 Applies the given missing value to the data array.
1064 Returns a numpy.ma array, with any value equal to missing_value masked
1065 out (unless missing_value is None, in which case the original array is
1066 returned).
1067 """
1069 if missing_value is None:
1070 newdata = data
1071 else:
1072 try:
1073 missing_value_isnan = np.isnan(missing_value)
1074 except (TypeError, NotImplementedError):
1075 # some data types (e.g., characters) cannot be tested for NaN
1076 missing_value_isnan = False
1078 if missing_value_isnan:
1079 mymask = np.isnan(data)
1080 else:
1081 mymask = (data == missing_value)
1083 newdata = np.ma.masked_where(mymask, data)
1085 return newdata
1088NetCDFFile = netcdf_file
1089NetCDFVariable = netcdf_variable