Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 18%
484 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
"""
NetCDF reader/writer module.

This module is used to read and create NetCDF files. NetCDF files are
accessed through the `netcdf_file` object. Data written to and from NetCDF
files are contained in `netcdf_variable` objects. Attributes are given
as member variables of the `netcdf_file` and `netcdf_variable` objects.

This module implements the Scientific.IO.NetCDF API to read and create
NetCDF files. The same API is also used in the PyNIO and pynetcdf
modules, allowing these modules to be used interchangeably when working
with NetCDF files.

Only NetCDF3 is supported here; for NetCDF4 see
`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,
which has a similar API.

"""
# TODO:
# * properly implement ``_FillValue``.
# * fix character variables.
# * implement PAGESIZE for Python 2.6?

# The Scientific.IO.NetCDF API allows attributes to be added directly to
# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
# between user-set attributes and instance attributes, user-set attributes
# are automatically stored in the ``_attributes`` attribute by overloading
# ``__setattr__``. This is the reason why the code sometimes uses
# ``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
# otherwise the key would be inserted into userspace attributes.
# Names exported by ``from <module> import *``: the file class and the
# variable class defined below.
__all__ = ['netcdf_file', 'netcdf_variable']
37import warnings
38import weakref
39from operator import mul
40from platform import python_implementation
42import mmap as mm
44import numpy as np
45from numpy import frombuffer, dtype, empty, array, asarray
46from numpy import little_endian as LITTLE_ENDIAN
47from functools import reduce
# True when running under PyPy; used to pick the default for ``mmap``.
IS_PYPY = 'PyPy' == python_implementation()

# Header markers.  ZERO is one all-zero 32-bit word; ABSENT is two of them
# (an empty tag word followed by a zero element count).
ZERO = bytes(4)
ABSENT = ZERO + ZERO

# Type and section tags, each stored as a big-endian 32-bit integer.
NC_BYTE = (1).to_bytes(4, 'big')
NC_CHAR = (2).to_bytes(4, 'big')
NC_SHORT = (3).to_bytes(4, 'big')
NC_INT = (4).to_bytes(4, 'big')
NC_FLOAT = (5).to_bytes(4, 'big')
NC_DOUBLE = (6).to_bytes(4, 'big')
NC_DIMENSION = (10).to_bytes(4, 'big')
NC_VARIABLE = (11).to_bytes(4, 'big')
NC_ATTRIBUTE = (12).to_bytes(4, 'big')

# Default fill patterns, one per data type, as raw big-endian bytes.
FILL_BYTE = bytes.fromhex('81')
FILL_CHAR = bytes.fromhex('00')
FILL_SHORT = bytes.fromhex('8001')
FILL_INT = bytes.fromhex('80000001')
FILL_FLOAT = bytes.fromhex('7cf00000')
FILL_DOUBLE = bytes.fromhex('479e000000000000')

# Type tag -> (NumPy type character, item size in bytes).
TYPEMAP = {
    NC_BYTE: ('b', 1),
    NC_CHAR: ('c', 1),
    NC_SHORT: ('h', 2),
    NC_INT: ('i', 4),
    NC_FLOAT: ('f', 4),
    NC_DOUBLE: ('d', 8),
}

# Type tag -> default fill pattern for that type.
FILLMAP = {
    NC_BYTE: FILL_BYTE,
    NC_CHAR: FILL_CHAR,
    NC_SHORT: FILL_SHORT,
    NC_INT: FILL_INT,
    NC_FLOAT: FILL_FLOAT,
    NC_DOUBLE: FILL_DOUBLE,
}

# (NumPy type character, item size in bytes) -> type tag.
REVERSE = {
    ('b', 1): NC_BYTE,
    ('B', 1): NC_CHAR,
    ('c', 1): NC_CHAR,
    ('h', 2): NC_SHORT,
    ('i', 4): NC_INT,
    ('f', 4): NC_FLOAT,
    ('d', 8): NC_DOUBLE,

    # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
    # used when getting the types from generic attributes.
    ('l', 4): NC_INT,
    ('S', 1): NC_CHAR,
}
class netcdf_file:
    """
    A file object for NetCDF data.

    A `netcdf_file` object has two standard attributes: `dimensions` and
    `variables`. The values of both are dictionaries, mapping dimension
    names to their associated lengths and variable names to variables,
    respectively. Application programs should never modify these
    dictionaries.

    All other attributes correspond to global attributes defined in the
    NetCDF file. Global file attributes are created by assigning to an
    attribute of the `netcdf_file` object.

    Parameters
    ----------
    filename : string or file-like
        string -> filename
    mode : {'r', 'w', 'a'}, optional
        read-write-append mode, default is 'r'
    mmap : None or bool, optional
        Whether to mmap `filename` when reading. Default is True
        when `filename` is a file name, False when `filename` is a
        file-like object. Note that when mmap is in use, data arrays
        returned refer directly to the mmapped data on disk, and the
        file cannot be closed as long as references to it exist.
    version : {1, 2}, optional
        version of netcdf to read / write, where 1 means *Classic
        format* and 2 means *64-bit offset format*. Default is 1. See
        `here <https://docs.unidata.ucar.edu/nug/current/netcdf_introduction.html#select_format>`__
        for more info. When an existing file is read, the version stored
        in the file replaces this value.
    maskandscale : bool, optional
        Whether to automatically scale and/or mask data based on attributes.
        Default is False.

    Raises
    ------
    ValueError
        If `mode` is not exactly one of 'r', 'w' or 'a', or if `mmap` is
        requested for a file-like object that has no ``fileno``.

    Notes
    -----
    The major advantage of this module over other modules is that it doesn't
    require the code to be linked to the NetCDF libraries. This module is
    derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.

    NetCDF files are a self-describing binary data format. The file contains
    metadata that describes the dimensions and variables in the file. More
    details about NetCDF files can be found `here
    <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There
    are three main sections to a NetCDF data structure:

    1. Dimensions
    2. Variables
    3. Attributes

    The dimensions section records the name and length of each dimension used
    by the variables. The variables would then indicate which dimensions it
    uses and any attributes such as data units, along with containing the data
    values for the variable. It is good practice to include a
    variable that is the same name as a dimension to provide the values for
    that axis. Lastly, the attributes section would contain additional
    information such as the name of the file creator or the instrument used to
    collect the data.

    When writing data to a NetCDF file, there is often the need to indicate the
    'record dimension'. A record dimension is the unbounded dimension for a
    variable. For example, a temperature variable may have dimensions of
    latitude, longitude and time. If one wants to add more temperature data to
    the NetCDF file as time progresses, then the temperature variable should
    have the time dimension flagged as the record dimension.

    In addition, the NetCDF file header contains the position of the data in
    the file, so access can be done in an efficient manner without loading
    unnecessary data into memory. It uses the ``mmap`` module to create
    Numpy arrays mapped to the data on disk, for the same purpose.

    Note that when `netcdf_file` is used to open a file with mmap=True
    (default for read-only), arrays returned by it refer to data
    directly on the disk. The file should not be closed, and cannot be cleanly
    closed when asked, if such arrays are alive. You may want to copy data arrays
    obtained from mmapped Netcdf file if they are to be processed after the file
    is closed, see the example below.

    Examples
    --------
    To create a NetCDF file:

    >>> from scipy.io import netcdf_file
    >>> import numpy as np
    >>> f = netcdf_file('simple.nc', 'w')
    >>> f.history = 'Created for a test'
    >>> f.createDimension('time', 10)
    >>> time = f.createVariable('time', 'i', ('time',))
    >>> time[:] = np.arange(10)
    >>> time.units = 'days since 2008-01-01'
    >>> f.close()

    Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice
    of the time variable allows for the data to be set in the object, rather
    than letting ``arange(10)`` overwrite the ``time`` variable.

    To read the NetCDF file we just created:

    >>> from scipy.io import netcdf_file
    >>> f = netcdf_file('simple.nc', 'r')
    >>> print(f.history)
    b'Created for a test'
    >>> time = f.variables['time']
    >>> print(time.units)
    b'days since 2008-01-01'
    >>> print(time.shape)
    (10,)
    >>> print(time[-1])
    9

    NetCDF files, when opened read-only, return arrays that refer
    directly to memory-mapped data on disk:

    >>> data = time[:]

    If the data is to be processed after the file is closed, it needs
    to be copied to main memory:

    >>> data = time[:].copy()
    >>> del time
    >>> f.close()
    >>> data.mean()
    4.5

    A NetCDF file can also be used as context manager:

    >>> from scipy.io import netcdf_file
    >>> with netcdf_file('simple.nc', 'r') as f:
    ...     print(f.history)
    b'Created for a test'

    """

    def __init__(self, filename, mode='r', mmap=None, version=1,
                 maskandscale=False):
        """Initialize netcdf_file from fileobj (str or file-like)."""
        # Compare against the exact mode strings.  A substring test such as
        # ``mode not in 'rwa'`` would also let '', 'rw' and 'wa' through.
        if mode not in ('r', 'w', 'a'):
            raise ValueError("Mode must be either 'r', 'w' or 'a'.")

        if hasattr(filename, 'seek'):  # file-like
            # Validate before storing the caller's file object, so that a
            # failed construction leaves nothing behind for ``__del__`` to
            # close.
            if mmap is None:
                mmap = False
            elif mmap and not hasattr(filename, 'fileno'):
                raise ValueError('Cannot use file object for mmap')
            self.fp = filename
            self.filename = 'None'
        else:  # maybe it's a string
            self.filename = filename
            omode = 'r+' if mode == 'a' else mode
            self.fp = open(self.filename, '%sb' % omode)
            if mmap is None:
                # Mmapped files on PyPy cannot be usually closed
                # before the GC runs, so it's better to use mmap=False
                # as the default.
                mmap = (not IS_PYPY)

        if mode != 'r':
            # Cannot read write-only files
            mmap = False

        self.use_mmap = mmap
        self.mode = mode
        self.version_byte = version
        self.maskandscale = maskandscale

        self.dimensions = {}
        self.variables = {}

        self._dims = []
        self._recs = 0
        self._recsize = 0

        self._mm = None
        self._mm_buf = None
        if self.use_mmap:
            self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
            self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)

        # Must stay the last plain assignment: once ``_attributes`` exists,
        # ``__setattr__`` records every assigned name as a user attribute.
        self._attributes = {}

        if mode in ('r', 'a'):
            self._read()

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # ``_attributes`` does not exist yet (still inside ``__init__``).
            pass
        self.__dict__[attr] = value

    def close(self):
        """Closes the NetCDF file.

        Flushes pending data first (see `flush`), releases the memory map
        when nothing else refers to it, and closes the underlying file
        object. Calling it on an already closed or never opened instance
        does nothing.
        """
        if hasattr(self, 'fp') and not self.fp.closed:
            try:
                self.flush()
            finally:
                self.variables = {}
                # ``close`` is also bound to ``__del__`` and can therefore
                # run on an instance whose ``__init__`` did not complete;
                # tolerate a missing ``_mm_buf``.  No local alias is taken,
                # because an extra reference would defeat the weakref test.
                if getattr(self, '_mm_buf', None) is not None:
                    ref = weakref.ref(self._mm_buf)
                    self._mm_buf = None
                    if ref() is None:
                        # self._mm_buf is gc'd, and we can close the mmap
                        self._mm.close()
                    else:
                        # we cannot close self._mm, since self._mm_buf is
                        # alive and there may still be arrays referring to it
                        warnings.warn(
                            "Cannot close a netcdf_file opened with mmap=True, when "
                            "netcdf_variables or arrays referring to its data still "
                            "exist. All data arrays obtained from such files refer "
                            "directly to data on disk, and must be copied before the "
                            "file can be cleanly closed. "
                            "(See netcdf_file docstring for more information on mmap.)",
                            category=RuntimeWarning, stacklevel=2,
                        )
                self._mm = None
                self.fp.close()
    __del__ = close

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def createDimension(self, name, length):
        """
        Adds a dimension to the Dimension section of the NetCDF data structure.

        Note that this function merely adds a new dimension that the variables can
        reference. The values for the dimension, if desired, should be added as
        a variable using `createVariable`, referring to this dimension.

        Parameters
        ----------
        name : str
            Name of the dimension (Eg, 'lat' or 'time').
        length : int
            Length of the dimension. ``None`` marks the unlimited (record)
            dimension.

        Raises
        ------
        ValueError
            If `length` is ``None`` and another dimension already exists.

        See Also
        --------
        createVariable

        """
        if length is None and self._dims:
            raise ValueError("Only first dimension may be unlimited!")

        self.dimensions[name] = length
        self._dims.append(name)

    def createVariable(self, name, type, dimensions):
        """
        Create an empty variable for the `netcdf_file` object, specifying its data
        type and the dimensions it uses.

        Parameters
        ----------
        name : str
            Name of the new variable.
        type : dtype or str
            Data type of the variable.
        dimensions : sequence of str
            List of the dimension names used by the variable, in the desired order.

        Returns
        -------
        variable : netcdf_variable
            The newly created ``netcdf_variable`` object.
            This object has also been added to the `netcdf_file` object as well.

        Raises
        ------
        ValueError
            If `type` has no NetCDF 3 counterpart.

        See Also
        --------
        createDimension

        Notes
        -----
        Any dimensions to be used by the variable should already exist in the
        NetCDF data structure or should be created by `createDimension` prior to
        creating the NetCDF variable.

        """
        shape = tuple([self.dimensions[dim] for dim in dimensions])
        shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for NumPy

        type = dtype(type)
        typecode, size = type.char, type.itemsize
        if (typecode, size) not in REVERSE:
            raise ValueError("NetCDF 3 does not support type %s" % type)

        # convert to big endian always for NetCDF 3
        data = empty(shape_, dtype=type.newbyteorder("B"))
        self.variables[name] = netcdf_variable(
            data, typecode, size, shape, dimensions,
            maskandscale=self.maskandscale)
        return self.variables[name]

    def flush(self):
        """
        Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.

        See Also
        --------
        sync : Identical function

        """
        if hasattr(self, 'mode') and self.mode in ('w', 'a'):
            self._write()
    sync = flush

    def _write(self):
        """Write the magic bytes, the version byte, then header and data."""
        self.fp.seek(0)
        self.fp.write(b'CDF')
        self.fp.write(array(self.version_byte, '>b').tobytes())

        # Write headers and data.
        self._write_numrecs()
        self._write_dim_array()
        self._write_gatt_array()
        self._write_var_array()

    def _write_numrecs(self):
        """Write the record count, raising it to the longest record variable."""
        # Get highest record count from all record variables.
        for var in self.variables.values():
            if var.isrec and len(var.data) > self._recs:
                self.__dict__['_recs'] = len(var.data)
        self._pack_int(self._recs)

    def _write_dim_array(self):
        """Write the dimension list, or the ABSENT marker when there is none."""
        if self.dimensions:
            self.fp.write(NC_DIMENSION)
            self._pack_int(len(self.dimensions))
            for name in self._dims:
                self._pack_string(name)
                length = self.dimensions[name]
                self._pack_int(length or 0)  # replace None with 0 for record dimension
        else:
            self.fp.write(ABSENT)

    def _write_gatt_array(self):
        """Write the global (file-level) attributes."""
        self._write_att_array(self._attributes)

    def _write_att_array(self, attributes):
        """Write an attribute mapping, or the ABSENT marker when it is empty."""
        if attributes:
            self.fp.write(NC_ATTRIBUTE)
            self._pack_int(len(attributes))
            for name, values in attributes.items():
                self._pack_string(name)
                self._write_att_values(values)
        else:
            self.fp.write(ABSENT)

    def _write_var_array(self):
        """Write the metadata of every variable, followed by all the data."""
        if self.variables:
            self.fp.write(NC_VARIABLE)
            self._pack_int(len(self.variables))

            # Sort variable names non-recs first, then recs.
            def sortkey(n):
                v = self.variables[n]
                if v.isrec:
                    return (0,)
                # The leading 1 keeps every non-record variable ahead of the
                # record variables under ``reverse=True``, including scalar
                # variables whose shape is ``()``.  Comparing the bare shape
                # would place ``()`` after the record key, and the scalar's
                # data would then be written over the record section.
                return (1,) + tuple(v._shape)
            variables = sorted(self.variables, key=sortkey, reverse=True)

            # Set the metadata for all variables.
            for name in variables:
                self._write_var_metadata(name)
            # Now that we have the metadata, we know the vsize of
            # each record variable, so we can calculate recsize.
            self.__dict__['_recsize'] = sum([
                var._vsize for var in self.variables.values()
                if var.isrec])
            # Set the data for all variables.
            for name in variables:
                self._write_var_data(name)
        else:
            self.fp.write(ABSENT)

    def _write_var_metadata(self, name):
        """Write the header entry of variable `name`.

        Stores the computed ``_vsize`` and the file position of the (still
        bogus) ``begin`` field on the variable, so that `_write_var_data`
        can patch the real offset in later.
        """
        var = self.variables[name]

        self._pack_string(name)
        self._pack_int(len(var.dimensions))
        for dimname in var.dimensions:
            dimid = self._dims.index(dimname)
            self._pack_int(dimid)

        self._write_att_array(var._attributes)

        nc_type = REVERSE[var.typecode(), var.itemsize()]
        self.fp.write(nc_type)

        if not var.isrec:
            vsize = var.data.size * var.data.itemsize
            vsize += -vsize % 4  # round up to a multiple of 4
        else:  # record variable
            try:
                vsize = var.data[0].size * var.data.itemsize
            except IndexError:
                # no record has been written yet
                vsize = 0
            rec_vars = sum(1 for v in self.variables.values() if v.isrec)
            # A lone record variable is stored without per-record padding.
            if rec_vars > 1:
                vsize += -vsize % 4
        self.variables[name].__dict__['_vsize'] = vsize
        self._pack_int(vsize)

        # Pack a bogus begin, and set the real value later.
        self.variables[name].__dict__['_begin'] = self.fp.tell()
        self._pack_begin(0)

    def _write_var_data(self, name):
        """Write the data of variable `name` and patch its ``begin`` offset."""
        var = self.variables[name]

        # Set begin in file header.
        the_beguine = self.fp.tell()
        self.fp.seek(var._begin)
        self._pack_begin(the_beguine)
        self.fp.seek(the_beguine)

        # Write data.
        if not var.isrec:
            self.fp.write(var.data.tobytes())
            count = var.data.size * var.data.itemsize
            self._write_var_padding(var, var._vsize - count)
        else:  # record variable
            # Handle rec vars with shape[0] < nrecs.
            if self._recs > len(var.data):
                shape = (self._recs,) + var.data.shape[1:]
                # Resize in-place does not always work since
                # the array might not be single-segment
                try:
                    var.data.resize(shape)
                except ValueError:
                    data_dtype = var.data.dtype
                    var.__dict__['data'] = np.resize(
                        var.data, shape).astype(data_dtype)

            pos0 = pos = self.fp.tell()
            for rec in var.data:
                # Apparently scalars cannot be converted to big endian. If we
                # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
                # will remain as ``=i4``.
                if not rec.shape and (rec.dtype.byteorder == '<' or
                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                    rec = rec.byteswap()
                self.fp.write(rec.tobytes())
                # Padding
                count = rec.size * rec.itemsize
                self._write_var_padding(var, var._vsize - count)
                # Records of all record variables are interleaved: skip
                # ahead by the combined record size.
                pos += self._recsize
                self.fp.seek(pos)
            # Leave the file positioned where the next record variable's
            # first record starts.
            self.fp.seek(pos0 + var._vsize)

    def _write_var_padding(self, var, size):
        """Write `size` bytes of padding made of the variable's fill value."""
        encoded_fill_value = var._get_encoded_fill_value()
        num_fills = size // len(encoded_fill_value)
        self.fp.write(encoded_fill_value * num_fills)

    def _write_att_values(self, values):
        """Write one attribute value: type tag, element count, padded data."""
        if hasattr(values, 'dtype'):
            nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]
        else:
            # bytes index into scalars in py3k. Check for "string" types
            if isinstance(values, (str, bytes)):
                sample = values
            else:
                try:
                    sample = values[0]  # subscriptable?
                except TypeError:
                    sample = values  # scalar

            # Anything that is neither an int nor a float is stored as text.
            nc_type = NC_CHAR
            for class_, candidate in ((int, NC_INT), (float, NC_FLOAT)):
                if isinstance(sample, class_):
                    nc_type = candidate
                    break

        typecode, size = TYPEMAP[nc_type]
        dtype_ = '>%s' % typecode
        # asarray() dies with bytes and '>c' in py3k. Change to 'S'
        dtype_ = 'S' if dtype_ == '>c' else dtype_

        values = asarray(values, dtype=dtype_)

        self.fp.write(nc_type)

        if values.dtype.char == 'S':
            nelems = values.itemsize
        else:
            nelems = values.size
        self._pack_int(nelems)

        if not values.shape and (values.dtype.byteorder == '<' or
                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
            values = values.byteswap()
        self.fp.write(values.tobytes())
        count = values.size * values.itemsize
        self.fp.write(b'\x00' * (-count % 4))  # pad

    def _read(self):
        """Read the whole header and attach the data of every variable."""
        # Check magic bytes and version
        magic = self.fp.read(3)
        if not magic == b'CDF':
            raise TypeError("Error: %s is not a valid NetCDF 3 file" %
                            self.filename)
        self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]

        # Read file headers and set data.
        self._read_numrecs()
        self._read_dim_array()
        self._read_gatt_array()
        self._read_var_array()

    def _read_numrecs(self):
        """Read the number of records."""
        self.__dict__['_recs'] = self._unpack_int()

    def _read_dim_array(self):
        """Read the dimension list into ``dimensions`` and ``_dims``."""
        header = self.fp.read(4)
        if header not in [ZERO, NC_DIMENSION]:
            raise ValueError("Unexpected header.")
        count = self._unpack_int()

        for dim in range(count):
            name = self._unpack_string().decode('latin1')
            length = self._unpack_int() or None  # None for record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve order

    def _read_gatt_array(self):
        """Read the global attributes and set them on this object."""
        for k, v in self._read_att_array().items():
            self.__setattr__(k, v)

    def _read_att_array(self):
        """Read an attribute list and return it as a ``{name: value}`` dict."""
        header = self.fp.read(4)
        if header not in [ZERO, NC_ATTRIBUTE]:
            raise ValueError("Unexpected header.")
        count = self._unpack_int()

        attributes = {}
        for attr in range(count):
            name = self._unpack_string().decode('latin1')
            attributes[name] = self._read_att_values()
        return attributes

    def _read_var_array(self):
        """Read every variable header and attach the data arrays."""
        header = self.fp.read(4)
        if header not in [ZERO, NC_VARIABLE]:
            raise ValueError("Unexpected header.")

        begin = 0
        dtypes = {'names': [], 'formats': []}
        rec_vars = []
        count = self._unpack_int()
        for var in range(count):
            (name, dimensions, shape, attributes,
             typecode, size, dtype_, begin_, vsize) = self._read_var()
            # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html
            # Note that vsize is the product of the dimension lengths
            # (omitting the record dimension) and the number of bytes
            # per value (determined from the type), increased to the
            # next multiple of 4, for each variable. If a record
            # variable, this is the amount of space per record. The
            # netCDF "record size" is calculated as the sum of the
            # vsize's of all the record variables.
            #
            # The vsize field is actually redundant, because its value
            # may be computed from other information in the header. The
            # 32-bit vsize field is not large enough to contain the size
            # of variables that require more than 2^32 - 4 bytes, so
            # 2^32 - 1 is used in the vsize field for such variables.
            if shape and shape[0] is None:  # record variable
                rec_vars.append(name)
                # The netCDF "record size" is calculated as the sum of
                # the vsize's of all the record variables.
                self.__dict__['_recsize'] += vsize
                if begin == 0:
                    begin = begin_
                dtypes['names'].append(name)
                dtypes['formats'].append(str(shape[1:]) + dtype_)

                # Handle padding with a virtual variable.
                if typecode in 'bch':
                    actual_size = reduce(mul, (1,) + shape[1:]) * size
                    padding = -actual_size % 4
                    if padding:
                        dtypes['names'].append('_padding_%d' % var)
                        dtypes['formats'].append('(%d,)>b' % padding)

                # Data will be set later.
                data = None
            else:  # not a record variable
                # Calculate size to avoid problems with vsize (above)
                a_size = reduce(mul, shape, 1) * size
                if self.use_mmap:
                    data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)
                    data.shape = shape
                else:
                    pos = self.fp.tell()
                    self.fp.seek(begin_)
                    data = frombuffer(self.fp.read(a_size), dtype=dtype_
                                      ).copy()
                    data.shape = shape
                    self.fp.seek(pos)

            # Add variable.
            self.variables[name] = netcdf_variable(
                data, typecode, size, shape, dimensions, attributes,
                maskandscale=self.maskandscale)

        if rec_vars:
            # Remove padding when only one record variable.
            if len(rec_vars) == 1:
                dtypes['names'] = dtypes['names'][:1]
                dtypes['formats'] = dtypes['formats'][:1]

            # Build rec array.
            if self.use_mmap:
                buf = self._mm_buf[begin:begin+self._recs*self._recsize]
                rec_array = buf.view(dtype=dtypes)
                rec_array.shape = (self._recs,)
            else:
                pos = self.fp.tell()
                self.fp.seek(begin)
                rec_array = frombuffer(self.fp.read(self._recs*self._recsize),
                                       dtype=dtypes).copy()
                rec_array.shape = (self._recs,)
                self.fp.seek(pos)

            for var in rec_vars:
                self.variables[var].__dict__['data'] = rec_array[var]

    def _read_var(self):
        """Read one variable header entry.

        Returns
        -------
        tuple
            ``(name, dimensions, shape, attributes, typecode, size, dtype_,
            begin, vsize)``
        """
        name = self._unpack_string().decode('latin1')
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = self._unpack_int()
        # The begin offset is 32 bits wide for version 1 and 64 bits wide
        # for version 2.
        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

        typecode, size = TYPEMAP[nc_type]
        dtype_ = '>%s' % typecode

        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize

    def _read_att_values(self):
        """Read one attribute value.

        Character data is returned as ``bytes`` with trailing NULs removed.
        Numeric data is returned as an array, or as a scalar when it holds
        exactly one element.
        """
        nc_type = self.fp.read(4)
        n = self._unpack_int()

        typecode, size = TYPEMAP[nc_type]

        count = n*size
        values = self.fp.read(int(count))
        self.fp.read(-count % 4)  # read padding

        if typecode != 'c':
            values = frombuffer(values, dtype='>%s' % typecode).copy()
            if values.shape == (1,):
                values = values[0]
        else:
            values = values.rstrip(b'\x00')
        return values

    def _pack_begin(self, begin):
        """Write a ``begin`` offset using the width the file version calls for.

        Raises
        ------
        ValueError
            If the version byte is neither 1 nor 2. Writing nothing here
            would silently produce an unreadable header.
        """
        if self.version_byte == 1:
            self._pack_int(begin)
        elif self.version_byte == 2:
            self._pack_int64(begin)
        else:
            raise ValueError(
                "Unsupported NetCDF version %r: must be 1 or 2." %
                (self.version_byte,))

    def _pack_int(self, value):
        """Write `value` as a big-endian 32-bit integer."""
        self.fp.write(array(value, '>i').tobytes())
    _pack_int32 = _pack_int

    def _unpack_int(self):
        """Read a big-endian 32-bit integer and return it as ``int``."""
        return int(frombuffer(self.fp.read(4), '>i')[0])
    _unpack_int32 = _unpack_int

    def _pack_int64(self, value):
        """Write `value` as a big-endian 64-bit integer."""
        self.fp.write(array(value, '>q').tobytes())

    def _unpack_int64(self):
        """Read a big-endian 64-bit integer and return it as ``int``."""
        # Converted to a Python int for consistency with ``_unpack_int``.
        return int(frombuffer(self.fp.read(8), '>q')[0])

    def _pack_string(self, s):
        """Write `s` as a length-prefixed latin1 string, NUL-padded to 4 bytes."""
        count = len(s)
        self._pack_int(count)
        self.fp.write(s.encode('latin1'))
        self.fp.write(b'\x00' * (-count % 4))  # pad

    def _unpack_string(self):
        """Read a length-prefixed string; returns ``bytes`` without trailing NULs."""
        count = self._unpack_int()
        s = self.fp.read(count).rstrip(b'\x00')
        self.fp.read(-count % 4)  # read padding
        return s
class netcdf_variable:
    """
    A data object for netcdf files.

    `netcdf_variable` objects are constructed by calling the method
    `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
    objects behave much like array objects defined in numpy, except that their
    data resides in a file. Data is read by indexing and written by assigning
    to an indexed subset; the entire array can be accessed by the index ``[:]``
    or (for scalars) by using the methods `getValue` and `assignValue`.
    `netcdf_variable` objects also have attribute `shape` with the same meaning
    as for arrays, but the shape cannot be modified. There is another read-only
    attribute `dimensions`, whose value is the tuple of dimension names.

    All other attributes correspond to variable attributes defined in
    the NetCDF file. Variable attributes are created by assigning to an
    attribute of the `netcdf_variable` object.

    Parameters
    ----------
    data : array_like
        The data array that holds the values for the variable.
        Typically, this is initialized as empty, but with the proper shape.
    typecode : dtype character code
        Desired data-type for the data array.
    size : int
        Desired element size for the data array.
    shape : sequence of ints
        The shape of the array. This should match the lengths of the
        variable's dimensions.
    dimensions : sequence of strings
        The names of the dimensions used by the variable. Must be in the
        same order of the dimension lengths given by `shape`.
    attributes : dict, optional
        Attribute values (any type) keyed by string names. These attributes
        become attributes for the netcdf_variable object.
    maskandscale : bool, optional
        Whether to automatically scale and/or mask data based on attributes.
        Default is False.


    Attributes
    ----------
    dimensions : list of str
        List of names of dimensions used by the variable object.
    isrec, shape
        Properties

    See Also
    --------
    isrec, shape

    """

    def __init__(self, data, typecode, size, shape, dimensions,
                 attributes=None,
                 maskandscale=False):
        self.data = data
        self._typecode = typecode
        self._size = size
        self._shape = shape
        self.dimensions = dimensions
        self.maskandscale = maskandscale

        # Must come after the plain assignments above: once ``_attributes``
        # exists, ``__setattr__`` records every assigned name in it.
        self._attributes = attributes or {}
        for k, v in self._attributes.items():
            self.__dict__[k] = v

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # ``_attributes`` does not exist yet (still inside ``__init__``).
            pass
        self.__dict__[attr] = value

    @property
    def isrec(self):
        """Returns whether the variable has a record dimension or not.

        A record dimension is a dimension along which additional data could be
        easily appended in the netcdf data structure without much rewriting of
        the data file. This attribute is a read-only property of the
        `netcdf_variable`.

        """
        return bool(self.data.shape) and not self._shape[0]

    @property
    def shape(self):
        """Returns the shape tuple of the data variable.

        This is a read-only attribute and can not be modified in the
        same manner of other numpy arrays.
        """
        return self.data.shape

    def getValue(self):
        """
        Retrieve a scalar value from a `netcdf_variable` of length one.

        Raises
        ------
        ValueError
            If the netcdf variable is an array of length greater than one,
            this exception will be raised.

        """
        return self.data.item()

    def assignValue(self, value):
        """
        Assign a scalar value to a `netcdf_variable` of length one.

        Parameters
        ----------
        value : scalar
            Scalar value (of compatible type) to assign to a length-one netcdf
            variable. This value will be written to file.

        Raises
        ------
        RuntimeError
            If the underlying data array is not writeable.

        Notes
        -----
        The value is assigned with NumPy broadcasting, so no check is made
        that the variable actually has length one.

        """
        if not self.data.flags.writeable:
            # Work-around for a bug in NumPy. Calling itemset() on a read-only
            # memory-mapped array causes a seg. fault.
            # See NumPy ticket #1622, and SciPy ticket #1202.
            # This check for `writeable` can be removed when the oldest version
            # of NumPy still supported by scipy contains the fix for #1622.
            raise RuntimeError("variable is not writeable")

        # ``[...]`` rather than ``[:]``: a slice is not a valid index for a
        # 0-d array, which is what a variable without dimensions holds.
        self.data[...] = value

    def typecode(self):
        """
        Return the typecode of the variable.

        Returns
        -------
        typecode : char
            The character typecode of the variable (e.g., 'i' for int).

        """
        return self._typecode

    def itemsize(self):
        """
        Return the itemsize of the variable.

        Returns
        -------
        itemsize : int
            The element size of the variable (e.g., 8 for float64).

        """
        return self._size

    def __getitem__(self, index):
        if not self.maskandscale:
            return self.data[index]

        # Work on a copy so that masking and scaling never touch the stored
        # data.
        data = self.data[index].copy()
        missing_value = self._get_missing_value()
        data = self._apply_missing_value(data, missing_value)
        scale_factor = self._attributes.get('scale_factor')
        add_offset = self._attributes.get('add_offset')
        if add_offset is not None or scale_factor is not None:
            data = data.astype(np.float64)
        if scale_factor is not None:
            data = data * scale_factor
        if add_offset is not None:
            data += add_offset

        return data

    def __setitem__(self, index, data):
        if self.maskandscale:
            # Test against None explicitly: a missing value of 0 is a
            # legitimate setting and must not be replaced by the fallback.
            missing_value = self._get_missing_value()
            if missing_value is None:
                missing_value = getattr(data, 'fill_value', 999999)
            self._attributes.setdefault('missing_value', missing_value)
            self._attributes.setdefault('_FillValue', missing_value)
            data = ((data - self._attributes.get('add_offset', 0.0)) /
                    self._attributes.get('scale_factor', 1.0))
            data = np.ma.asarray(data).filled(missing_value)
            if self._typecode not in 'fd' and data.dtype.kind == 'f':
                data = np.round(data)

        # Expand data for record vars?
        if self.isrec:
            if isinstance(index, tuple):
                rec_index = index[0]
            else:
                rec_index = index
            if isinstance(rec_index, slice):
                recs = (rec_index.start or 0) + len(data)
            else:
                recs = rec_index + 1
            if recs > len(self.data):
                shape = (recs,) + self._shape[1:]
                # Resize in-place does not always work since
                # the array might not be single-segment
                try:
                    self.data.resize(shape)
                except ValueError:
                    data_dtype = self.data.dtype
                    self.__dict__['data'] = np.resize(
                        self.data, shape).astype(data_dtype)
        self.data[index] = data

    def _default_encoded_fill_value(self):
        """
        The default encoded fill-value for this Variable's data type.
        """
        nc_type = REVERSE[self.typecode(), self.itemsize()]
        return FILLMAP[nc_type]

    def _get_encoded_fill_value(self):
        """
        Returns the encoded fill value for this variable as bytes.

        This is taken from either the _FillValue attribute, or the default fill
        value for this variable's data type.
        """
        if '_FillValue' in self._attributes:
            fill_value = np.array(self._attributes['_FillValue'],
                                  dtype=self.data.dtype).tobytes()
            # Only a fill value that encodes to exactly one element is used.
            if len(fill_value) == self.itemsize():
                return fill_value
        return self._default_encoded_fill_value()

    def _get_missing_value(self):
        """
        Returns the value denoting "no data" for this variable.

        If this variable does not have a missing/fill value, returns None.

        If both _FillValue and missing_value are given, give precedence to
        _FillValue. The netCDF standard gives special meaning to _FillValue;
        missing_value is just used for compatibility with old datasets.
        """
        if '_FillValue' in self._attributes:
            return self._attributes['_FillValue']
        if 'missing_value' in self._attributes:
            return self._attributes['missing_value']
        return None

    @staticmethod
    def _apply_missing_value(data, missing_value):
        """
        Applies the given missing value to the data array.

        Returns a numpy.ma array, with any value equal to missing_value masked
        out (unless missing_value is None, in which case the original array is
        returned).
        """
        if missing_value is None:
            return data

        try:
            missing_value_isnan = np.isnan(missing_value)
        except (TypeError, NotImplementedError):
            # some data types (e.g., characters) cannot be tested for NaN
            missing_value_isnan = False

        if missing_value_isnan:
            # NaN never compares equal to itself, so it needs its own test.
            mymask = np.isnan(data)
        else:
            mymask = (data == missing_value)

        return np.ma.masked_where(mymask, data)
# Alternative names bound to the two classes defined in this module
# (presumably kept for compatibility with the Scientific.IO.NetCDF naming --
# confirm before removing).
NetCDFFile = netcdf_file
NetCDFVariable = netcdf_variable