Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 19%

481 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1""" 

2NetCDF reader/writer module. 

3 

4This module is used to read and create NetCDF files. NetCDF files are 

5accessed through the `netcdf_file` object. Data written to and from NetCDF 

6files are contained in `netcdf_variable` objects. Attributes are given 

7as member variables of the `netcdf_file` and `netcdf_variable` objects. 

8 

9This module implements the Scientific.IO.NetCDF API to read and create 

10NetCDF files. The same API is also used in the PyNIO and pynetcdf 

11modules, allowing these modules to be used interchangeably when working 

12with NetCDF files. 

13 

14Only NetCDF3 is supported here; for NetCDF4 see 

15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__, 

16which has a similar API. 

17 

18""" 

19 

20# TODO: 

21# * properly implement ``_FillValue``. 

22# * fix character variables. 

23# * implement PAGESIZE for Python 2.6? 

24 

25# The Scientific.IO.NetCDF API allows attributes to be added directly to 

26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate 

27# between user-set attributes and instance attributes, user-set attributes 

28# are automatically stored in the ``_attributes`` attribute by overloading 

29#``__setattr__``. This is the reason why the code sometimes uses 

30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``; 

31# otherwise the key would be inserted into userspace attributes. 

32 

33 

34__all__ = ['netcdf_file', 'netcdf_variable'] 

35 

36 

37import warnings 

38import weakref 

39from operator import mul 

40from platform import python_implementation 

41 

42import mmap as mm 

43 

44import numpy as np 

45from numpy import frombuffer, dtype, empty, array, asarray 

46from numpy import little_endian as LITTLE_ENDIAN 

47from functools import reduce 

48 

49 

50IS_PYPY = python_implementation() == 'PyPy' 

51 

52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00' 

53ZERO = b'\x00\x00\x00\x00' 

54NC_BYTE = b'\x00\x00\x00\x01' 

55NC_CHAR = b'\x00\x00\x00\x02' 

56NC_SHORT = b'\x00\x00\x00\x03' 

57NC_INT = b'\x00\x00\x00\x04' 

58NC_FLOAT = b'\x00\x00\x00\x05' 

59NC_DOUBLE = b'\x00\x00\x00\x06' 

60NC_DIMENSION = b'\x00\x00\x00\n' 

61NC_VARIABLE = b'\x00\x00\x00\x0b' 

62NC_ATTRIBUTE = b'\x00\x00\x00\x0c' 

63FILL_BYTE = b'\x81' 

64FILL_CHAR = b'\x00' 

65FILL_SHORT = b'\x80\x01' 

66FILL_INT = b'\x80\x00\x00\x01' 

67FILL_FLOAT = b'\x7C\xF0\x00\x00' 

68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00' 

69 

70TYPEMAP = {NC_BYTE: ('b', 1), 

71 NC_CHAR: ('c', 1), 

72 NC_SHORT: ('h', 2), 

73 NC_INT: ('i', 4), 

74 NC_FLOAT: ('f', 4), 

75 NC_DOUBLE: ('d', 8)} 

76 

77FILLMAP = {NC_BYTE: FILL_BYTE, 

78 NC_CHAR: FILL_CHAR, 

79 NC_SHORT: FILL_SHORT, 

80 NC_INT: FILL_INT, 

81 NC_FLOAT: FILL_FLOAT, 

82 NC_DOUBLE: FILL_DOUBLE} 

83 

84REVERSE = {('b', 1): NC_BYTE, 

85 ('B', 1): NC_CHAR, 

86 ('c', 1): NC_CHAR, 

87 ('h', 2): NC_SHORT, 

88 ('i', 4): NC_INT, 

89 ('f', 4): NC_FLOAT, 

90 ('d', 8): NC_DOUBLE, 

91 

92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char, 

93 # used when getting the types from generic attributes. 

94 ('l', 4): NC_INT, 

95 ('S', 1): NC_CHAR} 

96 

97 

98class netcdf_file: 

99 """ 

100 A file object for NetCDF data. 

101 

102 A `netcdf_file` object has two standard attributes: `dimensions` and 

103 `variables`. The values of both are dictionaries, mapping dimension 

104 names to their associated lengths and variable names to variables, 

105 respectively. Application programs should never modify these 

106 dictionaries. 

107 

108 All other attributes correspond to global attributes defined in the 

109 NetCDF file. Global file attributes are created by assigning to an 

110 attribute of the `netcdf_file` object. 

111 

112 Parameters 

113 ---------- 

114 filename : string or file-like 

115 string -> filename 

116 mode : {'r', 'w', 'a'}, optional 

117 read-write-append mode, default is 'r' 

118 mmap : None or bool, optional 

119 Whether to mmap `filename` when reading. Default is True 

120 when `filename` is a file name, False when `filename` is a 

121 file-like object. Note that when mmap is in use, data arrays 

122 returned refer directly to the mmapped data on disk, and the 

123 file cannot be closed as long as references to it exist. 

124 version : {1, 2}, optional 

125 version of netcdf to read / write, where 1 means *Classic 

126 format* and 2 means *64-bit offset format*. Default is 1. See 

127 `here <https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_introduction.html#select_format>`__ 

128 for more info. 

129 maskandscale : bool, optional 

130 Whether to automatically scale and/or mask data based on attributes. 

131 Default is False. 

132 

133 Notes 

134 ----- 

135 The major advantage of this module over other modules is that it doesn't 

136 require the code to be linked to the NetCDF libraries. This module is 

137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_. 

138 

139 NetCDF files are a self-describing binary data format. The file contains 

140 metadata that describes the dimensions and variables in the file. More 

141 details about NetCDF files can be found `here 

142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There 

143 are three main sections to a NetCDF data structure: 

144 

145 1. Dimensions 

146 2. Variables 

147 3. Attributes 

148 

149 The dimensions section records the name and length of each dimension used 

150 by the variables. The variables would then indicate which dimensions it 

151 uses and any attributes such as data units, along with containing the data 

152 values for the variable. It is good practice to include a 

153 variable that is the same name as a dimension to provide the values for 

154 that axes. Lastly, the attributes section would contain additional 

155 information such as the name of the file creator or the instrument used to 

156 collect the data. 

157 

158 When writing data to a NetCDF file, there is often the need to indicate the 

159 'record dimension'. A record dimension is the unbounded dimension for a 

160 variable. For example, a temperature variable may have dimensions of 

161 latitude, longitude and time. If one wants to add more temperature data to 

162 the NetCDF file as time progresses, then the temperature variable should 

163 have the time dimension flagged as the record dimension. 

164 

165 In addition, the NetCDF file header contains the position of the data in 

166 the file, so access can be done in an efficient manner without loading 

167 unnecessary data into memory. It uses the ``mmap`` module to create 

168 Numpy arrays mapped to the data on disk, for the same purpose. 

169 

170 Note that when `netcdf_file` is used to open a file with mmap=True 

171 (default for read-only), arrays returned by it refer to data 

172 directly on the disk. The file should not be closed, and cannot be cleanly 

173 closed when asked, if such arrays are alive. You may want to copy data arrays 

174 obtained from mmapped Netcdf file if they are to be processed after the file 

175 is closed, see the example below. 

176 

177 Examples 

178 -------- 

179 To create a NetCDF file: 

180 

181 >>> from scipy.io import netcdf_file 

182 >>> import numpy as np 

183 >>> f = netcdf_file('simple.nc', 'w') 

184 >>> f.history = 'Created for a test' 

185 >>> f.createDimension('time', 10) 

186 >>> time = f.createVariable('time', 'i', ('time',)) 

187 >>> time[:] = np.arange(10) 

188 >>> time.units = 'days since 2008-01-01' 

189 >>> f.close() 

190 

191 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice 

192 of the time variable allows for the data to be set in the object, rather 

193 than letting ``arange(10)`` overwrite the ``time`` variable. 

194 

195 To read the NetCDF file we just created: 

196 

197 >>> from scipy.io import netcdf_file 

198 >>> f = netcdf_file('simple.nc', 'r') 

199 >>> print(f.history) 

200 b'Created for a test' 

201 >>> time = f.variables['time'] 

202 >>> print(time.units) 

203 b'days since 2008-01-01' 

204 >>> print(time.shape) 

205 (10,) 

206 >>> print(time[-1]) 

207 9 

208 

209 NetCDF files, when opened read-only, return arrays that refer 

210 directly to memory-mapped data on disk: 

211 

212 >>> data = time[:] 

213 

214 If the data is to be processed after the file is closed, it needs 

215 to be copied to main memory: 

216 

217 >>> data = time[:].copy() 

218 >>> del time 

219 >>> f.close() 

220 >>> data.mean() 

221 4.5 

222 

223 A NetCDF file can also be used as context manager: 

224 

225 >>> from scipy.io import netcdf_file 

226 >>> with netcdf_file('simple.nc', 'r') as f: 

227 ... print(f.history) 

228 b'Created for a test' 

229 

230 """ 

231 def __init__(self, filename, mode='r', mmap=None, version=1, 

232 maskandscale=False): 

233 """Initialize netcdf_file from fileobj (str or file-like).""" 

234 if mode not in 'rwa': 

235 raise ValueError("Mode must be either 'r', 'w' or 'a'.") 

236 

237 if hasattr(filename, 'seek'): # file-like 

238 self.fp = filename 

239 self.filename = 'None' 

240 if mmap is None: 

241 mmap = False 

242 elif mmap and not hasattr(filename, 'fileno'): 

243 raise ValueError('Cannot use file object for mmap') 

244 else: # maybe it's a string 

245 self.filename = filename 

246 omode = 'r+' if mode == 'a' else mode 

247 self.fp = open(self.filename, '%sb' % omode) 

248 if mmap is None: 

249 # Mmapped files on PyPy cannot be usually closed 

250 # before the GC runs, so it's better to use mmap=False 

251 # as the default. 

252 mmap = (not IS_PYPY) 

253 

254 if mode != 'r': 

255 # Cannot read write-only files 

256 mmap = False 

257 

258 self.use_mmap = mmap 

259 self.mode = mode 

260 self.version_byte = version 

261 self.maskandscale = maskandscale 

262 

263 self.dimensions = {} 

264 self.variables = {} 

265 

266 self._dims = [] 

267 self._recs = 0 

268 self._recsize = 0 

269 

270 self._mm = None 

271 self._mm_buf = None 

272 if self.use_mmap: 

273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ) 

274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8) 

275 

276 self._attributes = {} 

277 

278 if mode in 'ra': 

279 self._read() 

280 

281 def __setattr__(self, attr, value): 

282 # Store user defined attributes in a separate dict, 

283 # so we can save them to file later. 

284 try: 

285 self._attributes[attr] = value 

286 except AttributeError: 

287 pass 

288 self.__dict__[attr] = value 

289 

290 def close(self): 

291 """Closes the NetCDF file.""" 

292 if hasattr(self, 'fp') and not self.fp.closed: 

293 try: 

294 self.flush() 

295 finally: 

296 self.variables = {} 

297 if self._mm_buf is not None: 

298 ref = weakref.ref(self._mm_buf) 

299 self._mm_buf = None 

300 if ref() is None: 

301 # self._mm_buf is gc'd, and we can close the mmap 

302 self._mm.close() 

303 else: 

304 # we cannot close self._mm, since self._mm_buf is 

305 # alive and there may still be arrays referring to it 

306 warnings.warn(( 

307 "Cannot close a netcdf_file opened with mmap=True, when " 

308 "netcdf_variables or arrays referring to its data still exist. " 

309 "All data arrays obtained from such files refer directly to " 

310 "data on disk, and must be copied before the file can be cleanly " 

311 "closed. (See netcdf_file docstring for more information on mmap.)" 

312 ), category=RuntimeWarning) 

313 self._mm = None 

314 self.fp.close() 

315 __del__ = close 

316 

317 def __enter__(self): 

318 return self 

319 

320 def __exit__(self, type, value, traceback): 

321 self.close() 

322 

323 def createDimension(self, name, length): 

324 """ 

325 Adds a dimension to the Dimension section of the NetCDF data structure. 

326 

327 Note that this function merely adds a new dimension that the variables can 

328 reference. The values for the dimension, if desired, should be added as 

329 a variable using `createVariable`, referring to this dimension. 

330 

331 Parameters 

332 ---------- 

333 name : str 

334 Name of the dimension (Eg, 'lat' or 'time'). 

335 length : int 

336 Length of the dimension. 

337 

338 See Also 

339 -------- 

340 createVariable 

341 

342 """ 

343 if length is None and self._dims: 

344 raise ValueError("Only first dimension may be unlimited!") 

345 

346 self.dimensions[name] = length 

347 self._dims.append(name) 

348 

349 def createVariable(self, name, type, dimensions): 

350 """ 

351 Create an empty variable for the `netcdf_file` object, specifying its data 

352 type and the dimensions it uses. 

353 

354 Parameters 

355 ---------- 

356 name : str 

357 Name of the new variable. 

358 type : dtype or str 

359 Data type of the variable. 

360 dimensions : sequence of str 

361 List of the dimension names used by the variable, in the desired order. 

362 

363 Returns 

364 ------- 

365 variable : netcdf_variable 

366 The newly created ``netcdf_variable`` object. 

367 This object has also been added to the `netcdf_file` object as well. 

368 

369 See Also 

370 -------- 

371 createDimension 

372 

373 Notes 

374 ----- 

375 Any dimensions to be used by the variable should already exist in the 

376 NetCDF data structure or should be created by `createDimension` prior to 

377 creating the NetCDF variable. 

378 

379 """ 

380 shape = tuple([self.dimensions[dim] for dim in dimensions]) 

381 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy 

382 

383 type = dtype(type) 

384 typecode, size = type.char, type.itemsize 

385 if (typecode, size) not in REVERSE: 

386 raise ValueError("NetCDF 3 does not support type %s" % type) 

387 

388 data = empty(shape_, dtype=type.newbyteorder("B")) # convert to big endian always for NetCDF 3 

389 self.variables[name] = netcdf_variable( 

390 data, typecode, size, shape, dimensions, 

391 maskandscale=self.maskandscale) 

392 return self.variables[name] 

393 

394 def flush(self): 

395 """ 

396 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode. 

397 

398 See Also 

399 -------- 

400 sync : Identical function 

401 

402 """ 

403 if hasattr(self, 'mode') and self.mode in 'wa': 

404 self._write() 

405 sync = flush 

406 

407 def _write(self): 

408 self.fp.seek(0) 

409 self.fp.write(b'CDF') 

410 self.fp.write(array(self.version_byte, '>b').tobytes()) 

411 

412 # Write headers and data. 

413 self._write_numrecs() 

414 self._write_dim_array() 

415 self._write_gatt_array() 

416 self._write_var_array() 

417 

418 def _write_numrecs(self): 

419 # Get highest record count from all record variables. 

420 for var in self.variables.values(): 

421 if var.isrec and len(var.data) > self._recs: 

422 self.__dict__['_recs'] = len(var.data) 

423 self._pack_int(self._recs) 

424 

425 def _write_dim_array(self): 

426 if self.dimensions: 

427 self.fp.write(NC_DIMENSION) 

428 self._pack_int(len(self.dimensions)) 

429 for name in self._dims: 

430 self._pack_string(name) 

431 length = self.dimensions[name] 

432 self._pack_int(length or 0) # replace None with 0 for record dimension 

433 else: 

434 self.fp.write(ABSENT) 

435 

436 def _write_gatt_array(self): 

437 self._write_att_array(self._attributes) 

438 

439 def _write_att_array(self, attributes): 

440 if attributes: 

441 self.fp.write(NC_ATTRIBUTE) 

442 self._pack_int(len(attributes)) 

443 for name, values in attributes.items(): 

444 self._pack_string(name) 

445 self._write_att_values(values) 

446 else: 

447 self.fp.write(ABSENT) 

448 

449 def _write_var_array(self): 

450 if self.variables: 

451 self.fp.write(NC_VARIABLE) 

452 self._pack_int(len(self.variables)) 

453 

454 # Sort variable names non-recs first, then recs. 

455 def sortkey(n): 

456 v = self.variables[n] 

457 if v.isrec: 

458 return (-1,) 

459 return v._shape 

460 variables = sorted(self.variables, key=sortkey, reverse=True) 

461 

462 # Set the metadata for all variables. 

463 for name in variables: 

464 self._write_var_metadata(name) 

465 # Now that we have the metadata, we know the vsize of 

466 # each record variable, so we can calculate recsize. 

467 self.__dict__['_recsize'] = sum([ 

468 var._vsize for var in self.variables.values() 

469 if var.isrec]) 

470 # Set the data for all variables. 

471 for name in variables: 

472 self._write_var_data(name) 

473 else: 

474 self.fp.write(ABSENT) 

475 

476 def _write_var_metadata(self, name): 

477 var = self.variables[name] 

478 

479 self._pack_string(name) 

480 self._pack_int(len(var.dimensions)) 

481 for dimname in var.dimensions: 

482 dimid = self._dims.index(dimname) 

483 self._pack_int(dimid) 

484 

485 self._write_att_array(var._attributes) 

486 

487 nc_type = REVERSE[var.typecode(), var.itemsize()] 

488 self.fp.write(nc_type) 

489 

490 if not var.isrec: 

491 vsize = var.data.size * var.data.itemsize 

492 vsize += -vsize % 4 

493 else: # record variable 

494 try: 

495 vsize = var.data[0].size * var.data.itemsize 

496 except IndexError: 

497 vsize = 0 

498 rec_vars = len([v for v in self.variables.values() 

499 if v.isrec]) 

500 if rec_vars > 1: 

501 vsize += -vsize % 4 

502 self.variables[name].__dict__['_vsize'] = vsize 

503 self._pack_int(vsize) 

504 

505 # Pack a bogus begin, and set the real value later. 

506 self.variables[name].__dict__['_begin'] = self.fp.tell() 

507 self._pack_begin(0) 

508 

509 def _write_var_data(self, name): 

510 var = self.variables[name] 

511 

512 # Set begin in file header. 

513 the_beguine = self.fp.tell() 

514 self.fp.seek(var._begin) 

515 self._pack_begin(the_beguine) 

516 self.fp.seek(the_beguine) 

517 

518 # Write data. 

519 if not var.isrec: 

520 self.fp.write(var.data.tobytes()) 

521 count = var.data.size * var.data.itemsize 

522 self._write_var_padding(var, var._vsize - count) 

523 else: # record variable 

524 # Handle rec vars with shape[0] < nrecs. 

525 if self._recs > len(var.data): 

526 shape = (self._recs,) + var.data.shape[1:] 

527 # Resize in-place does not always work since 

528 # the array might not be single-segment 

529 try: 

530 var.data.resize(shape) 

531 except ValueError: 

532 var.__dict__['data'] = np.resize(var.data, shape).astype(var.data.dtype) 

533 

534 pos0 = pos = self.fp.tell() 

535 for rec in var.data: 

536 # Apparently scalars cannot be converted to big endian. If we 

537 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype 

538 # will remain as ``=i4``. 

539 if not rec.shape and (rec.dtype.byteorder == '<' or 

540 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)): 

541 rec = rec.byteswap() 

542 self.fp.write(rec.tobytes()) 

543 # Padding 

544 count = rec.size * rec.itemsize 

545 self._write_var_padding(var, var._vsize - count) 

546 pos += self._recsize 

547 self.fp.seek(pos) 

548 self.fp.seek(pos0 + var._vsize) 

549 

550 def _write_var_padding(self, var, size): 

551 encoded_fill_value = var._get_encoded_fill_value() 

552 num_fills = size // len(encoded_fill_value) 

553 self.fp.write(encoded_fill_value * num_fills) 

554 

555 def _write_att_values(self, values): 

556 if hasattr(values, 'dtype'): 

557 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize] 

558 else: 

559 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)] 

560 

561 # bytes index into scalars in py3k. Check for "string" types 

562 if isinstance(values, (str, bytes)): 

563 sample = values 

564 else: 

565 try: 

566 sample = values[0] # subscriptable? 

567 except TypeError: 

568 sample = values # scalar 

569 

570 for class_, nc_type in types: 

571 if isinstance(sample, class_): 

572 break 

573 

574 typecode, size = TYPEMAP[nc_type] 

575 dtype_ = '>%s' % typecode 

576 # asarray() dies with bytes and '>c' in py3k. Change to 'S' 

577 dtype_ = 'S' if dtype_ == '>c' else dtype_ 

578 

579 values = asarray(values, dtype=dtype_) 

580 

581 self.fp.write(nc_type) 

582 

583 if values.dtype.char == 'S': 

584 nelems = values.itemsize 

585 else: 

586 nelems = values.size 

587 self._pack_int(nelems) 

588 

589 if not values.shape and (values.dtype.byteorder == '<' or 

590 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)): 

591 values = values.byteswap() 

592 self.fp.write(values.tobytes()) 

593 count = values.size * values.itemsize 

594 self.fp.write(b'\x00' * (-count % 4)) # pad 

595 

596 def _read(self): 

597 # Check magic bytes and version 

598 magic = self.fp.read(3) 

599 if not magic == b'CDF': 

600 raise TypeError("Error: %s is not a valid NetCDF 3 file" % 

601 self.filename) 

602 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0] 

603 

604 # Read file headers and set data. 

605 self._read_numrecs() 

606 self._read_dim_array() 

607 self._read_gatt_array() 

608 self._read_var_array() 

609 

610 def _read_numrecs(self): 

611 self.__dict__['_recs'] = self._unpack_int() 

612 

613 def _read_dim_array(self): 

614 header = self.fp.read(4) 

615 if header not in [ZERO, NC_DIMENSION]: 

616 raise ValueError("Unexpected header.") 

617 count = self._unpack_int() 

618 

619 for dim in range(count): 

620 name = self._unpack_string().decode('latin1') 

621 length = self._unpack_int() or None # None for record dimension 

622 self.dimensions[name] = length 

623 self._dims.append(name) # preserve order 

624 

625 def _read_gatt_array(self): 

626 for k, v in self._read_att_array().items(): 

627 self.__setattr__(k, v) 

628 

629 def _read_att_array(self): 

630 header = self.fp.read(4) 

631 if header not in [ZERO, NC_ATTRIBUTE]: 

632 raise ValueError("Unexpected header.") 

633 count = self._unpack_int() 

634 

635 attributes = {} 

636 for attr in range(count): 

637 name = self._unpack_string().decode('latin1') 

638 attributes[name] = self._read_att_values() 

639 return attributes 

640 

641 def _read_var_array(self): 

642 header = self.fp.read(4) 

643 if header not in [ZERO, NC_VARIABLE]: 

644 raise ValueError("Unexpected header.") 

645 

646 begin = 0 

647 dtypes = {'names': [], 'formats': []} 

648 rec_vars = [] 

649 count = self._unpack_int() 

650 for var in range(count): 

651 (name, dimensions, shape, attributes, 

652 typecode, size, dtype_, begin_, vsize) = self._read_var() 

653 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html 

654 # Note that vsize is the product of the dimension lengths 

655 # (omitting the record dimension) and the number of bytes 

656 # per value (determined from the type), increased to the 

657 # next multiple of 4, for each variable. If a record 

658 # variable, this is the amount of space per record. The 

659 # netCDF "record size" is calculated as the sum of the 

660 # vsize's of all the record variables. 

661 # 

662 # The vsize field is actually redundant, because its value 

663 # may be computed from other information in the header. The 

664 # 32-bit vsize field is not large enough to contain the size 

665 # of variables that require more than 2^32 - 4 bytes, so 

666 # 2^32 - 1 is used in the vsize field for such variables. 

667 if shape and shape[0] is None: # record variable 

668 rec_vars.append(name) 

669 # The netCDF "record size" is calculated as the sum of 

670 # the vsize's of all the record variables. 

671 self.__dict__['_recsize'] += vsize 

672 if begin == 0: 

673 begin = begin_ 

674 dtypes['names'].append(name) 

675 dtypes['formats'].append(str(shape[1:]) + dtype_) 

676 

677 # Handle padding with a virtual variable. 

678 if typecode in 'bch': 

679 actual_size = reduce(mul, (1,) + shape[1:]) * size 

680 padding = -actual_size % 4 

681 if padding: 

682 dtypes['names'].append('_padding_%d' % var) 

683 dtypes['formats'].append('(%d,)>b' % padding) 

684 

685 # Data will be set later. 

686 data = None 

687 else: # not a record variable 

688 # Calculate size to avoid problems with vsize (above) 

689 a_size = reduce(mul, shape, 1) * size 

690 if self.use_mmap: 

691 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_) 

692 data.shape = shape 

693 else: 

694 pos = self.fp.tell() 

695 self.fp.seek(begin_) 

696 data = frombuffer(self.fp.read(a_size), dtype=dtype_ 

697 ).copy() 

698 data.shape = shape 

699 self.fp.seek(pos) 

700 

701 # Add variable. 

702 self.variables[name] = netcdf_variable( 

703 data, typecode, size, shape, dimensions, attributes, 

704 maskandscale=self.maskandscale) 

705 

706 if rec_vars: 

707 # Remove padding when only one record variable. 

708 if len(rec_vars) == 1: 

709 dtypes['names'] = dtypes['names'][:1] 

710 dtypes['formats'] = dtypes['formats'][:1] 

711 

712 # Build rec array. 

713 if self.use_mmap: 

714 rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes) 

715 rec_array.shape = (self._recs,) 

716 else: 

717 pos = self.fp.tell() 

718 self.fp.seek(begin) 

719 rec_array = frombuffer(self.fp.read(self._recs*self._recsize), 

720 dtype=dtypes).copy() 

721 rec_array.shape = (self._recs,) 

722 self.fp.seek(pos) 

723 

724 for var in rec_vars: 

725 self.variables[var].__dict__['data'] = rec_array[var] 

726 

727 def _read_var(self): 

728 name = self._unpack_string().decode('latin1') 

729 dimensions = [] 

730 shape = [] 

731 dims = self._unpack_int() 

732 

733 for i in range(dims): 

734 dimid = self._unpack_int() 

735 dimname = self._dims[dimid] 

736 dimensions.append(dimname) 

737 dim = self.dimensions[dimname] 

738 shape.append(dim) 

739 dimensions = tuple(dimensions) 

740 shape = tuple(shape) 

741 

742 attributes = self._read_att_array() 

743 nc_type = self.fp.read(4) 

744 vsize = self._unpack_int() 

745 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]() 

746 

747 typecode, size = TYPEMAP[nc_type] 

748 dtype_ = '>%s' % typecode 

749 

750 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize 

751 

752 def _read_att_values(self): 

753 nc_type = self.fp.read(4) 

754 n = self._unpack_int() 

755 

756 typecode, size = TYPEMAP[nc_type] 

757 

758 count = n*size 

759 values = self.fp.read(int(count)) 

760 self.fp.read(-count % 4) # read padding 

761 

762 if typecode != 'c': 

763 values = frombuffer(values, dtype='>%s' % typecode).copy() 

764 if values.shape == (1,): 

765 values = values[0] 

766 else: 

767 values = values.rstrip(b'\x00') 

768 return values 

769 

770 def _pack_begin(self, begin): 

771 if self.version_byte == 1: 

772 self._pack_int(begin) 

773 elif self.version_byte == 2: 

774 self._pack_int64(begin) 

775 

776 def _pack_int(self, value): 

777 self.fp.write(array(value, '>i').tobytes()) 

778 _pack_int32 = _pack_int 

779 

780 def _unpack_int(self): 

781 return int(frombuffer(self.fp.read(4), '>i')[0]) 

782 _unpack_int32 = _unpack_int 

783 

784 def _pack_int64(self, value): 

785 self.fp.write(array(value, '>q').tobytes()) 

786 

787 def _unpack_int64(self): 

788 return frombuffer(self.fp.read(8), '>q')[0] 

789 

790 def _pack_string(self, s): 

791 count = len(s) 

792 self._pack_int(count) 

793 self.fp.write(s.encode('latin1')) 

794 self.fp.write(b'\x00' * (-count % 4)) # pad 

795 

796 def _unpack_string(self): 

797 count = self._unpack_int() 

798 s = self.fp.read(count).rstrip(b'\x00') 

799 self.fp.read(-count % 4) # read padding 

800 return s 

801 

802 

803class netcdf_variable: 

804 """ 

805 A data object for netcdf files. 

806 

807 `netcdf_variable` objects are constructed by calling the method 

808 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable` 

809 objects behave much like array objects defined in numpy, except that their 

810 data resides in a file. Data is read by indexing and written by assigning 

811 to an indexed subset; the entire array can be accessed by the index ``[:]`` 

812 or (for scalars) by using the methods `getValue` and `assignValue`. 

813 `netcdf_variable` objects also have attribute `shape` with the same meaning 

814 as for arrays, but the shape cannot be modified. There is another read-only 

815 attribute `dimensions`, whose value is the tuple of dimension names. 

816 

817 All other attributes correspond to variable attributes defined in 

818 the NetCDF file. Variable attributes are created by assigning to an 

819 attribute of the `netcdf_variable` object. 

820 

821 Parameters 

822 ---------- 

823 data : array_like 

824 The data array that holds the values for the variable. 

825 Typically, this is initialized as empty, but with the proper shape. 

826 typecode : dtype character code 

827 Desired data-type for the data array. 

828 size : int 

829 Desired element size for the data array. 

830 shape : sequence of ints 

831 The shape of the array. This should match the lengths of the 

832 variable's dimensions. 

833 dimensions : sequence of strings 

834 The names of the dimensions used by the variable. Must be in the 

835 same order of the dimension lengths given by `shape`. 

836 attributes : dict, optional 

837 Attribute values (any type) keyed by string names. These attributes 

838 become attributes for the netcdf_variable object. 

839 maskandscale : bool, optional 

840 Whether to automatically scale and/or mask data based on attributes. 

841 Default is False. 

842 

843 

844 Attributes 

845 ---------- 

846 dimensions : list of str 

847 List of names of dimensions used by the variable object. 

848 isrec, shape 

849 Properties 

850 

851 See also 

852 -------- 

853 isrec, shape 

854 

855 """ 

856 def __init__(self, data, typecode, size, shape, dimensions, 

857 attributes=None, 

858 maskandscale=False): 

859 self.data = data 

860 self._typecode = typecode 

861 self._size = size 

862 self._shape = shape 

863 self.dimensions = dimensions 

864 self.maskandscale = maskandscale 

865 

866 self._attributes = attributes or {} 

867 for k, v in self._attributes.items(): 

868 self.__dict__[k] = v 

869 

870 def __setattr__(self, attr, value): 

871 # Store user defined attributes in a separate dict, 

872 # so we can save them to file later. 

873 try: 

874 self._attributes[attr] = value 

875 except AttributeError: 

876 pass 

877 self.__dict__[attr] = value 

878 

879 def isrec(self): 

880 """Returns whether the variable has a record dimension or not. 

881 

882 A record dimension is a dimension along which additional data could be 

883 easily appended in the netcdf data structure without much rewriting of 

884 the data file. This attribute is a read-only property of the 

885 `netcdf_variable`. 

886 

887 """ 

888 return bool(self.data.shape) and not self._shape[0] 

889 isrec = property(isrec) 

890 

891 def shape(self): 

892 """Returns the shape tuple of the data variable. 

893 

894 This is a read-only attribute and can not be modified in the 

895 same manner of other numpy arrays. 

896 """ 

897 return self.data.shape 

898 shape = property(shape) 

899 

900 def getValue(self): 

901 """ 

902 Retrieve a scalar value from a `netcdf_variable` of length one. 

903 

904 Raises 

905 ------ 

906 ValueError 

907 If the netcdf variable is an array of length greater than one, 

908 this exception will be raised. 

909 

910 """ 

911 return self.data.item() 

912 

913 def assignValue(self, value): 

914 """ 

915 Assign a scalar value to a `netcdf_variable` of length one. 

916 

917 Parameters 

918 ---------- 

919 value : scalar 

920 Scalar value (of compatible type) to assign to a length-one netcdf 

921 variable. This value will be written to file. 

922 

923 Raises 

924 ------ 

925 ValueError 

926 If the input is not a scalar, or if the destination is not a length-one 

927 netcdf variable. 

928 

929 """ 

930 if not self.data.flags.writeable: 

931 # Work-around for a bug in NumPy. Calling itemset() on a read-only 

932 # memory-mapped array causes a seg. fault. 

933 # See NumPy ticket #1622, and SciPy ticket #1202. 

934 # This check for `writeable` can be removed when the oldest version 

935 # of NumPy still supported by scipy contains the fix for #1622. 

936 raise RuntimeError("variable is not writeable") 

937 

938 self.data[:] = value 

939 

940 def typecode(self): 

941 """ 

942 Return the typecode of the variable. 

943 

944 Returns 

945 ------- 

946 typecode : char 

947 The character typecode of the variable (e.g., 'i' for int). 

948 

949 """ 

950 return self._typecode 

951 

952 def itemsize(self): 

953 """ 

954 Return the itemsize of the variable. 

955 

956 Returns 

957 ------- 

958 itemsize : int 

959 The element size of the variable (e.g., 8 for float64). 

960 

961 """ 

962 return self._size 

963 

964 def __getitem__(self, index): 

965 if not self.maskandscale: 

966 return self.data[index] 

967 

968 data = self.data[index].copy() 

969 missing_value = self._get_missing_value() 

970 data = self._apply_missing_value(data, missing_value) 

971 scale_factor = self._attributes.get('scale_factor') 

972 add_offset = self._attributes.get('add_offset') 

973 if add_offset is not None or scale_factor is not None: 

974 data = data.astype(np.float64) 

975 if scale_factor is not None: 

976 data = data * scale_factor 

977 if add_offset is not None: 

978 data += add_offset 

979 

980 return data 

981 

982 def __setitem__(self, index, data): 

983 if self.maskandscale: 

984 missing_value = ( 

985 self._get_missing_value() or 

986 getattr(data, 'fill_value', 999999)) 

987 self._attributes.setdefault('missing_value', missing_value) 

988 self._attributes.setdefault('_FillValue', missing_value) 

989 data = ((data - self._attributes.get('add_offset', 0.0)) / 

990 self._attributes.get('scale_factor', 1.0)) 

991 data = np.ma.asarray(data).filled(missing_value) 

992 if self._typecode not in 'fd' and data.dtype.kind == 'f': 

993 data = np.round(data) 

994 

995 # Expand data for record vars? 

996 if self.isrec: 

997 if isinstance(index, tuple): 

998 rec_index = index[0] 

999 else: 

1000 rec_index = index 

1001 if isinstance(rec_index, slice): 

1002 recs = (rec_index.start or 0) + len(data) 

1003 else: 

1004 recs = rec_index + 1 

1005 if recs > len(self.data): 

1006 shape = (recs,) + self._shape[1:] 

1007 # Resize in-place does not always work since 

1008 # the array might not be single-segment 

1009 try: 

1010 self.data.resize(shape) 

1011 except ValueError: 

1012 self.__dict__['data'] = np.resize(self.data, shape).astype(self.data.dtype) 

1013 self.data[index] = data 

1014 

1015 def _default_encoded_fill_value(self): 

1016 """ 

1017 The default encoded fill-value for this Variable's data type. 

1018 """ 

1019 nc_type = REVERSE[self.typecode(), self.itemsize()] 

1020 return FILLMAP[nc_type] 

1021 

1022 def _get_encoded_fill_value(self): 

1023 """ 

1024 Returns the encoded fill value for this variable as bytes. 

1025 

1026 This is taken from either the _FillValue attribute, or the default fill 

1027 value for this variable's data type. 

1028 """ 

1029 if '_FillValue' in self._attributes: 

1030 fill_value = np.array(self._attributes['_FillValue'], 

1031 dtype=self.data.dtype).tobytes() 

1032 if len(fill_value) == self.itemsize(): 

1033 return fill_value 

1034 else: 

1035 return self._default_encoded_fill_value() 

1036 else: 

1037 return self._default_encoded_fill_value() 

1038 

1039 def _get_missing_value(self): 

1040 """ 

1041 Returns the value denoting "no data" for this variable. 

1042 

1043 If this variable does not have a missing/fill value, returns None. 

1044 

1045 If both _FillValue and missing_value are given, give precedence to 

1046 _FillValue. The netCDF standard gives special meaning to _FillValue; 

1047 missing_value is just used for compatibility with old datasets. 

1048 """ 

1049 

1050 if '_FillValue' in self._attributes: 

1051 missing_value = self._attributes['_FillValue'] 

1052 elif 'missing_value' in self._attributes: 

1053 missing_value = self._attributes['missing_value'] 

1054 else: 

1055 missing_value = None 

1056 

1057 return missing_value 

1058 

1059 @staticmethod 

1060 def _apply_missing_value(data, missing_value): 

1061 """ 

1062 Applies the given missing value to the data array. 

1063 

1064 Returns a numpy.ma array, with any value equal to missing_value masked 

1065 out (unless missing_value is None, in which case the original array is 

1066 returned). 

1067 """ 

1068 

1069 if missing_value is None: 

1070 newdata = data 

1071 else: 

1072 try: 

1073 missing_value_isnan = np.isnan(missing_value) 

1074 except (TypeError, NotImplementedError): 

1075 # some data types (e.g., characters) cannot be tested for NaN 

1076 missing_value_isnan = False 

1077 

1078 if missing_value_isnan: 

1079 mymask = np.isnan(data) 

1080 else: 

1081 mymask = (data == missing_value) 

1082 

1083 newdata = np.ma.masked_where(mymask, data) 

1084 

1085 return newdata 

1086 

1087 

1088NetCDFFile = netcdf_file 

1089NetCDFVariable = netcdf_variable