Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 18%

484 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-22 06:44 +0000

1""" 

2NetCDF reader/writer module. 

3 

4This module is used to read and create NetCDF files. NetCDF files are 

5accessed through the `netcdf_file` object. Data written to and from NetCDF 

6files are contained in `netcdf_variable` objects. Attributes are given 

7as member variables of the `netcdf_file` and `netcdf_variable` objects. 

8 

9This module implements the Scientific.IO.NetCDF API to read and create 

10NetCDF files. The same API is also used in the PyNIO and pynetcdf 

11modules, allowing these modules to be used interchangeably when working 

12with NetCDF files. 

13 

14Only NetCDF3 is supported here; for NetCDF4 see 

15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__, 

16which has a similar API. 

17 

18""" 

19 

20# TODO: 

21# * properly implement ``_FillValue``. 

22# * fix character variables. 

23# * implement PAGESIZE for Python 2.6? 

24 

25# The Scientific.IO.NetCDF API allows attributes to be added directly to 

26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate 

27# between user-set attributes and instance attributes, user-set attributes 

28# are automatically stored in the ``_attributes`` attribute by overloading 

29#``__setattr__``. This is the reason why the code sometimes uses 

30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``; 

31# otherwise the key would be inserted into userspace attributes. 

32 

33 

34__all__ = ['netcdf_file', 'netcdf_variable'] 

35 

36 

37import warnings 

38import weakref 

39from operator import mul 

40from platform import python_implementation 

41 

42import mmap as mm 

43 

44import numpy as np 

45from numpy import frombuffer, dtype, empty, array, asarray 

46from numpy import little_endian as LITTLE_ENDIAN 

47from functools import reduce 

48 

49 

50IS_PYPY = python_implementation() == 'PyPy' 

51 

52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00' 

53ZERO = b'\x00\x00\x00\x00' 

54NC_BYTE = b'\x00\x00\x00\x01' 

55NC_CHAR = b'\x00\x00\x00\x02' 

56NC_SHORT = b'\x00\x00\x00\x03' 

57NC_INT = b'\x00\x00\x00\x04' 

58NC_FLOAT = b'\x00\x00\x00\x05' 

59NC_DOUBLE = b'\x00\x00\x00\x06' 

60NC_DIMENSION = b'\x00\x00\x00\n' 

61NC_VARIABLE = b'\x00\x00\x00\x0b' 

62NC_ATTRIBUTE = b'\x00\x00\x00\x0c' 

63FILL_BYTE = b'\x81' 

64FILL_CHAR = b'\x00' 

65FILL_SHORT = b'\x80\x01' 

66FILL_INT = b'\x80\x00\x00\x01' 

67FILL_FLOAT = b'\x7C\xF0\x00\x00' 

68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00' 

69 

70TYPEMAP = {NC_BYTE: ('b', 1), 

71 NC_CHAR: ('c', 1), 

72 NC_SHORT: ('h', 2), 

73 NC_INT: ('i', 4), 

74 NC_FLOAT: ('f', 4), 

75 NC_DOUBLE: ('d', 8)} 

76 

77FILLMAP = {NC_BYTE: FILL_BYTE, 

78 NC_CHAR: FILL_CHAR, 

79 NC_SHORT: FILL_SHORT, 

80 NC_INT: FILL_INT, 

81 NC_FLOAT: FILL_FLOAT, 

82 NC_DOUBLE: FILL_DOUBLE} 

83 

84REVERSE = {('b', 1): NC_BYTE, 

85 ('B', 1): NC_CHAR, 

86 ('c', 1): NC_CHAR, 

87 ('h', 2): NC_SHORT, 

88 ('i', 4): NC_INT, 

89 ('f', 4): NC_FLOAT, 

90 ('d', 8): NC_DOUBLE, 

91 

92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char, 

93 # used when getting the types from generic attributes. 

94 ('l', 4): NC_INT, 

95 ('S', 1): NC_CHAR} 

96 

97 

98class netcdf_file: 

99 """ 

100 A file object for NetCDF data. 

101 

102 A `netcdf_file` object has two standard attributes: `dimensions` and 

103 `variables`. The values of both are dictionaries, mapping dimension 

104 names to their associated lengths and variable names to variables, 

105 respectively. Application programs should never modify these 

106 dictionaries. 

107 

108 All other attributes correspond to global attributes defined in the 

109 NetCDF file. Global file attributes are created by assigning to an 

110 attribute of the `netcdf_file` object. 

111 

112 Parameters 

113 ---------- 

114 filename : string or file-like 

115 string -> filename 

116 mode : {'r', 'w', 'a'}, optional 

117 read-write-append mode, default is 'r' 

118 mmap : None or bool, optional 

119 Whether to mmap `filename` when reading. Default is True 

120 when `filename` is a file name, False when `filename` is a 

121 file-like object. Note that when mmap is in use, data arrays 

122 returned refer directly to the mmapped data on disk, and the 

123 file cannot be closed as long as references to it exist. 

124 version : {1, 2}, optional 

125 version of netcdf to read / write, where 1 means *Classic 

126 format* and 2 means *64-bit offset format*. Default is 1. See 

127 `here <https://docs.unidata.ucar.edu/nug/current/netcdf_introduction.html#select_format>`__ 

128 for more info. 

129 maskandscale : bool, optional 

130 Whether to automatically scale and/or mask data based on attributes. 

131 Default is False. 

132 

133 Notes 

134 ----- 

135 The major advantage of this module over other modules is that it doesn't 

136 require the code to be linked to the NetCDF libraries. This module is 

137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_. 

138 

139 NetCDF files are a self-describing binary data format. The file contains 

140 metadata that describes the dimensions and variables in the file. More 

141 details about NetCDF files can be found `here 

142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There 

143 are three main sections to a NetCDF data structure: 

144 

145 1. Dimensions 

146 2. Variables 

147 3. Attributes 

148 

149 The dimensions section records the name and length of each dimension used 

150 by the variables. The variables would then indicate which dimensions it 

151 uses and any attributes such as data units, along with containing the data 

152 values for the variable. It is good practice to include a 

153 variable that is the same name as a dimension to provide the values for 

154 that axes. Lastly, the attributes section would contain additional 

155 information such as the name of the file creator or the instrument used to 

156 collect the data. 

157 

158 When writing data to a NetCDF file, there is often the need to indicate the 

159 'record dimension'. A record dimension is the unbounded dimension for a 

160 variable. For example, a temperature variable may have dimensions of 

161 latitude, longitude and time. If one wants to add more temperature data to 

162 the NetCDF file as time progresses, then the temperature variable should 

163 have the time dimension flagged as the record dimension. 

164 

165 In addition, the NetCDF file header contains the position of the data in 

166 the file, so access can be done in an efficient manner without loading 

167 unnecessary data into memory. It uses the ``mmap`` module to create 

168 Numpy arrays mapped to the data on disk, for the same purpose. 

169 

170 Note that when `netcdf_file` is used to open a file with mmap=True 

171 (default for read-only), arrays returned by it refer to data 

172 directly on the disk. The file should not be closed, and cannot be cleanly 

173 closed when asked, if such arrays are alive. You may want to copy data arrays 

174 obtained from mmapped Netcdf file if they are to be processed after the file 

175 is closed, see the example below. 

176 

177 Examples 

178 -------- 

179 To create a NetCDF file: 

180 

181 >>> from scipy.io import netcdf_file 

182 >>> import numpy as np 

183 >>> f = netcdf_file('simple.nc', 'w') 

184 >>> f.history = 'Created for a test' 

185 >>> f.createDimension('time', 10) 

186 >>> time = f.createVariable('time', 'i', ('time',)) 

187 >>> time[:] = np.arange(10) 

188 >>> time.units = 'days since 2008-01-01' 

189 >>> f.close() 

190 

191 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice 

192 of the time variable allows for the data to be set in the object, rather 

193 than letting ``arange(10)`` overwrite the ``time`` variable. 

194 

195 To read the NetCDF file we just created: 

196 

197 >>> from scipy.io import netcdf_file 

198 >>> f = netcdf_file('simple.nc', 'r') 

199 >>> print(f.history) 

200 b'Created for a test' 

201 >>> time = f.variables['time'] 

202 >>> print(time.units) 

203 b'days since 2008-01-01' 

204 >>> print(time.shape) 

205 (10,) 

206 >>> print(time[-1]) 

207 9 

208 

209 NetCDF files, when opened read-only, return arrays that refer 

210 directly to memory-mapped data on disk: 

211 

212 >>> data = time[:] 

213 

214 If the data is to be processed after the file is closed, it needs 

215 to be copied to main memory: 

216 

217 >>> data = time[:].copy() 

218 >>> del time 

219 >>> f.close() 

220 >>> data.mean() 

221 4.5 

222 

223 A NetCDF file can also be used as context manager: 

224 

225 >>> from scipy.io import netcdf_file 

226 >>> with netcdf_file('simple.nc', 'r') as f: 

227 ... print(f.history) 

228 b'Created for a test' 

229 

230 """ 

231 def __init__(self, filename, mode='r', mmap=None, version=1, 

232 maskandscale=False): 

233 """Initialize netcdf_file from fileobj (str or file-like).""" 

234 if mode not in 'rwa': 

235 raise ValueError("Mode must be either 'r', 'w' or 'a'.") 

236 

237 if hasattr(filename, 'seek'): # file-like 

238 self.fp = filename 

239 self.filename = 'None' 

240 if mmap is None: 

241 mmap = False 

242 elif mmap and not hasattr(filename, 'fileno'): 

243 raise ValueError('Cannot use file object for mmap') 

244 else: # maybe it's a string 

245 self.filename = filename 

246 omode = 'r+' if mode == 'a' else mode 

247 self.fp = open(self.filename, '%sb' % omode) 

248 if mmap is None: 

249 # Mmapped files on PyPy cannot be usually closed 

250 # before the GC runs, so it's better to use mmap=False 

251 # as the default. 

252 mmap = (not IS_PYPY) 

253 

254 if mode != 'r': 

255 # Cannot read write-only files 

256 mmap = False 

257 

258 self.use_mmap = mmap 

259 self.mode = mode 

260 self.version_byte = version 

261 self.maskandscale = maskandscale 

262 

263 self.dimensions = {} 

264 self.variables = {} 

265 

266 self._dims = [] 

267 self._recs = 0 

268 self._recsize = 0 

269 

270 self._mm = None 

271 self._mm_buf = None 

272 if self.use_mmap: 

273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ) 

274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8) 

275 

276 self._attributes = {} 

277 

278 if mode in 'ra': 

279 self._read() 

280 

281 def __setattr__(self, attr, value): 

282 # Store user defined attributes in a separate dict, 

283 # so we can save them to file later. 

284 try: 

285 self._attributes[attr] = value 

286 except AttributeError: 

287 pass 

288 self.__dict__[attr] = value 

289 

290 def close(self): 

291 """Closes the NetCDF file.""" 

292 if hasattr(self, 'fp') and not self.fp.closed: 

293 try: 

294 self.flush() 

295 finally: 

296 self.variables = {} 

297 if self._mm_buf is not None: 

298 ref = weakref.ref(self._mm_buf) 

299 self._mm_buf = None 

300 if ref() is None: 

301 # self._mm_buf is gc'd, and we can close the mmap 

302 self._mm.close() 

303 else: 

304 # we cannot close self._mm, since self._mm_buf is 

305 # alive and there may still be arrays referring to it 

306 warnings.warn( 

307 "Cannot close a netcdf_file opened with mmap=True, when " 

308 "netcdf_variables or arrays referring to its data still " 

309 "exist. All data arrays obtained from such files refer " 

310 "directly to data on disk, and must be copied before the " 

311 "file can be cleanly closed. " 

312 "(See netcdf_file docstring for more information on mmap.)", 

313 category=RuntimeWarning, stacklevel=2, 

314 ) 

315 self._mm = None 

316 self.fp.close() 

317 __del__ = close 

318 

319 def __enter__(self): 

320 return self 

321 

322 def __exit__(self, type, value, traceback): 

323 self.close() 

324 

325 def createDimension(self, name, length): 

326 """ 

327 Adds a dimension to the Dimension section of the NetCDF data structure. 

328 

329 Note that this function merely adds a new dimension that the variables can 

330 reference. The values for the dimension, if desired, should be added as 

331 a variable using `createVariable`, referring to this dimension. 

332 

333 Parameters 

334 ---------- 

335 name : str 

336 Name of the dimension (Eg, 'lat' or 'time'). 

337 length : int 

338 Length of the dimension. 

339 

340 See Also 

341 -------- 

342 createVariable 

343 

344 """ 

345 if length is None and self._dims: 

346 raise ValueError("Only first dimension may be unlimited!") 

347 

348 self.dimensions[name] = length 

349 self._dims.append(name) 

350 

351 def createVariable(self, name, type, dimensions): 

352 """ 

353 Create an empty variable for the `netcdf_file` object, specifying its data 

354 type and the dimensions it uses. 

355 

356 Parameters 

357 ---------- 

358 name : str 

359 Name of the new variable. 

360 type : dtype or str 

361 Data type of the variable. 

362 dimensions : sequence of str 

363 List of the dimension names used by the variable, in the desired order. 

364 

365 Returns 

366 ------- 

367 variable : netcdf_variable 

368 The newly created ``netcdf_variable`` object. 

369 This object has also been added to the `netcdf_file` object as well. 

370 

371 See Also 

372 -------- 

373 createDimension 

374 

375 Notes 

376 ----- 

377 Any dimensions to be used by the variable should already exist in the 

378 NetCDF data structure or should be created by `createDimension` prior to 

379 creating the NetCDF variable. 

380 

381 """ 

382 shape = tuple([self.dimensions[dim] for dim in dimensions]) 

383 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy 

384 

385 type = dtype(type) 

386 typecode, size = type.char, type.itemsize 

387 if (typecode, size) not in REVERSE: 

388 raise ValueError("NetCDF 3 does not support type %s" % type) 

389 

390 # convert to big endian always for NetCDF 3 

391 data = empty(shape_, dtype=type.newbyteorder("B")) 

392 self.variables[name] = netcdf_variable( 

393 data, typecode, size, shape, dimensions, 

394 maskandscale=self.maskandscale) 

395 return self.variables[name] 

396 

397 def flush(self): 

398 """ 

399 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode. 

400 

401 See Also 

402 -------- 

403 sync : Identical function 

404 

405 """ 

406 if hasattr(self, 'mode') and self.mode in 'wa': 

407 self._write() 

408 sync = flush 

409 

410 def _write(self): 

411 self.fp.seek(0) 

412 self.fp.write(b'CDF') 

413 self.fp.write(array(self.version_byte, '>b').tobytes()) 

414 

415 # Write headers and data. 

416 self._write_numrecs() 

417 self._write_dim_array() 

418 self._write_gatt_array() 

419 self._write_var_array() 

420 

421 def _write_numrecs(self): 

422 # Get highest record count from all record variables. 

423 for var in self.variables.values(): 

424 if var.isrec and len(var.data) > self._recs: 

425 self.__dict__['_recs'] = len(var.data) 

426 self._pack_int(self._recs) 

427 

428 def _write_dim_array(self): 

429 if self.dimensions: 

430 self.fp.write(NC_DIMENSION) 

431 self._pack_int(len(self.dimensions)) 

432 for name in self._dims: 

433 self._pack_string(name) 

434 length = self.dimensions[name] 

435 self._pack_int(length or 0) # replace None with 0 for record dimension 

436 else: 

437 self.fp.write(ABSENT) 

438 

439 def _write_gatt_array(self): 

440 self._write_att_array(self._attributes) 

441 

442 def _write_att_array(self, attributes): 

443 if attributes: 

444 self.fp.write(NC_ATTRIBUTE) 

445 self._pack_int(len(attributes)) 

446 for name, values in attributes.items(): 

447 self._pack_string(name) 

448 self._write_att_values(values) 

449 else: 

450 self.fp.write(ABSENT) 

451 

452 def _write_var_array(self): 

453 if self.variables: 

454 self.fp.write(NC_VARIABLE) 

455 self._pack_int(len(self.variables)) 

456 

457 # Sort variable names non-recs first, then recs. 

458 def sortkey(n): 

459 v = self.variables[n] 

460 if v.isrec: 

461 return (-1,) 

462 return v._shape 

463 variables = sorted(self.variables, key=sortkey, reverse=True) 

464 

465 # Set the metadata for all variables. 

466 for name in variables: 

467 self._write_var_metadata(name) 

468 # Now that we have the metadata, we know the vsize of 

469 # each record variable, so we can calculate recsize. 

470 self.__dict__['_recsize'] = sum([ 

471 var._vsize for var in self.variables.values() 

472 if var.isrec]) 

473 # Set the data for all variables. 

474 for name in variables: 

475 self._write_var_data(name) 

476 else: 

477 self.fp.write(ABSENT) 

478 

479 def _write_var_metadata(self, name): 

480 var = self.variables[name] 

481 

482 self._pack_string(name) 

483 self._pack_int(len(var.dimensions)) 

484 for dimname in var.dimensions: 

485 dimid = self._dims.index(dimname) 

486 self._pack_int(dimid) 

487 

488 self._write_att_array(var._attributes) 

489 

490 nc_type = REVERSE[var.typecode(), var.itemsize()] 

491 self.fp.write(nc_type) 

492 

493 if not var.isrec: 

494 vsize = var.data.size * var.data.itemsize 

495 vsize += -vsize % 4 

496 else: # record variable 

497 try: 

498 vsize = var.data[0].size * var.data.itemsize 

499 except IndexError: 

500 vsize = 0 

501 rec_vars = len([v for v in self.variables.values() 

502 if v.isrec]) 

503 if rec_vars > 1: 

504 vsize += -vsize % 4 

505 self.variables[name].__dict__['_vsize'] = vsize 

506 self._pack_int(vsize) 

507 

508 # Pack a bogus begin, and set the real value later. 

509 self.variables[name].__dict__['_begin'] = self.fp.tell() 

510 self._pack_begin(0) 

511 

512 def _write_var_data(self, name): 

513 var = self.variables[name] 

514 

515 # Set begin in file header. 

516 the_beguine = self.fp.tell() 

517 self.fp.seek(var._begin) 

518 self._pack_begin(the_beguine) 

519 self.fp.seek(the_beguine) 

520 

521 # Write data. 

522 if not var.isrec: 

523 self.fp.write(var.data.tobytes()) 

524 count = var.data.size * var.data.itemsize 

525 self._write_var_padding(var, var._vsize - count) 

526 else: # record variable 

527 # Handle rec vars with shape[0] < nrecs. 

528 if self._recs > len(var.data): 

529 shape = (self._recs,) + var.data.shape[1:] 

530 # Resize in-place does not always work since 

531 # the array might not be single-segment 

532 try: 

533 var.data.resize(shape) 

534 except ValueError: 

535 dtype = var.data.dtype 

536 var.__dict__['data'] = np.resize(var.data, shape).astype(dtype) 

537 

538 pos0 = pos = self.fp.tell() 

539 for rec in var.data: 

540 # Apparently scalars cannot be converted to big endian. If we 

541 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype 

542 # will remain as ``=i4``. 

543 if not rec.shape and (rec.dtype.byteorder == '<' or 

544 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)): 

545 rec = rec.byteswap() 

546 self.fp.write(rec.tobytes()) 

547 # Padding 

548 count = rec.size * rec.itemsize 

549 self._write_var_padding(var, var._vsize - count) 

550 pos += self._recsize 

551 self.fp.seek(pos) 

552 self.fp.seek(pos0 + var._vsize) 

553 

554 def _write_var_padding(self, var, size): 

555 encoded_fill_value = var._get_encoded_fill_value() 

556 num_fills = size // len(encoded_fill_value) 

557 self.fp.write(encoded_fill_value * num_fills) 

558 

559 def _write_att_values(self, values): 

560 if hasattr(values, 'dtype'): 

561 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize] 

562 else: 

563 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)] 

564 

565 # bytes index into scalars in py3k. Check for "string" types 

566 if isinstance(values, (str, bytes)): 

567 sample = values 

568 else: 

569 try: 

570 sample = values[0] # subscriptable? 

571 except TypeError: 

572 sample = values # scalar 

573 

574 for class_, nc_type in types: 

575 if isinstance(sample, class_): 

576 break 

577 

578 typecode, size = TYPEMAP[nc_type] 

579 dtype_ = '>%s' % typecode 

580 # asarray() dies with bytes and '>c' in py3k. Change to 'S' 

581 dtype_ = 'S' if dtype_ == '>c' else dtype_ 

582 

583 values = asarray(values, dtype=dtype_) 

584 

585 self.fp.write(nc_type) 

586 

587 if values.dtype.char == 'S': 

588 nelems = values.itemsize 

589 else: 

590 nelems = values.size 

591 self._pack_int(nelems) 

592 

593 if not values.shape and (values.dtype.byteorder == '<' or 

594 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)): 

595 values = values.byteswap() 

596 self.fp.write(values.tobytes()) 

597 count = values.size * values.itemsize 

598 self.fp.write(b'\x00' * (-count % 4)) # pad 

599 

600 def _read(self): 

601 # Check magic bytes and version 

602 magic = self.fp.read(3) 

603 if not magic == b'CDF': 

604 raise TypeError("Error: %s is not a valid NetCDF 3 file" % 

605 self.filename) 

606 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0] 

607 

608 # Read file headers and set data. 

609 self._read_numrecs() 

610 self._read_dim_array() 

611 self._read_gatt_array() 

612 self._read_var_array() 

613 

614 def _read_numrecs(self): 

615 self.__dict__['_recs'] = self._unpack_int() 

616 

617 def _read_dim_array(self): 

618 header = self.fp.read(4) 

619 if header not in [ZERO, NC_DIMENSION]: 

620 raise ValueError("Unexpected header.") 

621 count = self._unpack_int() 

622 

623 for dim in range(count): 

624 name = self._unpack_string().decode('latin1') 

625 length = self._unpack_int() or None # None for record dimension 

626 self.dimensions[name] = length 

627 self._dims.append(name) # preserve order 

628 

629 def _read_gatt_array(self): 

630 for k, v in self._read_att_array().items(): 

631 self.__setattr__(k, v) 

632 

633 def _read_att_array(self): 

634 header = self.fp.read(4) 

635 if header not in [ZERO, NC_ATTRIBUTE]: 

636 raise ValueError("Unexpected header.") 

637 count = self._unpack_int() 

638 

639 attributes = {} 

640 for attr in range(count): 

641 name = self._unpack_string().decode('latin1') 

642 attributes[name] = self._read_att_values() 

643 return attributes 

644 

645 def _read_var_array(self): 

646 header = self.fp.read(4) 

647 if header not in [ZERO, NC_VARIABLE]: 

648 raise ValueError("Unexpected header.") 

649 

650 begin = 0 

651 dtypes = {'names': [], 'formats': []} 

652 rec_vars = [] 

653 count = self._unpack_int() 

654 for var in range(count): 

655 (name, dimensions, shape, attributes, 

656 typecode, size, dtype_, begin_, vsize) = self._read_var() 

657 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html 

658 # Note that vsize is the product of the dimension lengths 

659 # (omitting the record dimension) and the number of bytes 

660 # per value (determined from the type), increased to the 

661 # next multiple of 4, for each variable. If a record 

662 # variable, this is the amount of space per record. The 

663 # netCDF "record size" is calculated as the sum of the 

664 # vsize's of all the record variables. 

665 # 

666 # The vsize field is actually redundant, because its value 

667 # may be computed from other information in the header. The 

668 # 32-bit vsize field is not large enough to contain the size 

669 # of variables that require more than 2^32 - 4 bytes, so 

670 # 2^32 - 1 is used in the vsize field for such variables. 

671 if shape and shape[0] is None: # record variable 

672 rec_vars.append(name) 

673 # The netCDF "record size" is calculated as the sum of 

674 # the vsize's of all the record variables. 

675 self.__dict__['_recsize'] += vsize 

676 if begin == 0: 

677 begin = begin_ 

678 dtypes['names'].append(name) 

679 dtypes['formats'].append(str(shape[1:]) + dtype_) 

680 

681 # Handle padding with a virtual variable. 

682 if typecode in 'bch': 

683 actual_size = reduce(mul, (1,) + shape[1:]) * size 

684 padding = -actual_size % 4 

685 if padding: 

686 dtypes['names'].append('_padding_%d' % var) 

687 dtypes['formats'].append('(%d,)>b' % padding) 

688 

689 # Data will be set later. 

690 data = None 

691 else: # not a record variable 

692 # Calculate size to avoid problems with vsize (above) 

693 a_size = reduce(mul, shape, 1) * size 

694 if self.use_mmap: 

695 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_) 

696 data.shape = shape 

697 else: 

698 pos = self.fp.tell() 

699 self.fp.seek(begin_) 

700 data = frombuffer(self.fp.read(a_size), dtype=dtype_ 

701 ).copy() 

702 data.shape = shape 

703 self.fp.seek(pos) 

704 

705 # Add variable. 

706 self.variables[name] = netcdf_variable( 

707 data, typecode, size, shape, dimensions, attributes, 

708 maskandscale=self.maskandscale) 

709 

710 if rec_vars: 

711 # Remove padding when only one record variable. 

712 if len(rec_vars) == 1: 

713 dtypes['names'] = dtypes['names'][:1] 

714 dtypes['formats'] = dtypes['formats'][:1] 

715 

716 # Build rec array. 

717 if self.use_mmap: 

718 buf = self._mm_buf[begin:begin+self._recs*self._recsize] 

719 rec_array = buf.view(dtype=dtypes) 

720 rec_array.shape = (self._recs,) 

721 else: 

722 pos = self.fp.tell() 

723 self.fp.seek(begin) 

724 rec_array = frombuffer(self.fp.read(self._recs*self._recsize), 

725 dtype=dtypes).copy() 

726 rec_array.shape = (self._recs,) 

727 self.fp.seek(pos) 

728 

729 for var in rec_vars: 

730 self.variables[var].__dict__['data'] = rec_array[var] 

731 

732 def _read_var(self): 

733 name = self._unpack_string().decode('latin1') 

734 dimensions = [] 

735 shape = [] 

736 dims = self._unpack_int() 

737 

738 for i in range(dims): 

739 dimid = self._unpack_int() 

740 dimname = self._dims[dimid] 

741 dimensions.append(dimname) 

742 dim = self.dimensions[dimname] 

743 shape.append(dim) 

744 dimensions = tuple(dimensions) 

745 shape = tuple(shape) 

746 

747 attributes = self._read_att_array() 

748 nc_type = self.fp.read(4) 

749 vsize = self._unpack_int() 

750 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]() 

751 

752 typecode, size = TYPEMAP[nc_type] 

753 dtype_ = '>%s' % typecode 

754 

755 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize 

756 

757 def _read_att_values(self): 

758 nc_type = self.fp.read(4) 

759 n = self._unpack_int() 

760 

761 typecode, size = TYPEMAP[nc_type] 

762 

763 count = n*size 

764 values = self.fp.read(int(count)) 

765 self.fp.read(-count % 4) # read padding 

766 

767 if typecode != 'c': 

768 values = frombuffer(values, dtype='>%s' % typecode).copy() 

769 if values.shape == (1,): 

770 values = values[0] 

771 else: 

772 values = values.rstrip(b'\x00') 

773 return values 

774 

775 def _pack_begin(self, begin): 

776 if self.version_byte == 1: 

777 self._pack_int(begin) 

778 elif self.version_byte == 2: 

779 self._pack_int64(begin) 

780 

781 def _pack_int(self, value): 

782 self.fp.write(array(value, '>i').tobytes()) 

783 _pack_int32 = _pack_int 

784 

785 def _unpack_int(self): 

786 return int(frombuffer(self.fp.read(4), '>i')[0]) 

787 _unpack_int32 = _unpack_int 

788 

789 def _pack_int64(self, value): 

790 self.fp.write(array(value, '>q').tobytes()) 

791 

792 def _unpack_int64(self): 

793 return frombuffer(self.fp.read(8), '>q')[0] 

794 

795 def _pack_string(self, s): 

796 count = len(s) 

797 self._pack_int(count) 

798 self.fp.write(s.encode('latin1')) 

799 self.fp.write(b'\x00' * (-count % 4)) # pad 

800 

801 def _unpack_string(self): 

802 count = self._unpack_int() 

803 s = self.fp.read(count).rstrip(b'\x00') 

804 self.fp.read(-count % 4) # read padding 

805 return s 

806 

807 

808class netcdf_variable: 

809 """ 

810 A data object for netcdf files. 

811 

812 `netcdf_variable` objects are constructed by calling the method 

813 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable` 

814 objects behave much like array objects defined in numpy, except that their 

815 data resides in a file. Data is read by indexing and written by assigning 

816 to an indexed subset; the entire array can be accessed by the index ``[:]`` 

817 or (for scalars) by using the methods `getValue` and `assignValue`. 

818 `netcdf_variable` objects also have attribute `shape` with the same meaning 

819 as for arrays, but the shape cannot be modified. There is another read-only 

820 attribute `dimensions`, whose value is the tuple of dimension names. 

821 

822 All other attributes correspond to variable attributes defined in 

823 the NetCDF file. Variable attributes are created by assigning to an 

824 attribute of the `netcdf_variable` object. 

825 

826 Parameters 

827 ---------- 

828 data : array_like 

829 The data array that holds the values for the variable. 

830 Typically, this is initialized as empty, but with the proper shape. 

831 typecode : dtype character code 

832 Desired data-type for the data array. 

833 size : int 

834 Desired element size for the data array. 

835 shape : sequence of ints 

836 The shape of the array. This should match the lengths of the 

837 variable's dimensions. 

838 dimensions : sequence of strings 

839 The names of the dimensions used by the variable. Must be in the 

840 same order of the dimension lengths given by `shape`. 

841 attributes : dict, optional 

842 Attribute values (any type) keyed by string names. These attributes 

843 become attributes for the netcdf_variable object. 

844 maskandscale : bool, optional 

845 Whether to automatically scale and/or mask data based on attributes. 

846 Default is False. 

847 

848 

849 Attributes 

850 ---------- 

851 dimensions : list of str 

852 List of names of dimensions used by the variable object. 

853 isrec, shape 

854 Properties 

855 

856 See also 

857 -------- 

858 isrec, shape 

859 

860 """ 

861 def __init__(self, data, typecode, size, shape, dimensions, 

862 attributes=None, 

863 maskandscale=False): 

864 self.data = data 

865 self._typecode = typecode 

866 self._size = size 

867 self._shape = shape 

868 self.dimensions = dimensions 

869 self.maskandscale = maskandscale 

870 

871 self._attributes = attributes or {} 

872 for k, v in self._attributes.items(): 

873 self.__dict__[k] = v 

874 

875 def __setattr__(self, attr, value): 

876 # Store user defined attributes in a separate dict, 

877 # so we can save them to file later. 

878 try: 

879 self._attributes[attr] = value 

880 except AttributeError: 

881 pass 

882 self.__dict__[attr] = value 

883 

884 def isrec(self): 

885 """Returns whether the variable has a record dimension or not. 

886 

887 A record dimension is a dimension along which additional data could be 

888 easily appended in the netcdf data structure without much rewriting of 

889 the data file. This attribute is a read-only property of the 

890 `netcdf_variable`. 

891 

892 """ 

893 return bool(self.data.shape) and not self._shape[0] 

894 isrec = property(isrec) 

895 

896 def shape(self): 

897 """Returns the shape tuple of the data variable. 

898 

899 This is a read-only attribute and can not be modified in the 

900 same manner of other numpy arrays. 

901 """ 

902 return self.data.shape 

903 shape = property(shape) 

904 

905 def getValue(self): 

906 """ 

907 Retrieve a scalar value from a `netcdf_variable` of length one. 

908 

909 Raises 

910 ------ 

911 ValueError 

912 If the netcdf variable is an array of length greater than one, 

913 this exception will be raised. 

914 

915 """ 

916 return self.data.item() 

917 

918 def assignValue(self, value): 

919 """ 

920 Assign a scalar value to a `netcdf_variable` of length one. 

921 

922 Parameters 

923 ---------- 

924 value : scalar 

925 Scalar value (of compatible type) to assign to a length-one netcdf 

926 variable. This value will be written to file. 

927 

928 Raises 

929 ------ 

930 ValueError 

931 If the input is not a scalar, or if the destination is not a length-one 

932 netcdf variable. 

933 

934 """ 

935 if not self.data.flags.writeable: 

936 # Work-around for a bug in NumPy. Calling itemset() on a read-only 

937 # memory-mapped array causes a seg. fault. 

938 # See NumPy ticket #1622, and SciPy ticket #1202. 

939 # This check for `writeable` can be removed when the oldest version 

940 # of NumPy still supported by scipy contains the fix for #1622. 

941 raise RuntimeError("variable is not writeable") 

942 

943 self.data[:] = value 

944 

945 def typecode(self): 

946 """ 

947 Return the typecode of the variable. 

948 

949 Returns 

950 ------- 

951 typecode : char 

952 The character typecode of the variable (e.g., 'i' for int). 

953 

954 """ 

955 return self._typecode 

956 

957 def itemsize(self): 

958 """ 

959 Return the itemsize of the variable. 

960 

961 Returns 

962 ------- 

963 itemsize : int 

964 The element size of the variable (e.g., 8 for float64). 

965 

966 """ 

967 return self._size 

968 

969 def __getitem__(self, index): 

970 if not self.maskandscale: 

971 return self.data[index] 

972 

973 data = self.data[index].copy() 

974 missing_value = self._get_missing_value() 

975 data = self._apply_missing_value(data, missing_value) 

976 scale_factor = self._attributes.get('scale_factor') 

977 add_offset = self._attributes.get('add_offset') 

978 if add_offset is not None or scale_factor is not None: 

979 data = data.astype(np.float64) 

980 if scale_factor is not None: 

981 data = data * scale_factor 

982 if add_offset is not None: 

983 data += add_offset 

984 

985 return data 

986 

987 def __setitem__(self, index, data): 

988 if self.maskandscale: 

989 missing_value = ( 

990 self._get_missing_value() or 

991 getattr(data, 'fill_value', 999999)) 

992 self._attributes.setdefault('missing_value', missing_value) 

993 self._attributes.setdefault('_FillValue', missing_value) 

994 data = ((data - self._attributes.get('add_offset', 0.0)) / 

995 self._attributes.get('scale_factor', 1.0)) 

996 data = np.ma.asarray(data).filled(missing_value) 

997 if self._typecode not in 'fd' and data.dtype.kind == 'f': 

998 data = np.round(data) 

999 

1000 # Expand data for record vars? 

1001 if self.isrec: 

1002 if isinstance(index, tuple): 

1003 rec_index = index[0] 

1004 else: 

1005 rec_index = index 

1006 if isinstance(rec_index, slice): 

1007 recs = (rec_index.start or 0) + len(data) 

1008 else: 

1009 recs = rec_index + 1 

1010 if recs > len(self.data): 

1011 shape = (recs,) + self._shape[1:] 

1012 # Resize in-place does not always work since 

1013 # the array might not be single-segment 

1014 try: 

1015 self.data.resize(shape) 

1016 except ValueError: 

1017 dtype = self.data.dtype 

1018 self.__dict__['data'] = np.resize(self.data, shape).astype(dtype) 

1019 self.data[index] = data 

1020 

1021 def _default_encoded_fill_value(self): 

1022 """ 

1023 The default encoded fill-value for this Variable's data type. 

1024 """ 

1025 nc_type = REVERSE[self.typecode(), self.itemsize()] 

1026 return FILLMAP[nc_type] 

1027 

1028 def _get_encoded_fill_value(self): 

1029 """ 

1030 Returns the encoded fill value for this variable as bytes. 

1031 

1032 This is taken from either the _FillValue attribute, or the default fill 

1033 value for this variable's data type. 

1034 """ 

1035 if '_FillValue' in self._attributes: 

1036 fill_value = np.array(self._attributes['_FillValue'], 

1037 dtype=self.data.dtype).tobytes() 

1038 if len(fill_value) == self.itemsize(): 

1039 return fill_value 

1040 else: 

1041 return self._default_encoded_fill_value() 

1042 else: 

1043 return self._default_encoded_fill_value() 

1044 

1045 def _get_missing_value(self): 

1046 """ 

1047 Returns the value denoting "no data" for this variable. 

1048 

1049 If this variable does not have a missing/fill value, returns None. 

1050 

1051 If both _FillValue and missing_value are given, give precedence to 

1052 _FillValue. The netCDF standard gives special meaning to _FillValue; 

1053 missing_value is just used for compatibility with old datasets. 

1054 """ 

1055 

1056 if '_FillValue' in self._attributes: 

1057 missing_value = self._attributes['_FillValue'] 

1058 elif 'missing_value' in self._attributes: 

1059 missing_value = self._attributes['missing_value'] 

1060 else: 

1061 missing_value = None 

1062 

1063 return missing_value 

1064 

1065 @staticmethod 

1066 def _apply_missing_value(data, missing_value): 

1067 """ 

1068 Applies the given missing value to the data array. 

1069 

1070 Returns a numpy.ma array, with any value equal to missing_value masked 

1071 out (unless missing_value is None, in which case the original array is 

1072 returned). 

1073 """ 

1074 

1075 if missing_value is None: 

1076 newdata = data 

1077 else: 

1078 try: 

1079 missing_value_isnan = np.isnan(missing_value) 

1080 except (TypeError, NotImplementedError): 

1081 # some data types (e.g., characters) cannot be tested for NaN 

1082 missing_value_isnan = False 

1083 

1084 if missing_value_isnan: 

1085 mymask = np.isnan(data) 

1086 else: 

1087 mymask = (data == missing_value) 

1088 

1089 newdata = np.ma.masked_where(mymask, data) 

1090 

1091 return newdata 

1092 

1093 

1094NetCDFFile = netcdf_file 

1095NetCDFVariable = netcdf_variable