Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/

1"""

2NetCDF reader/writer module.

4This module is used to read and create NetCDF files. NetCDF files are

5accessed through the `netcdf_file` object. Data written to and from NetCDF

6files are contained in `netcdf_variable` objects. Attributes are given

7as member variables of the `netcdf_file` and `netcdf_variable` objects.

9This module implements the Scientific.IO.NetCDF API to read and create

10NetCDF files. The same API is also used in the PyNIO and pynetcdf

11modules, allowing these modules to be used interchangeably when working

12with NetCDF files.

14Only NetCDF3 is supported here; for NetCDF4 see

15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,

16which has a similar API.

18"""

20# TODO:

21# * properly implement ``_FillValue``.

22# * fix character variables.

23# * implement PAGESIZE for Python 2.6?

25# The Scientific.IO.NetCDF API allows attributes to be added directly to

26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate

27# between user-set attributes and instance attributes, user-set attributes

28# are automatically stored in the ``_attributes`` attribute by overloading

29#``__setattr__``. This is the reason why the code sometimes uses

30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;

31# otherwise the key would be inserted into userspace attributes.

34__all__ = ['netcdf_file', 'netcdf_variable']

37import warnings

38import weakref

39from operator import mul

40from platform import python_implementation

42import mmap as mm

44import numpy as np

45from numpy import frombuffer, dtype, empty, array, asarray

46from numpy import little_endian as LITTLE_ENDIAN

47from functools import reduce

50IS_PYPY = python_implementation() == 'PyPy'

52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'

53ZERO = b'\x00\x00\x00\x00'

54NC_BYTE = b'\x00\x00\x00\x01'

55NC_CHAR = b'\x00\x00\x00\x02'

56NC_SHORT = b'\x00\x00\x00\x03'

57NC_INT = b'\x00\x00\x00\x04'

58NC_FLOAT = b'\x00\x00\x00\x05'

59NC_DOUBLE = b'\x00\x00\x00\x06'

60NC_DIMENSION = b'\x00\x00\x00\n'

61NC_VARIABLE = b'\x00\x00\x00\x0b'

62NC_ATTRIBUTE = b'\x00\x00\x00\x0c'

63FILL_BYTE = b'\x81'

64FILL_CHAR = b'\x00'

65FILL_SHORT = b'\x80\x01'

66FILL_INT = b'\x80\x00\x00\x01'

67FILL_FLOAT = b'\x7C\xF0\x00\x00'

68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00'

70TYPEMAP = {NC_BYTE: ('b', 1),

71 NC_CHAR: ('c', 1),

72 NC_SHORT: ('h', 2),

73 NC_INT: ('i', 4),

74 NC_FLOAT: ('f', 4),

75 NC_DOUBLE: ('d', 8)}

77FILLMAP = {NC_BYTE: FILL_BYTE,

78 NC_CHAR: FILL_CHAR,

79 NC_SHORT: FILL_SHORT,

80 NC_INT: FILL_INT,

81 NC_FLOAT: FILL_FLOAT,

82 NC_DOUBLE: FILL_DOUBLE}

84REVERSE = {('b', 1): NC_BYTE,

85 ('B', 1): NC_CHAR,

86 ('c', 1): NC_CHAR,

87 ('h', 2): NC_SHORT,

88 ('i', 4): NC_INT,

89 ('f', 4): NC_FLOAT,

90 ('d', 8): NC_DOUBLE,

92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char,

93 # used when getting the types from generic attributes.

94 ('l', 4): NC_INT,

95 ('S', 1): NC_CHAR}

98class netcdf_file:

99 """

100 A file object for NetCDF data.

101

102 A `netcdf_file` object has two standard attributes: `dimensions` and

103 `variables`. The values of both are dictionaries, mapping dimension

104 names to their associated lengths and variable names to variables,

105 respectively. Application programs should never modify these

106 dictionaries.

107

108 All other attributes correspond to global attributes defined in the

109 NetCDF file. Global file attributes are created by assigning to an

110 attribute of the `netcdf_file` object.

111

112 Parameters

113 ----------

114 filename : string or file-like

115 string -> filename

116 mode : {'r', 'w', 'a'}, optional

117 read-write-append mode, default is 'r'

118 mmap : None or bool, optional

119 Whether to mmap `filename` when reading. Default is True

120 when `filename` is a file name, False when `filename` is a

121 file-like object. Note that when mmap is in use, data arrays

122 returned refer directly to the mmapped data on disk, and the

123 file cannot be closed as long as references to it exist.

124 version : {1, 2}, optional

125 version of netcdf to read / write, where 1 means *Classic

126 format* and 2 means *64-bit offset format*. Default is 1. See

127 `here <https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_introduction.html#select_format>`__

128 for more info.

129 maskandscale : bool, optional

130 Whether to automatically scale and/or mask data based on attributes.

131 Default is False.

132

133 Notes

134 -----

135 The major advantage of this module over other modules is that it doesn't

136 require the code to be linked to the NetCDF libraries. This module is

137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.

138

139 NetCDF files are a self-describing binary data format. The file contains

140 metadata that describes the dimensions and variables in the file. More

141 details about NetCDF files can be found `here

142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There

143 are three main sections to a NetCDF data structure:

144

145 1. Dimensions

146 2. Variables

147 3. Attributes

148

149 The dimensions section records the name and length of each dimension used

150 by the variables. The variables would then indicate which dimensions it

151 uses and any attributes such as data units, along with containing the data

152 values for the variable. It is good practice to include a

153 variable that is the same name as a dimension to provide the values for

154 that axes. Lastly, the attributes section would contain additional

155 information such as the name of the file creator or the instrument used to

156 collect the data.

157

158 When writing data to a NetCDF file, there is often the need to indicate the

159 'record dimension'. A record dimension is the unbounded dimension for a

160 variable. For example, a temperature variable may have dimensions of

161 latitude, longitude and time. If one wants to add more temperature data to

162 the NetCDF file as time progresses, then the temperature variable should

163 have the time dimension flagged as the record dimension.

164

165 In addition, the NetCDF file header contains the position of the data in

166 the file, so access can be done in an efficient manner without loading

167 unnecessary data into memory. It uses the ``mmap`` module to create

168 Numpy arrays mapped to the data on disk, for the same purpose.

169

170 Note that when `netcdf_file` is used to open a file with mmap=True

171 (default for read-only), arrays returned by it refer to data

172 directly on the disk. The file should not be closed, and cannot be cleanly

173 closed when asked, if such arrays are alive. You may want to copy data arrays

174 obtained from mmapped Netcdf file if they are to be processed after the file

175 is closed, see the example below.

176

177 Examples

178 --------

179 To create a NetCDF file:

180

181 >>> from scipy.io import netcdf_file

182 >>> import numpy as np

183 >>> f = netcdf_file('simple.nc', 'w')

184 >>> f.history = 'Created for a test'

185 >>> f.createDimension('time', 10)

186 >>> time = f.createVariable('time', 'i', ('time',))

187 >>> time[:] = np.arange(10)

188 >>> time.units = 'days since 2008-01-01'

189 >>> f.close()

190

191 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice

192 of the time variable allows for the data to be set in the object, rather

193 than letting ``arange(10)`` overwrite the ``time`` variable.

194

195 To read the NetCDF file we just created:

196

197 >>> from scipy.io import netcdf_file

198 >>> f = netcdf_file('simple.nc', 'r')

199 >>> print(f.history)

200 b'Created for a test'

201 >>> time = f.variables['time']

202 >>> print(time.units)

203 b'days since 2008-01-01'

204 >>> print(time.shape)

205 (10,)

206 >>> print(time[-1])

207 9

208

209 NetCDF files, when opened read-only, return arrays that refer

210 directly to memory-mapped data on disk:

211

212 >>> data = time[:]

213

214 If the data is to be processed after the file is closed, it needs

215 to be copied to main memory:

216

217 >>> data = time[:].copy()

218 >>> del time

219 >>> f.close()

220 >>> data.mean()

221 4.5

222

223 A NetCDF file can also be used as context manager:

224

225 >>> from scipy.io import netcdf_file

226 >>> with netcdf_file('simple.nc', 'r') as f:

227 ... print(f.history)

228 b'Created for a test'

229

230 """

231 def __init__(self, filename, mode='r', mmap=None, version=1,

232 maskandscale=False):

233 """Initialize netcdf_file from fileobj (str or file-like)."""

234 if mode not in 'rwa':

235 raise ValueError("Mode must be either 'r', 'w' or 'a'.")

236

237 if hasattr(filename, 'seek'): # file-like

238 self.fp = filename

239 self.filename = 'None'

240 if mmap is None:

241 mmap = False

242 elif mmap and not hasattr(filename, 'fileno'):

243 raise ValueError('Cannot use file object for mmap')

244 else: # maybe it's a string

245 self.filename = filename

246 omode = 'r+' if mode == 'a' else mode

247 self.fp = open(self.filename, '%sb' % omode)

248 if mmap is None:

249 # Mmapped files on PyPy cannot be usually closed

250 # before the GC runs, so it's better to use mmap=False

251 # as the default.

252 mmap = (not IS_PYPY)

253

254 if mode != 'r':

255 # Cannot read write-only files

256 mmap = False

257

258 self.use_mmap = mmap

259 self.mode = mode

260 self.version_byte = version

261 self.maskandscale = maskandscale

262

263 self.dimensions = {}

264 self.variables = {}

265

266 self._dims = []

267 self._recs = 0

268 self._recsize = 0

269

270 self._mm = None

271 self._mm_buf = None

272 if self.use_mmap:

273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)

274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)

275

276 self._attributes = {}

277

278 if mode in 'ra':

279 self._read()

280

281 def __setattr__(self, attr, value):

282 # Store user defined attributes in a separate dict,

283 # so we can save them to file later.

284 try:

285 self._attributes[attr] = value

286 except AttributeError:

287 pass

288 self.__dict__[attr] = value

289

290 def close(self):

291 """Closes the NetCDF file."""

292 if hasattr(self, 'fp') and not self.fp.closed:

293 try:

294 self.flush()

295 finally:

296 self.variables = {}

297 if self._mm_buf is not None:

298 ref = weakref.ref(self._mm_buf)

299 self._mm_buf = None

300 if ref() is None:

301 # self._mm_buf is gc'd, and we can close the mmap

302 self._mm.close()

303 else:

304 # we cannot close self._mm, since self._mm_buf is

305 # alive and there may still be arrays referring to it

306 warnings.warn((

307 "Cannot close a netcdf_file opened with mmap=True, when "

308 "netcdf_variables or arrays referring to its data still exist. "

309 "All data arrays obtained from such files refer directly to "

310 "data on disk, and must be copied before the file can be cleanly "

311 "closed. (See netcdf_file docstring for more information on mmap.)"

312 ), category=RuntimeWarning)

313 self._mm = None

314 self.fp.close()

315 __del__ = close

316

317 def __enter__(self):

318 return self

319

320 def __exit__(self, type, value, traceback):

321 self.close()

322

323 def createDimension(self, name, length):

324 """

325 Adds a dimension to the Dimension section of the NetCDF data structure.

326

327 Note that this function merely adds a new dimension that the variables can

328 reference. The values for the dimension, if desired, should be added as

329 a variable using `createVariable`, referring to this dimension.

330

331 Parameters

332 ----------

333 name : str

334 Name of the dimension (Eg, 'lat' or 'time').

335 length : int

336 Length of the dimension.

337

338 See Also

339 --------

340 createVariable

341

342 """

343 if length is None and self._dims:

344 raise ValueError("Only first dimension may be unlimited!")

345

346 self.dimensions[name] = length

347 self._dims.append(name)

348

349 def createVariable(self, name, type, dimensions):

350 """

351 Create an empty variable for the `netcdf_file` object, specifying its data

352 type and the dimensions it uses.

353

354 Parameters

355 ----------

356 name : str

357 Name of the new variable.

358 type : dtype or str

359 Data type of the variable.

360 dimensions : sequence of str

361 List of the dimension names used by the variable, in the desired order.

362

363 Returns

364 -------

365 variable : netcdf_variable

366 The newly created ``netcdf_variable`` object.

367 This object has also been added to the `netcdf_file` object as well.

368

369 See Also

370 --------

371 createDimension

372

373 Notes

374 -----

375 Any dimensions to be used by the variable should already exist in the

376 NetCDF data structure or should be created by `createDimension` prior to

377 creating the NetCDF variable.

378

379 """

380 shape = tuple([self.dimensions[dim] for dim in dimensions])

381 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy

382

383 type = dtype(type)

384 typecode, size = type.char, type.itemsize

385 if (typecode, size) not in REVERSE:

386 raise ValueError("NetCDF 3 does not support type %s" % type)

387

388 data = empty(shape_, dtype=type.newbyteorder("B")) # convert to big endian always for NetCDF 3

389 self.variables[name] = netcdf_variable(

390 data, typecode, size, shape, dimensions,

391 maskandscale=self.maskandscale)

392 return self.variables[name]

393

394 def flush(self):

395 """

396 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.

397

398 See Also

399 --------

400 sync : Identical function

401

402 """

403 if hasattr(self, 'mode') and self.mode in 'wa':

404 self._write()

405 sync = flush

406

407 def _write(self):

408 self.fp.seek(0)

409 self.fp.write(b'CDF')

410 self.fp.write(array(self.version_byte, '>b').tobytes())

411

412 # Write headers and data.

413 self._write_numrecs()

414 self._write_dim_array()

415 self._write_gatt_array()

416 self._write_var_array()

417

418 def _write_numrecs(self):

419 # Get highest record count from all record variables.

420 for var in self.variables.values():

421 if var.isrec and len(var.data) > self._recs:

422 self.__dict__['_recs'] = len(var.data)

423 self._pack_int(self._recs)

424

425 def _write_dim_array(self):

426 if self.dimensions:

427 self.fp.write(NC_DIMENSION)

428 self._pack_int(len(self.dimensions))

429 for name in self._dims:

430 self._pack_string(name)

431 length = self.dimensions[name]

432 self._pack_int(length or 0) # replace None with 0 for record dimension

433 else:

434 self.fp.write(ABSENT)

435

436 def _write_gatt_array(self):

437 self._write_att_array(self._attributes)

438

439 def _write_att_array(self, attributes):

440 if attributes:

441 self.fp.write(NC_ATTRIBUTE)

442 self._pack_int(len(attributes))

443 for name, values in attributes.items():

444 self._pack_string(name)

445 self._write_att_values(values)

446 else:

447 self.fp.write(ABSENT)

448

449 def _write_var_array(self):

450 if self.variables:

451 self.fp.write(NC_VARIABLE)

452 self._pack_int(len(self.variables))

453

454 # Sort variable names non-recs first, then recs.

455 def sortkey(n):

456 v = self.variables[n]

457 if v.isrec:

458 return (-1,)

459 return v._shape

460 variables = sorted(self.variables, key=sortkey, reverse=True)

461

462 # Set the metadata for all variables.

463 for name in variables:

464 self._write_var_metadata(name)

465 # Now that we have the metadata, we know the vsize of

466 # each record variable, so we can calculate recsize.

467 self.__dict__['_recsize'] = sum([

468 var._vsize for var in self.variables.values()

469 if var.isrec])

470 # Set the data for all variables.

471 for name in variables:

472 self._write_var_data(name)

473 else:

474 self.fp.write(ABSENT)

475

476 def _write_var_metadata(self, name):

477 var = self.variables[name]

478

479 self._pack_string(name)

480 self._pack_int(len(var.dimensions))

481 for dimname in var.dimensions:

482 dimid = self._dims.index(dimname)

483 self._pack_int(dimid)

484

485 self._write_att_array(var._attributes)

486

487 nc_type = REVERSE[var.typecode(), var.itemsize()]

488 self.fp.write(nc_type)

489

490 if not var.isrec:

491 vsize = var.data.size * var.data.itemsize

492 vsize += -vsize % 4

493 else: # record variable

494 try:

495 vsize = var.data[0].size * var.data.itemsize

496 except IndexError:

497 vsize = 0

498 rec_vars = len([v for v in self.variables.values()

499 if v.isrec])

500 if rec_vars > 1:

501 vsize += -vsize % 4

502 self.variables[name].__dict__['_vsize'] = vsize

503 self._pack_int(vsize)

504

505 # Pack a bogus begin, and set the real value later.

506 self.variables[name].__dict__['_begin'] = self.fp.tell()

507 self._pack_begin(0)

508

509 def _write_var_data(self, name):

510 var = self.variables[name]

511

512 # Set begin in file header.

513 the_beguine = self.fp.tell()

514 self.fp.seek(var._begin)

515 self._pack_begin(the_beguine)

516 self.fp.seek(the_beguine)

517

518 # Write data.

519 if not var.isrec:

520 self.fp.write(var.data.tobytes())

521 count = var.data.size * var.data.itemsize

522 self._write_var_padding(var, var._vsize - count)

523 else: # record variable

524 # Handle rec vars with shape[0] < nrecs.

525 if self._recs > len(var.data):

526 shape = (self._recs,) + var.data.shape[1:]

527 # Resize in-place does not always work since

528 # the array might not be single-segment

529 try:

530 var.data.resize(shape)

531 except ValueError:

532 var.__dict__['data'] = np.resize(var.data, shape).astype(var.data.dtype)

533

534 pos0 = pos = self.fp.tell()

535 for rec in var.data:

536 # Apparently scalars cannot be converted to big endian. If we

537 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype

538 # will remain as ``=i4``.

539 if not rec.shape and (rec.dtype.byteorder == '<' or

540 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):

541 rec = rec.byteswap()

542 self.fp.write(rec.tobytes())

543 # Padding

544 count = rec.size * rec.itemsize

545 self._write_var_padding(var, var._vsize - count)

546 pos += self._recsize

547 self.fp.seek(pos)

548 self.fp.seek(pos0 + var._vsize)

549

550 def _write_var_padding(self, var, size):

551 encoded_fill_value = var._get_encoded_fill_value()

552 num_fills = size // len(encoded_fill_value)

553 self.fp.write(encoded_fill_value * num_fills)

554

555 def _write_att_values(self, values):

556 if hasattr(values, 'dtype'):

557 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]

558 else:

559 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)]

560

561 # bytes index into scalars in py3k. Check for "string" types

562 if isinstance(values, (str, bytes)):

563 sample = values

564 else:

565 try:

566 sample = values[0] # subscriptable?

567 except TypeError:

568 sample = values # scalar

569

570 for class_, nc_type in types:

571 if isinstance(sample, class_):

572 break

573

574 typecode, size = TYPEMAP[nc_type]

575 dtype_ = '>%s' % typecode

576 # asarray() dies with bytes and '>c' in py3k. Change to 'S'

577 dtype_ = 'S' if dtype_ == '>c' else dtype_

578

579 values = asarray(values, dtype=dtype_)

580

581 self.fp.write(nc_type)

582

583 if values.dtype.char == 'S':

584 nelems = values.itemsize

585 else:

586 nelems = values.size

587 self._pack_int(nelems)

588

589 if not values.shape and (values.dtype.byteorder == '<' or

590 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):

591 values = values.byteswap()

592 self.fp.write(values.tobytes())

593 count = values.size * values.itemsize

594 self.fp.write(b'\x00' * (-count % 4)) # pad

595

596 def _read(self):

597 # Check magic bytes and version

598 magic = self.fp.read(3)

599 if not magic == b'CDF':

600 raise TypeError("Error: %s is not a valid NetCDF 3 file" %

601 self.filename)

602 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]

603

604 # Read file headers and set data.

605 self._read_numrecs()

606 self._read_dim_array()

607 self._read_gatt_array()

608 self._read_var_array()

609

610 def _read_numrecs(self):

611 self.__dict__['_recs'] = self._unpack_int()

612

613 def _read_dim_array(self):

614 header = self.fp.read(4)

615 if header not in [ZERO, NC_DIMENSION]:

616 raise ValueError("Unexpected header.")

617 count = self._unpack_int()

618

619 for dim in range(count):

620 name = self._unpack_string().decode('latin1')

621 length = self._unpack_int() or None # None for record dimension

622 self.dimensions[name] = length

623 self._dims.append(name) # preserve order

624

625 def _read_gatt_array(self):

626 for k, v in self._read_att_array().items():

627 self.__setattr__(k, v)

628

629 def _read_att_array(self):

630 header = self.fp.read(4)

631 if header not in [ZERO, NC_ATTRIBUTE]:

632 raise ValueError("Unexpected header.")

633 count = self._unpack_int()

634

635 attributes = {}

636 for attr in range(count):

637 name = self._unpack_string().decode('latin1')

638 attributes[name] = self._read_att_values()

639 return attributes

640

641 def _read_var_array(self):

642 header = self.fp.read(4)

643 if header not in [ZERO, NC_VARIABLE]:

644 raise ValueError("Unexpected header.")

645

646 begin = 0

647 dtypes = {'names': [], 'formats': []}

648 rec_vars = []

649 count = self._unpack_int()

650 for var in range(count):

651 (name, dimensions, shape, attributes,

652 typecode, size, dtype_, begin_, vsize) = self._read_var()

653 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html

654 # Note that vsize is the product of the dimension lengths

655 # (omitting the record dimension) and the number of bytes

656 # per value (determined from the type), increased to the

657 # next multiple of 4, for each variable. If a record

658 # variable, this is the amount of space per record. The

659 # netCDF "record size" is calculated as the sum of the

660 # vsize's of all the record variables.

661 #

662 # The vsize field is actually redundant, because its value

663 # may be computed from other information in the header. The

664 # 32-bit vsize field is not large enough to contain the size

665 # of variables that require more than 2^32 - 4 bytes, so

666 # 2^32 - 1 is used in the vsize field for such variables.

667 if shape and shape[0] is None: # record variable

668 rec_vars.append(name)

669 # The netCDF "record size" is calculated as the sum of

670 # the vsize's of all the record variables.

671 self.__dict__['_recsize'] += vsize

672 if begin == 0:

673 begin = begin_

674 dtypes['names'].append(name)

675 dtypes['formats'].append(str(shape[1:]) + dtype_)

676

677 # Handle padding with a virtual variable.

678 if typecode in 'bch':

679 actual_size = reduce(mul, (1,) + shape[1:]) * size

680 padding = -actual_size % 4

681 if padding:

682 dtypes['names'].append('_padding_%d' % var)

683 dtypes['formats'].append('(%d,)>b' % padding)

684

685 # Data will be set later.

686 data = None

687 else: # not a record variable

688 # Calculate size to avoid problems with vsize (above)

689 a_size = reduce(mul, shape, 1) * size

690 if self.use_mmap:

691 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)

692 data.shape = shape

693 else:

694 pos = self.fp.tell()

695 self.fp.seek(begin_)

696 data = frombuffer(self.fp.read(a_size), dtype=dtype_

697 ).copy()

698 data.shape = shape

699 self.fp.seek(pos)

700

701 # Add variable.

702 self.variables[name] = netcdf_variable(

703 data, typecode, size, shape, dimensions, attributes,

704 maskandscale=self.maskandscale)

705

706 if rec_vars:

707 # Remove padding when only one record variable.

708 if len(rec_vars) == 1:

709 dtypes['names'] = dtypes['names'][:1]

710 dtypes['formats'] = dtypes['formats'][:1]

711

712 # Build rec array.

713 if self.use_mmap:

714 rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes)

715 rec_array.shape = (self._recs,)

716 else:

717 pos = self.fp.tell()

718 self.fp.seek(begin)

719 rec_array = frombuffer(self.fp.read(self._recs*self._recsize),

720 dtype=dtypes).copy()

721 rec_array.shape = (self._recs,)

722 self.fp.seek(pos)

723

724 for var in rec_vars:

725 self.variables[var].__dict__['data'] = rec_array[var]

726

727 def _read_var(self):

728 name = self._unpack_string().decode('latin1')

729 dimensions = []

730 shape = []

731 dims = self._unpack_int()

732

733 for i in range(dims):

734 dimid = self._unpack_int()

735 dimname = self._dims[dimid]

736 dimensions.append(dimname)

737 dim = self.dimensions[dimname]

738 shape.append(dim)

739 dimensions = tuple(dimensions)

740 shape = tuple(shape)

741

742 attributes = self._read_att_array()

743 nc_type = self.fp.read(4)

744 vsize = self._unpack_int()

745 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

746

747 typecode, size = TYPEMAP[nc_type]

748 dtype_ = '>%s' % typecode

749

750 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize

751

752 def _read_att_values(self):

753 nc_type = self.fp.read(4)

754 n = self._unpack_int()

755

756 typecode, size = TYPEMAP[nc_type]

757

758 count = n*size

759 values = self.fp.read(int(count))

760 self.fp.read(-count % 4) # read padding

761

762 if typecode != 'c':

763 values = frombuffer(values, dtype='>%s' % typecode).copy()

764 if values.shape == (1,):

765 values = values[0]

766 else:

767 values = values.rstrip(b'\x00')

768 return values

769

770 def _pack_begin(self, begin):

771 if self.version_byte == 1:

772 self._pack_int(begin)

773 elif self.version_byte == 2:

774 self._pack_int64(begin)

775

776 def _pack_int(self, value):

777 self.fp.write(array(value, '>i').tobytes())

778 _pack_int32 = _pack_int

779

780 def _unpack_int(self):

781 return int(frombuffer(self.fp.read(4), '>i')[0])

782 _unpack_int32 = _unpack_int

783

784 def _pack_int64(self, value):

785 self.fp.write(array(value, '>q').tobytes())

786

787 def _unpack_int64(self):

788 return frombuffer(self.fp.read(8), '>q')[0]

789

790 def _pack_string(self, s):

791 count = len(s)

792 self._pack_int(count)

793 self.fp.write(s.encode('latin1'))

794 self.fp.write(b'\x00' * (-count % 4)) # pad

795

796 def _unpack_string(self):

797 count = self._unpack_int()

798 s = self.fp.read(count).rstrip(b'\x00')

799 self.fp.read(-count % 4) # read padding

800 return s

801

802

803class netcdf_variable:

804 """

805 A data object for netcdf files.

806

807 `netcdf_variable` objects are constructed by calling the method

808 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`

809 objects behave much like array objects defined in numpy, except that their

810 data resides in a file. Data is read by indexing and written by assigning

811 to an indexed subset; the entire array can be accessed by the index ``[:]``

812 or (for scalars) by using the methods `getValue` and `assignValue`.

813 `netcdf_variable` objects also have attribute `shape` with the same meaning

814 as for arrays, but the shape cannot be modified. There is another read-only

815 attribute `dimensions`, whose value is the tuple of dimension names.

816

817 All other attributes correspond to variable attributes defined in

818 the NetCDF file. Variable attributes are created by assigning to an

819 attribute of the `netcdf_variable` object.

820

821 Parameters

822 ----------

823 data : array_like

824 The data array that holds the values for the variable.

825 Typically, this is initialized as empty, but with the proper shape.

826 typecode : dtype character code

827 Desired data-type for the data array.

828 size : int

829 Desired element size for the data array.

830 shape : sequence of ints

831 The shape of the array. This should match the lengths of the

832 variable's dimensions.

833 dimensions : sequence of strings

834 The names of the dimensions used by the variable. Must be in the

835 same order of the dimension lengths given by `shape`.

836 attributes : dict, optional

837 Attribute values (any type) keyed by string names. These attributes

838 become attributes for the netcdf_variable object.

839 maskandscale : bool, optional

840 Whether to automatically scale and/or mask data based on attributes.

841 Default is False.

842

843

844 Attributes

845 ----------

846 dimensions : list of str

847 List of names of dimensions used by the variable object.

848 isrec, shape

849 Properties

850

851 See also

852 --------

853 isrec, shape

854

855 """

856 def __init__(self, data, typecode, size, shape, dimensions,

857 attributes=None,

858 maskandscale=False):

859 self.data = data

860 self._typecode = typecode

861 self._size = size

862 self._shape = shape

863 self.dimensions = dimensions

864 self.maskandscale = maskandscale

865

866 self._attributes = attributes or {}

867 for k, v in self._attributes.items():

868 self.__dict__[k] = v

869

870 def __setattr__(self, attr, value):

871 # Store user defined attributes in a separate dict,

872 # so we can save them to file later.

873 try:

874 self._attributes[attr] = value

875 except AttributeError:

876 pass

877 self.__dict__[attr] = value

878

879 def isrec(self):

880 """Returns whether the variable has a record dimension or not.

881

882 A record dimension is a dimension along which additional data could be

883 easily appended in the netcdf data structure without much rewriting of

884 the data file. This attribute is a read-only property of the

885 `netcdf_variable`.

886

887 """

888 return bool(self.data.shape) and not self._shape[0]

889 isrec = property(isrec)

890

891 def shape(self):

892 """Returns the shape tuple of the data variable.

893

894 This is a read-only attribute and can not be modified in the

895 same manner of other numpy arrays.

896 """

897 return self.data.shape

898 shape = property(shape)

899

900 def getValue(self):

901 """

902 Retrieve a scalar value from a `netcdf_variable` of length one.

903

904 Raises

905 ------

906 ValueError

907 If the netcdf variable is an array of length greater than one,

908 this exception will be raised.

909

910 """

911 return self.data.item()

912

913 def assignValue(self, value):

914 """

915 Assign a scalar value to a `netcdf_variable` of length one.

916

917 Parameters

918 ----------

919 value : scalar

920 Scalar value (of compatible type) to assign to a length-one netcdf

921 variable. This value will be written to file.

922

923 Raises

924 ------

925 ValueError

926 If the input is not a scalar, or if the destination is not a length-one

927 netcdf variable.

928

929 """

930 if not self.data.flags.writeable:

931 # Work-around for a bug in NumPy. Calling itemset() on a read-only

932 # memory-mapped array causes a seg. fault.

933 # See NumPy ticket #1622, and SciPy ticket #1202.

934 # This check for `writeable` can be removed when the oldest version

935 # of NumPy still supported by scipy contains the fix for #1622.

936 raise RuntimeError("variable is not writeable")

937

938 self.data[:] = value

939

940 def typecode(self):

941 """

942 Return the typecode of the variable.

943

944 Returns

945 -------

946 typecode : char

947 The character typecode of the variable (e.g., 'i' for int).

948

949 """

950 return self._typecode

951

952 def itemsize(self):

953 """

954 Return the itemsize of the variable.

955

956 Returns

957 -------

958 itemsize : int

959 The element size of the variable (e.g., 8 for float64).

960

961 """

962 return self._size

963

964 def __getitem__(self, index):

965 if not self.maskandscale:

966 return self.data[index]

967

968 data = self.data[index].copy()

969 missing_value = self._get_missing_value()

970 data = self._apply_missing_value(data, missing_value)

971 scale_factor = self._attributes.get('scale_factor')

972 add_offset = self._attributes.get('add_offset')

973 if add_offset is not None or scale_factor is not None:

974 data = data.astype(np.float64)

975 if scale_factor is not None:

976 data = data * scale_factor

977 if add_offset is not None:

978 data += add_offset

979

980 return data

981

982 def __setitem__(self, index, data):

983 if self.maskandscale:

984 missing_value = (

985 self._get_missing_value() or

986 getattr(data, 'fill_value', 999999))

987 self._attributes.setdefault('missing_value', missing_value)

988 self._attributes.setdefault('_FillValue', missing_value)

989 data = ((data - self._attributes.get('add_offset', 0.0)) /

990 self._attributes.get('scale_factor', 1.0))

991 data = np.ma.asarray(data).filled(missing_value)

992 if self._typecode not in 'fd' and data.dtype.kind == 'f':

993 data = np.round(data)

994

995 # Expand data for record vars?

996 if self.isrec:

997 if isinstance(index, tuple):

998 rec_index = index[0]

999 else:

1000 rec_index = index

1001 if isinstance(rec_index, slice):

1002 recs = (rec_index.start or 0) + len(data)

1003 else:

1004 recs = rec_index + 1

1005 if recs > len(self.data):

1006 shape = (recs,) + self._shape[1:]

1007 # Resize in-place does not always work since

1008 # the array might not be single-segment

1009 try:

1010 self.data.resize(shape)

1011 except ValueError:

1012 self.__dict__['data'] = np.resize(self.data, shape).astype(self.data.dtype)

1013 self.data[index] = data

1014

1015 def _default_encoded_fill_value(self):

1016 """

1017 The default encoded fill-value for this Variable's data type.

1018 """

1019 nc_type = REVERSE[self.typecode(), self.itemsize()]

1020 return FILLMAP[nc_type]

1021

1022 def _get_encoded_fill_value(self):

1023 """

1024 Returns the encoded fill value for this variable as bytes.

1025

1026 This is taken from either the _FillValue attribute, or the default fill

1027 value for this variable's data type.

1028 """

1029 if '_FillValue' in self._attributes:

1030 fill_value = np.array(self._attributes['_FillValue'],

1031 dtype=self.data.dtype).tobytes()

1032 if len(fill_value) == self.itemsize():

1033 return fill_value

1034 else:

1035 return self._default_encoded_fill_value()

1036 else:

1037 return self._default_encoded_fill_value()

1038

1039 def _get_missing_value(self):

1040 """

1041 Returns the value denoting "no data" for this variable.

1042

1043 If this variable does not have a missing/fill value, returns None.

1044

1045 If both _FillValue and missing_value are given, give precedence to

1046 _FillValue. The netCDF standard gives special meaning to _FillValue;

1047 missing_value is just used for compatibility with old datasets.

1048 """

1049

1050 if '_FillValue' in self._attributes:

1051 missing_value = self._attributes['_FillValue']

1052 elif 'missing_value' in self._attributes:

1053 missing_value = self._attributes['missing_value']

1054 else:

1055 missing_value = None

1056

1057 return missing_value

1058

1059 @staticmethod

1060 def _apply_missing_value(data, missing_value):

1061 """

1062 Applies the given missing value to the data array.

1063

1064 Returns a numpy.ma array, with any value equal to missing_value masked

1065 out (unless missing_value is None, in which case the original array is

1066 returned).

1067 """

1068

1069 if missing_value is None:

1070 newdata = data

1071 else:

1072 try:

1073 missing_value_isnan = np.isnan(missing_value)

1074 except (TypeError, NotImplementedError):

1075 # some data types (e.g., characters) cannot be tested for NaN

1076 missing_value_isnan = False

1077

1078 if missing_value_isnan:

1079 mymask = np.isnan(data)

1080 else:

1081 mymask = (data == missing_value)

1082

1083 newdata = np.ma.masked_where(mymask, data)

1084

1085 return newdata

1086

1087

1088NetCDFFile = netcdf_file

1089NetCDFVariable = netcdf_variable

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 19%

481 statements