Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/

1"""

2NetCDF reader/writer module.

4This module is used to read and create NetCDF files. NetCDF files are

5accessed through the `netcdf_file` object. Data written to and from NetCDF

6files are contained in `netcdf_variable` objects. Attributes are given

7as member variables of the `netcdf_file` and `netcdf_variable` objects.

9This module implements the Scientific.IO.NetCDF API to read and create

10NetCDF files. The same API is also used in the PyNIO and pynetcdf

11modules, allowing these modules to be used interchangeably when working

12with NetCDF files.

14Only NetCDF3 is supported here; for NetCDF4 see

15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,

16which has a similar API.

18"""

20# TODO:

21# * properly implement ``_FillValue``.

22# * fix character variables.

23# * implement PAGESIZE for Python 2.6?

25# The Scientific.IO.NetCDF API allows attributes to be added directly to

26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate

27# between user-set attributes and instance attributes, user-set attributes

28# are automatically stored in the ``_attributes`` attribute by overloading

29#``__setattr__``. This is the reason why the code sometimes uses

30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;

31# otherwise the key would be inserted into userspace attributes.

34__all__ = ['netcdf_file', 'netcdf_variable']

37import warnings

38import weakref

39from operator import mul

40from platform import python_implementation

42import mmap as mm

44import numpy as np

45from numpy import frombuffer, dtype, empty, array, asarray

46from numpy import little_endian as LITTLE_ENDIAN

47from functools import reduce

50IS_PYPY = python_implementation() == 'PyPy'

52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'

53ZERO = b'\x00\x00\x00\x00'

54NC_BYTE = b'\x00\x00\x00\x01'

55NC_CHAR = b'\x00\x00\x00\x02'

56NC_SHORT = b'\x00\x00\x00\x03'

57NC_INT = b'\x00\x00\x00\x04'

58NC_FLOAT = b'\x00\x00\x00\x05'

59NC_DOUBLE = b'\x00\x00\x00\x06'

60NC_DIMENSION = b'\x00\x00\x00\n'

61NC_VARIABLE = b'\x00\x00\x00\x0b'

62NC_ATTRIBUTE = b'\x00\x00\x00\x0c'

63FILL_BYTE = b'\x81'

64FILL_CHAR = b'\x00'

65FILL_SHORT = b'\x80\x01'

66FILL_INT = b'\x80\x00\x00\x01'

67FILL_FLOAT = b'\x7C\xF0\x00\x00'

68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00'

70TYPEMAP = {NC_BYTE: ('b', 1),

71 NC_CHAR: ('c', 1),

72 NC_SHORT: ('h', 2),

73 NC_INT: ('i', 4),

74 NC_FLOAT: ('f', 4),

75 NC_DOUBLE: ('d', 8)}

77FILLMAP = {NC_BYTE: FILL_BYTE,

78 NC_CHAR: FILL_CHAR,

79 NC_SHORT: FILL_SHORT,

80 NC_INT: FILL_INT,

81 NC_FLOAT: FILL_FLOAT,

82 NC_DOUBLE: FILL_DOUBLE}

84REVERSE = {('b', 1): NC_BYTE,

85 ('B', 1): NC_CHAR,

86 ('c', 1): NC_CHAR,

87 ('h', 2): NC_SHORT,

88 ('i', 4): NC_INT,

89 ('f', 4): NC_FLOAT,

90 ('d', 8): NC_DOUBLE,

92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char,

93 # used when getting the types from generic attributes.

94 ('l', 4): NC_INT,

95 ('S', 1): NC_CHAR}

98class netcdf_file:

99 """

100 A file object for NetCDF data.

101

102 A `netcdf_file` object has two standard attributes: `dimensions` and

103 `variables`. The values of both are dictionaries, mapping dimension

104 names to their associated lengths and variable names to variables,

105 respectively. Application programs should never modify these

106 dictionaries.

107

108 All other attributes correspond to global attributes defined in the

109 NetCDF file. Global file attributes are created by assigning to an

110 attribute of the `netcdf_file` object.

111

112 Parameters

113 ----------

114 filename : string or file-like

115 string -> filename

116 mode : {'r', 'w', 'a'}, optional

117 read-write-append mode, default is 'r'

118 mmap : None or bool, optional

119 Whether to mmap `filename` when reading. Default is True

120 when `filename` is a file name, False when `filename` is a

121 file-like object. Note that when mmap is in use, data arrays

122 returned refer directly to the mmapped data on disk, and the

123 file cannot be closed as long as references to it exist.

124 version : {1, 2}, optional

125 version of netcdf to read / write, where 1 means *Classic

126 format* and 2 means *64-bit offset format*. Default is 1. See

127 `here <https://docs.unidata.ucar.edu/nug/current/netcdf_introduction.html#select_format>`__

128 for more info.

129 maskandscale : bool, optional

130 Whether to automatically scale and/or mask data based on attributes.

131 Default is False.

132

133 Notes

134 -----

135 The major advantage of this module over other modules is that it doesn't

136 require the code to be linked to the NetCDF libraries. This module is

137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.

138

139 NetCDF files are a self-describing binary data format. The file contains

140 metadata that describes the dimensions and variables in the file. More

141 details about NetCDF files can be found `here

142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There

143 are three main sections to a NetCDF data structure:

144

145 1. Dimensions

146 2. Variables

147 3. Attributes

148

149 The dimensions section records the name and length of each dimension used

150 by the variables. The variables would then indicate which dimensions it

151 uses and any attributes such as data units, along with containing the data

152 values for the variable. It is good practice to include a

153 variable that is the same name as a dimension to provide the values for

154 that axes. Lastly, the attributes section would contain additional

155 information such as the name of the file creator or the instrument used to

156 collect the data.

157

158 When writing data to a NetCDF file, there is often the need to indicate the

159 'record dimension'. A record dimension is the unbounded dimension for a

160 variable. For example, a temperature variable may have dimensions of

161 latitude, longitude and time. If one wants to add more temperature data to

162 the NetCDF file as time progresses, then the temperature variable should

163 have the time dimension flagged as the record dimension.

164

165 In addition, the NetCDF file header contains the position of the data in

166 the file, so access can be done in an efficient manner without loading

167 unnecessary data into memory. It uses the ``mmap`` module to create

168 Numpy arrays mapped to the data on disk, for the same purpose.

169

170 Note that when `netcdf_file` is used to open a file with mmap=True

171 (default for read-only), arrays returned by it refer to data

172 directly on the disk. The file should not be closed, and cannot be cleanly

173 closed when asked, if such arrays are alive. You may want to copy data arrays

174 obtained from mmapped Netcdf file if they are to be processed after the file

175 is closed, see the example below.

176

177 Examples

178 --------

179 To create a NetCDF file:

180

181 >>> from scipy.io import netcdf_file

182 >>> import numpy as np

183 >>> f = netcdf_file('simple.nc', 'w')

184 >>> f.history = 'Created for a test'

185 >>> f.createDimension('time', 10)

186 >>> time = f.createVariable('time', 'i', ('time',))

187 >>> time[:] = np.arange(10)

188 >>> time.units = 'days since 2008-01-01'

189 >>> f.close()

190

191 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice

192 of the time variable allows for the data to be set in the object, rather

193 than letting ``arange(10)`` overwrite the ``time`` variable.

194

195 To read the NetCDF file we just created:

196

197 >>> from scipy.io import netcdf_file

198 >>> f = netcdf_file('simple.nc', 'r')

199 >>> print(f.history)

200 b'Created for a test'

201 >>> time = f.variables['time']

202 >>> print(time.units)

203 b'days since 2008-01-01'

204 >>> print(time.shape)

205 (10,)

206 >>> print(time[-1])

207 9

208

209 NetCDF files, when opened read-only, return arrays that refer

210 directly to memory-mapped data on disk:

211

212 >>> data = time[:]

213

214 If the data is to be processed after the file is closed, it needs

215 to be copied to main memory:

216

217 >>> data = time[:].copy()

218 >>> del time

219 >>> f.close()

220 >>> data.mean()

221 4.5

222

223 A NetCDF file can also be used as context manager:

224

225 >>> from scipy.io import netcdf_file

226 >>> with netcdf_file('simple.nc', 'r') as f:

227 ... print(f.history)

228 b'Created for a test'

229

230 """

231 def __init__(self, filename, mode='r', mmap=None, version=1,

232 maskandscale=False):

233 """Initialize netcdf_file from fileobj (str or file-like)."""

234 if mode not in 'rwa':

235 raise ValueError("Mode must be either 'r', 'w' or 'a'.")

236

237 if hasattr(filename, 'seek'): # file-like

238 self.fp = filename

239 self.filename = 'None'

240 if mmap is None:

241 mmap = False

242 elif mmap and not hasattr(filename, 'fileno'):

243 raise ValueError('Cannot use file object for mmap')

244 else: # maybe it's a string

245 self.filename = filename

246 omode = 'r+' if mode == 'a' else mode

247 self.fp = open(self.filename, '%sb' % omode)

248 if mmap is None:

249 # Mmapped files on PyPy cannot be usually closed

250 # before the GC runs, so it's better to use mmap=False

251 # as the default.

252 mmap = (not IS_PYPY)

253

254 if mode != 'r':

255 # Cannot read write-only files

256 mmap = False

257

258 self.use_mmap = mmap

259 self.mode = mode

260 self.version_byte = version

261 self.maskandscale = maskandscale

262

263 self.dimensions = {}

264 self.variables = {}

265

266 self._dims = []

267 self._recs = 0

268 self._recsize = 0

269

270 self._mm = None

271 self._mm_buf = None

272 if self.use_mmap:

273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)

274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)

275

276 self._attributes = {}

277

278 if mode in 'ra':

279 self._read()

280

281 def __setattr__(self, attr, value):

282 # Store user defined attributes in a separate dict,

283 # so we can save them to file later.

284 try:

285 self._attributes[attr] = value

286 except AttributeError:

287 pass

288 self.__dict__[attr] = value

289

290 def close(self):

291 """Closes the NetCDF file."""

292 if hasattr(self, 'fp') and not self.fp.closed:

293 try:

294 self.flush()

295 finally:

296 self.variables = {}

297 if self._mm_buf is not None:

298 ref = weakref.ref(self._mm_buf)

299 self._mm_buf = None

300 if ref() is None:

301 # self._mm_buf is gc'd, and we can close the mmap

302 self._mm.close()

303 else:

304 # we cannot close self._mm, since self._mm_buf is

305 # alive and there may still be arrays referring to it

306 warnings.warn(

307 "Cannot close a netcdf_file opened with mmap=True, when "

308 "netcdf_variables or arrays referring to its data still "

309 "exist. All data arrays obtained from such files refer "

310 "directly to data on disk, and must be copied before the "

311 "file can be cleanly closed. "

312 "(See netcdf_file docstring for more information on mmap.)",

313 category=RuntimeWarning, stacklevel=2,

314 )

315 self._mm = None

316 self.fp.close()

317 __del__ = close

318

319 def __enter__(self):

320 return self

321

322 def __exit__(self, type, value, traceback):

323 self.close()

324

325 def createDimension(self, name, length):

326 """

327 Adds a dimension to the Dimension section of the NetCDF data structure.

328

329 Note that this function merely adds a new dimension that the variables can

330 reference. The values for the dimension, if desired, should be added as

331 a variable using `createVariable`, referring to this dimension.

332

333 Parameters

334 ----------

335 name : str

336 Name of the dimension (Eg, 'lat' or 'time').

337 length : int

338 Length of the dimension.

339

340 See Also

341 --------

342 createVariable

343

344 """

345 if length is None and self._dims:

346 raise ValueError("Only first dimension may be unlimited!")

347

348 self.dimensions[name] = length

349 self._dims.append(name)

350

351 def createVariable(self, name, type, dimensions):

352 """

353 Create an empty variable for the `netcdf_file` object, specifying its data

354 type and the dimensions it uses.

355

356 Parameters

357 ----------

358 name : str

359 Name of the new variable.

360 type : dtype or str

361 Data type of the variable.

362 dimensions : sequence of str

363 List of the dimension names used by the variable, in the desired order.

364

365 Returns

366 -------

367 variable : netcdf_variable

368 The newly created ``netcdf_variable`` object.

369 This object has also been added to the `netcdf_file` object as well.

370

371 See Also

372 --------

373 createDimension

374

375 Notes

376 -----

377 Any dimensions to be used by the variable should already exist in the

378 NetCDF data structure or should be created by `createDimension` prior to

379 creating the NetCDF variable.

380

381 """

382 shape = tuple([self.dimensions[dim] for dim in dimensions])

383 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy

384

385 type = dtype(type)

386 typecode, size = type.char, type.itemsize

387 if (typecode, size) not in REVERSE:

388 raise ValueError("NetCDF 3 does not support type %s" % type)

389

390 # convert to big endian always for NetCDF 3

391 data = empty(shape_, dtype=type.newbyteorder("B"))

392 self.variables[name] = netcdf_variable(

393 data, typecode, size, shape, dimensions,

394 maskandscale=self.maskandscale)

395 return self.variables[name]

396

397 def flush(self):

398 """

399 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.

400

401 See Also

402 --------

403 sync : Identical function

404

405 """

406 if hasattr(self, 'mode') and self.mode in 'wa':

407 self._write()

408 sync = flush

409

410 def _write(self):

411 self.fp.seek(0)

412 self.fp.write(b'CDF')

413 self.fp.write(array(self.version_byte, '>b').tobytes())

414

415 # Write headers and data.

416 self._write_numrecs()

417 self._write_dim_array()

418 self._write_gatt_array()

419 self._write_var_array()

420

421 def _write_numrecs(self):

422 # Get highest record count from all record variables.

423 for var in self.variables.values():

424 if var.isrec and len(var.data) > self._recs:

425 self.__dict__['_recs'] = len(var.data)

426 self._pack_int(self._recs)

427

428 def _write_dim_array(self):

429 if self.dimensions:

430 self.fp.write(NC_DIMENSION)

431 self._pack_int(len(self.dimensions))

432 for name in self._dims:

433 self._pack_string(name)

434 length = self.dimensions[name]

435 self._pack_int(length or 0) # replace None with 0 for record dimension

436 else:

437 self.fp.write(ABSENT)

438

439 def _write_gatt_array(self):

440 self._write_att_array(self._attributes)

441

442 def _write_att_array(self, attributes):

443 if attributes:

444 self.fp.write(NC_ATTRIBUTE)

445 self._pack_int(len(attributes))

446 for name, values in attributes.items():

447 self._pack_string(name)

448 self._write_att_values(values)

449 else:

450 self.fp.write(ABSENT)

451

452 def _write_var_array(self):

453 if self.variables:

454 self.fp.write(NC_VARIABLE)

455 self._pack_int(len(self.variables))

456

457 # Sort variable names non-recs first, then recs.

458 def sortkey(n):

459 v = self.variables[n]

460 if v.isrec:

461 return (-1,)

462 return v._shape

463 variables = sorted(self.variables, key=sortkey, reverse=True)

464

465 # Set the metadata for all variables.

466 for name in variables:

467 self._write_var_metadata(name)

468 # Now that we have the metadata, we know the vsize of

469 # each record variable, so we can calculate recsize.

470 self.__dict__['_recsize'] = sum([

471 var._vsize for var in self.variables.values()

472 if var.isrec])

473 # Set the data for all variables.

474 for name in variables:

475 self._write_var_data(name)

476 else:

477 self.fp.write(ABSENT)

478

479 def _write_var_metadata(self, name):

480 var = self.variables[name]

481

482 self._pack_string(name)

483 self._pack_int(len(var.dimensions))

484 for dimname in var.dimensions:

485 dimid = self._dims.index(dimname)

486 self._pack_int(dimid)

487

488 self._write_att_array(var._attributes)

489

490 nc_type = REVERSE[var.typecode(), var.itemsize()]

491 self.fp.write(nc_type)

492

493 if not var.isrec:

494 vsize = var.data.size * var.data.itemsize

495 vsize += -vsize % 4

496 else: # record variable

497 try:

498 vsize = var.data[0].size * var.data.itemsize

499 except IndexError:

500 vsize = 0

501 rec_vars = len([v for v in self.variables.values()

502 if v.isrec])

503 if rec_vars > 1:

504 vsize += -vsize % 4

505 self.variables[name].__dict__['_vsize'] = vsize

506 self._pack_int(vsize)

507

508 # Pack a bogus begin, and set the real value later.

509 self.variables[name].__dict__['_begin'] = self.fp.tell()

510 self._pack_begin(0)

511

512 def _write_var_data(self, name):

513 var = self.variables[name]

514

515 # Set begin in file header.

516 the_beguine = self.fp.tell()

517 self.fp.seek(var._begin)

518 self._pack_begin(the_beguine)

519 self.fp.seek(the_beguine)

520

521 # Write data.

522 if not var.isrec:

523 self.fp.write(var.data.tobytes())

524 count = var.data.size * var.data.itemsize

525 self._write_var_padding(var, var._vsize - count)

526 else: # record variable

527 # Handle rec vars with shape[0] < nrecs.

528 if self._recs > len(var.data):

529 shape = (self._recs,) + var.data.shape[1:]

530 # Resize in-place does not always work since

531 # the array might not be single-segment

532 try:

533 var.data.resize(shape)

534 except ValueError:

535 dtype = var.data.dtype

536 var.__dict__['data'] = np.resize(var.data, shape).astype(dtype)

537

538 pos0 = pos = self.fp.tell()

539 for rec in var.data:

540 # Apparently scalars cannot be converted to big endian. If we

541 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype

542 # will remain as ``=i4``.

543 if not rec.shape and (rec.dtype.byteorder == '<' or

544 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):

545 rec = rec.byteswap()

546 self.fp.write(rec.tobytes())

547 # Padding

548 count = rec.size * rec.itemsize

549 self._write_var_padding(var, var._vsize - count)

550 pos += self._recsize

551 self.fp.seek(pos)

552 self.fp.seek(pos0 + var._vsize)

553

554 def _write_var_padding(self, var, size):

555 encoded_fill_value = var._get_encoded_fill_value()

556 num_fills = size // len(encoded_fill_value)

557 self.fp.write(encoded_fill_value * num_fills)

558

559 def _write_att_values(self, values):

560 if hasattr(values, 'dtype'):

561 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]

562 else:

563 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)]

564

565 # bytes index into scalars in py3k. Check for "string" types

566 if isinstance(values, (str, bytes)):

567 sample = values

568 else:

569 try:

570 sample = values[0] # subscriptable?

571 except TypeError:

572 sample = values # scalar

573

574 for class_, nc_type in types:

575 if isinstance(sample, class_):

576 break

577

578 typecode, size = TYPEMAP[nc_type]

579 dtype_ = '>%s' % typecode

580 # asarray() dies with bytes and '>c' in py3k. Change to 'S'

581 dtype_ = 'S' if dtype_ == '>c' else dtype_

582

583 values = asarray(values, dtype=dtype_)

584

585 self.fp.write(nc_type)

586

587 if values.dtype.char == 'S':

588 nelems = values.itemsize

589 else:

590 nelems = values.size

591 self._pack_int(nelems)

592

593 if not values.shape and (values.dtype.byteorder == '<' or

594 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):

595 values = values.byteswap()

596 self.fp.write(values.tobytes())

597 count = values.size * values.itemsize

598 self.fp.write(b'\x00' * (-count % 4)) # pad

599

600 def _read(self):

601 # Check magic bytes and version

602 magic = self.fp.read(3)

603 if not magic == b'CDF':

604 raise TypeError("Error: %s is not a valid NetCDF 3 file" %

605 self.filename)

606 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]

607

608 # Read file headers and set data.

609 self._read_numrecs()

610 self._read_dim_array()

611 self._read_gatt_array()

612 self._read_var_array()

613

614 def _read_numrecs(self):

615 self.__dict__['_recs'] = self._unpack_int()

616

617 def _read_dim_array(self):

618 header = self.fp.read(4)

619 if header not in [ZERO, NC_DIMENSION]:

620 raise ValueError("Unexpected header.")

621 count = self._unpack_int()

622

623 for dim in range(count):

624 name = self._unpack_string().decode('latin1')

625 length = self._unpack_int() or None # None for record dimension

626 self.dimensions[name] = length

627 self._dims.append(name) # preserve order

628

629 def _read_gatt_array(self):

630 for k, v in self._read_att_array().items():

631 self.__setattr__(k, v)

632

633 def _read_att_array(self):

634 header = self.fp.read(4)

635 if header not in [ZERO, NC_ATTRIBUTE]:

636 raise ValueError("Unexpected header.")

637 count = self._unpack_int()

638

639 attributes = {}

640 for attr in range(count):

641 name = self._unpack_string().decode('latin1')

642 attributes[name] = self._read_att_values()

643 return attributes

644

645 def _read_var_array(self):

646 header = self.fp.read(4)

647 if header not in [ZERO, NC_VARIABLE]:

648 raise ValueError("Unexpected header.")

649

650 begin = 0

651 dtypes = {'names': [], 'formats': []}

652 rec_vars = []

653 count = self._unpack_int()

654 for var in range(count):

655 (name, dimensions, shape, attributes,

656 typecode, size, dtype_, begin_, vsize) = self._read_var()

657 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html

658 # Note that vsize is the product of the dimension lengths

659 # (omitting the record dimension) and the number of bytes

660 # per value (determined from the type), increased to the

661 # next multiple of 4, for each variable. If a record

662 # variable, this is the amount of space per record. The

663 # netCDF "record size" is calculated as the sum of the

664 # vsize's of all the record variables.

665 #

666 # The vsize field is actually redundant, because its value

667 # may be computed from other information in the header. The

668 # 32-bit vsize field is not large enough to contain the size

669 # of variables that require more than 2^32 - 4 bytes, so

670 # 2^32 - 1 is used in the vsize field for such variables.

671 if shape and shape[0] is None: # record variable

672 rec_vars.append(name)

673 # The netCDF "record size" is calculated as the sum of

674 # the vsize's of all the record variables.

675 self.__dict__['_recsize'] += vsize

676 if begin == 0:

677 begin = begin_

678 dtypes['names'].append(name)

679 dtypes['formats'].append(str(shape[1:]) + dtype_)

680

681 # Handle padding with a virtual variable.

682 if typecode in 'bch':

683 actual_size = reduce(mul, (1,) + shape[1:]) * size

684 padding = -actual_size % 4

685 if padding:

686 dtypes['names'].append('_padding_%d' % var)

687 dtypes['formats'].append('(%d,)>b' % padding)

688

689 # Data will be set later.

690 data = None

691 else: # not a record variable

692 # Calculate size to avoid problems with vsize (above)

693 a_size = reduce(mul, shape, 1) * size

694 if self.use_mmap:

695 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)

696 data.shape = shape

697 else:

698 pos = self.fp.tell()

699 self.fp.seek(begin_)

700 data = frombuffer(self.fp.read(a_size), dtype=dtype_

701 ).copy()

702 data.shape = shape

703 self.fp.seek(pos)

704

705 # Add variable.

706 self.variables[name] = netcdf_variable(

707 data, typecode, size, shape, dimensions, attributes,

708 maskandscale=self.maskandscale)

709

710 if rec_vars:

711 # Remove padding when only one record variable.

712 if len(rec_vars) == 1:

713 dtypes['names'] = dtypes['names'][:1]

714 dtypes['formats'] = dtypes['formats'][:1]

715

716 # Build rec array.

717 if self.use_mmap:

718 buf = self._mm_buf[begin:begin+self._recs*self._recsize]

719 rec_array = buf.view(dtype=dtypes)

720 rec_array.shape = (self._recs,)

721 else:

722 pos = self.fp.tell()

723 self.fp.seek(begin)

724 rec_array = frombuffer(self.fp.read(self._recs*self._recsize),

725 dtype=dtypes).copy()

726 rec_array.shape = (self._recs,)

727 self.fp.seek(pos)

728

729 for var in rec_vars:

730 self.variables[var].__dict__['data'] = rec_array[var]

731

732 def _read_var(self):

733 name = self._unpack_string().decode('latin1')

734 dimensions = []

735 shape = []

736 dims = self._unpack_int()

737

738 for i in range(dims):

739 dimid = self._unpack_int()

740 dimname = self._dims[dimid]

741 dimensions.append(dimname)

742 dim = self.dimensions[dimname]

743 shape.append(dim)

744 dimensions = tuple(dimensions)

745 shape = tuple(shape)

746

747 attributes = self._read_att_array()

748 nc_type = self.fp.read(4)

749 vsize = self._unpack_int()

750 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

751

752 typecode, size = TYPEMAP[nc_type]

753 dtype_ = '>%s' % typecode

754

755 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize

756

757 def _read_att_values(self):

758 nc_type = self.fp.read(4)

759 n = self._unpack_int()

760

761 typecode, size = TYPEMAP[nc_type]

762

763 count = n*size

764 values = self.fp.read(int(count))

765 self.fp.read(-count % 4) # read padding

766

767 if typecode != 'c':

768 values = frombuffer(values, dtype='>%s' % typecode).copy()

769 if values.shape == (1,):

770 values = values[0]

771 else:

772 values = values.rstrip(b'\x00')

773 return values

774

775 def _pack_begin(self, begin):

776 if self.version_byte == 1:

777 self._pack_int(begin)

778 elif self.version_byte == 2:

779 self._pack_int64(begin)

780

781 def _pack_int(self, value):

782 self.fp.write(array(value, '>i').tobytes())

783 _pack_int32 = _pack_int

784

785 def _unpack_int(self):

786 return int(frombuffer(self.fp.read(4), '>i')[0])

787 _unpack_int32 = _unpack_int

788

789 def _pack_int64(self, value):

790 self.fp.write(array(value, '>q').tobytes())

791

792 def _unpack_int64(self):

793 return frombuffer(self.fp.read(8), '>q')[0]

794

795 def _pack_string(self, s):

796 count = len(s)

797 self._pack_int(count)

798 self.fp.write(s.encode('latin1'))

799 self.fp.write(b'\x00' * (-count % 4)) # pad

800

801 def _unpack_string(self):

802 count = self._unpack_int()

803 s = self.fp.read(count).rstrip(b'\x00')

804 self.fp.read(-count % 4) # read padding

805 return s

806

807

808class netcdf_variable:

809 """

810 A data object for netcdf files.

811

812 `netcdf_variable` objects are constructed by calling the method

813 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`

814 objects behave much like array objects defined in numpy, except that their

815 data resides in a file. Data is read by indexing and written by assigning

816 to an indexed subset; the entire array can be accessed by the index ``[:]``

817 or (for scalars) by using the methods `getValue` and `assignValue`.

818 `netcdf_variable` objects also have attribute `shape` with the same meaning

819 as for arrays, but the shape cannot be modified. There is another read-only

820 attribute `dimensions`, whose value is the tuple of dimension names.

821

822 All other attributes correspond to variable attributes defined in

823 the NetCDF file. Variable attributes are created by assigning to an

824 attribute of the `netcdf_variable` object.

825

826 Parameters

827 ----------

828 data : array_like

829 The data array that holds the values for the variable.

830 Typically, this is initialized as empty, but with the proper shape.

831 typecode : dtype character code

832 Desired data-type for the data array.

833 size : int

834 Desired element size for the data array.

835 shape : sequence of ints

836 The shape of the array. This should match the lengths of the

837 variable's dimensions.

838 dimensions : sequence of strings

839 The names of the dimensions used by the variable. Must be in the

840 same order of the dimension lengths given by `shape`.

841 attributes : dict, optional

842 Attribute values (any type) keyed by string names. These attributes

843 become attributes for the netcdf_variable object.

844 maskandscale : bool, optional

845 Whether to automatically scale and/or mask data based on attributes.

846 Default is False.

847

848

849 Attributes

850 ----------

851 dimensions : list of str

852 List of names of dimensions used by the variable object.

853 isrec, shape

854 Properties

855

856 See also

857 --------

858 isrec, shape

859

860 """

861 def __init__(self, data, typecode, size, shape, dimensions,

862 attributes=None,

863 maskandscale=False):

864 self.data = data

865 self._typecode = typecode

866 self._size = size

867 self._shape = shape

868 self.dimensions = dimensions

869 self.maskandscale = maskandscale

870

871 self._attributes = attributes or {}

872 for k, v in self._attributes.items():

873 self.__dict__[k] = v

874

875 def __setattr__(self, attr, value):

876 # Store user defined attributes in a separate dict,

877 # so we can save them to file later.

878 try:

879 self._attributes[attr] = value

880 except AttributeError:

881 pass

882 self.__dict__[attr] = value

883

884 def isrec(self):

885 """Returns whether the variable has a record dimension or not.

886

887 A record dimension is a dimension along which additional data could be

888 easily appended in the netcdf data structure without much rewriting of

889 the data file. This attribute is a read-only property of the

890 `netcdf_variable`.

891

892 """

893 return bool(self.data.shape) and not self._shape[0]

894 isrec = property(isrec)

895

896 def shape(self):

897 """Returns the shape tuple of the data variable.

898

899 This is a read-only attribute and can not be modified in the

900 same manner of other numpy arrays.

901 """

902 return self.data.shape

903 shape = property(shape)

904

905 def getValue(self):

906 """

907 Retrieve a scalar value from a `netcdf_variable` of length one.

908

909 Raises

910 ------

911 ValueError

912 If the netcdf variable is an array of length greater than one,

913 this exception will be raised.

914

915 """

916 return self.data.item()

917

918 def assignValue(self, value):

919 """

920 Assign a scalar value to a `netcdf_variable` of length one.

921

922 Parameters

923 ----------

924 value : scalar

925 Scalar value (of compatible type) to assign to a length-one netcdf

926 variable. This value will be written to file.

927

928 Raises

929 ------

930 ValueError

931 If the input is not a scalar, or if the destination is not a length-one

932 netcdf variable.

933

934 """

935 if not self.data.flags.writeable:

936 # Work-around for a bug in NumPy. Calling itemset() on a read-only

937 # memory-mapped array causes a seg. fault.

938 # See NumPy ticket #1622, and SciPy ticket #1202.

939 # This check for `writeable` can be removed when the oldest version

940 # of NumPy still supported by scipy contains the fix for #1622.

941 raise RuntimeError("variable is not writeable")

942

943 self.data[:] = value

944

945 def typecode(self):

946 """

947 Return the typecode of the variable.

948

949 Returns

950 -------

951 typecode : char

952 The character typecode of the variable (e.g., 'i' for int).

953

954 """

955 return self._typecode

956

957 def itemsize(self):

958 """

959 Return the itemsize of the variable.

960

961 Returns

962 -------

963 itemsize : int

964 The element size of the variable (e.g., 8 for float64).

965

966 """

967 return self._size

968

969 def __getitem__(self, index):

970 if not self.maskandscale:

971 return self.data[index]

972

973 data = self.data[index].copy()

974 missing_value = self._get_missing_value()

975 data = self._apply_missing_value(data, missing_value)

976 scale_factor = self._attributes.get('scale_factor')

977 add_offset = self._attributes.get('add_offset')

978 if add_offset is not None or scale_factor is not None:

979 data = data.astype(np.float64)

980 if scale_factor is not None:

981 data = data * scale_factor

982 if add_offset is not None:

983 data += add_offset

984

985 return data

986

987 def __setitem__(self, index, data):

988 if self.maskandscale:

989 missing_value = (

990 self._get_missing_value() or

991 getattr(data, 'fill_value', 999999))

992 self._attributes.setdefault('missing_value', missing_value)

993 self._attributes.setdefault('_FillValue', missing_value)

994 data = ((data - self._attributes.get('add_offset', 0.0)) /

995 self._attributes.get('scale_factor', 1.0))

996 data = np.ma.asarray(data).filled(missing_value)

997 if self._typecode not in 'fd' and data.dtype.kind == 'f':

998 data = np.round(data)

999

1000 # Expand data for record vars?

1001 if self.isrec:

1002 if isinstance(index, tuple):

1003 rec_index = index[0]

1004 else:

1005 rec_index = index

1006 if isinstance(rec_index, slice):

1007 recs = (rec_index.start or 0) + len(data)

1008 else:

1009 recs = rec_index + 1

1010 if recs > len(self.data):

1011 shape = (recs,) + self._shape[1:]

1012 # Resize in-place does not always work since

1013 # the array might not be single-segment

1014 try:

1015 self.data.resize(shape)

1016 except ValueError:

1017 dtype = self.data.dtype

1018 self.__dict__['data'] = np.resize(self.data, shape).astype(dtype)

1019 self.data[index] = data

1020

1021 def _default_encoded_fill_value(self):

1022 """

1023 The default encoded fill-value for this Variable's data type.

1024 """

1025 nc_type = REVERSE[self.typecode(), self.itemsize()]

1026 return FILLMAP[nc_type]

1027

1028 def _get_encoded_fill_value(self):

1029 """

1030 Returns the encoded fill value for this variable as bytes.

1031

1032 This is taken from either the _FillValue attribute, or the default fill

1033 value for this variable's data type.

1034 """

1035 if '_FillValue' in self._attributes:

1036 fill_value = np.array(self._attributes['_FillValue'],

1037 dtype=self.data.dtype).tobytes()

1038 if len(fill_value) == self.itemsize():

1039 return fill_value

1040 else:

1041 return self._default_encoded_fill_value()

1042 else:

1043 return self._default_encoded_fill_value()

1044

1045 def _get_missing_value(self):

1046 """

1047 Returns the value denoting "no data" for this variable.

1048

1049 If this variable does not have a missing/fill value, returns None.

1050

1051 If both _FillValue and missing_value are given, give precedence to

1052 _FillValue. The netCDF standard gives special meaning to _FillValue;

1053 missing_value is just used for compatibility with old datasets.

1054 """

1055

1056 if '_FillValue' in self._attributes:

1057 missing_value = self._attributes['_FillValue']

1058 elif 'missing_value' in self._attributes:

1059 missing_value = self._attributes['missing_value']

1060 else:

1061 missing_value = None

1062

1063 return missing_value

1064

1065 @staticmethod

1066 def _apply_missing_value(data, missing_value):

1067 """

1068 Applies the given missing value to the data array.

1069

1070 Returns a numpy.ma array, with any value equal to missing_value masked

1071 out (unless missing_value is None, in which case the original array is

1072 returned).

1073 """

1074

1075 if missing_value is None:

1076 newdata = data

1077 else:

1078 try:

1079 missing_value_isnan = np.isnan(missing_value)

1080 except (TypeError, NotImplementedError):

1081 # some data types (e.g., characters) cannot be tested for NaN

1082 missing_value_isnan = False

1083

1084 if missing_value_isnan:

1085 mymask = np.isnan(data)

1086 else:

1087 mymask = (data == missing_value)

1088

1089 newdata = np.ma.masked_where(mymask, data)

1090

1091 return newdata

1092

1093

1094NetCDFFile = netcdf_file

1095NetCDFVariable = netcdf_variable

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_netcdf.py: 18%

484 statements