Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/hb.py: 18%

265 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-22 06:44 +0000

1""" 

2Implementation of Harwell-Boeing read/write. 

3 

4At the moment not the full Harwell-Boeing format is supported. Supported 

5features are: 

6 

7 - assembled, non-symmetric, real matrices 

8 - integer for pointer/indices 

9 - exponential format for float values, and int format 

10 

11""" 

12# TODO: 

13# - Add more support (symmetric/complex matrices, non-assembled matrices ?) 

14 

15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but 

16# takes a lot of memory. Being faster would require compiled code. 

17# write is not efficient. Although not a terribly exciting task, 

18# having reusable facilities to efficiently read/write fortran-formatted files 

19# would be useful outside this module. 

20 

21import warnings 

22 

23import numpy as np 

24from scipy.sparse import csc_matrix 

25from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat 

26 

27__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile", 

28 "HBMatrixType"] 

29 

30 

31class MalformedHeader(Exception): 

32 pass 

33 

34 

35class LineOverflow(Warning): 

36 pass 

37 

38 

39def _nbytes_full(fmt, nlines): 

40 """Return the number of bytes to read to get every full lines for the 

41 given parsed fortran format.""" 

42 return (fmt.repeat * fmt.width + 1) * (nlines - 1) 

43 

44 

45class HBInfo: 

46 @classmethod 

47 def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None): 

48 """Create a HBInfo instance from an existing sparse matrix. 

49 

50 Parameters 

51 ---------- 

52 m : sparse matrix 

53 the HBInfo instance will derive its parameters from m 

54 title : str 

55 Title to put in the HB header 

56 key : str 

57 Key 

58 mxtype : HBMatrixType 

59 type of the input matrix 

60 fmt : dict 

61 not implemented 

62 

63 Returns 

64 ------- 

65 hb_info : HBInfo instance 

66 """ 

67 m = m.tocsc(copy=False) 

68 

69 pointer = m.indptr 

70 indices = m.indices 

71 values = m.data 

72 

73 nrows, ncols = m.shape 

74 nnon_zeros = m.nnz 

75 

76 if fmt is None: 

77 # +1 because HB use one-based indexing (Fortran), and we will write 

78 # the indices /pointer as such 

79 pointer_fmt = IntFormat.from_number(np.max(pointer+1)) 

80 indices_fmt = IntFormat.from_number(np.max(indices+1)) 

81 

82 if values.dtype.kind in np.typecodes["AllFloat"]: 

83 values_fmt = ExpFormat.from_number(-np.max(np.abs(values))) 

84 elif values.dtype.kind in np.typecodes["AllInteger"]: 

85 values_fmt = IntFormat.from_number(-np.max(np.abs(values))) 

86 else: 

87 message = f"type {values.dtype.kind} not implemented yet" 

88 raise NotImplementedError(message) 

89 else: 

90 raise NotImplementedError("fmt argument not supported yet.") 

91 

92 if mxtype is None: 

93 if not np.isrealobj(values): 

94 raise ValueError("Complex values not supported yet") 

95 if values.dtype.kind in np.typecodes["AllInteger"]: 

96 tp = "integer" 

97 elif values.dtype.kind in np.typecodes["AllFloat"]: 

98 tp = "real" 

99 else: 

100 raise NotImplementedError("type %s for values not implemented" 

101 % values.dtype) 

102 mxtype = HBMatrixType(tp, "unsymmetric", "assembled") 

103 else: 

104 raise ValueError("mxtype argument not handled yet.") 

105 

106 def _nlines(fmt, size): 

107 nlines = size // fmt.repeat 

108 if nlines * fmt.repeat != size: 

109 nlines += 1 

110 return nlines 

111 

112 pointer_nlines = _nlines(pointer_fmt, pointer.size) 

113 indices_nlines = _nlines(indices_fmt, indices.size) 

114 values_nlines = _nlines(values_fmt, values.size) 

115 

116 total_nlines = pointer_nlines + indices_nlines + values_nlines 

117 

118 return cls(title, key, 

119 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

120 mxtype, nrows, ncols, nnon_zeros, 

121 pointer_fmt.fortran_format, indices_fmt.fortran_format, 

122 values_fmt.fortran_format) 

123 

124 @classmethod 

125 def from_file(cls, fid): 

126 """Create a HBInfo instance from a file object containing a matrix in the 

127 HB format. 

128 

129 Parameters 

130 ---------- 

131 fid : file-like matrix 

132 File or file-like object containing a matrix in the HB format. 

133 

134 Returns 

135 ------- 

136 hb_info : HBInfo instance 

137 """ 

138 # First line 

139 line = fid.readline().strip("\n") 

140 if not len(line) > 72: 

141 raise ValueError("Expected at least 72 characters for first line, " 

142 "got: \n%s" % line) 

143 title = line[:72] 

144 key = line[72:] 

145 

146 # Second line 

147 line = fid.readline().strip("\n") 

148 if not len(line.rstrip()) >= 56: 

149 raise ValueError("Expected at least 56 characters for second line, " 

150 "got: \n%s" % line) 

151 total_nlines = _expect_int(line[:14]) 

152 pointer_nlines = _expect_int(line[14:28]) 

153 indices_nlines = _expect_int(line[28:42]) 

154 values_nlines = _expect_int(line[42:56]) 

155 

156 rhs_nlines = line[56:72].strip() 

157 if rhs_nlines == '': 

158 rhs_nlines = 0 

159 else: 

160 rhs_nlines = _expect_int(rhs_nlines) 

161 if not rhs_nlines == 0: 

162 raise ValueError("Only files without right hand side supported for " 

163 "now.") 

164 

165 # Third line 

166 line = fid.readline().strip("\n") 

167 if not len(line) >= 70: 

168 raise ValueError("Expected at least 72 character for third line, got:\n" 

169 "%s" % line) 

170 

171 mxtype_s = line[:3].upper() 

172 if not len(mxtype_s) == 3: 

173 raise ValueError("mxtype expected to be 3 characters long") 

174 

175 mxtype = HBMatrixType.from_fortran(mxtype_s) 

176 if mxtype.value_type not in ["real", "integer"]: 

177 raise ValueError("Only real or integer matrices supported for " 

178 "now (detected %s)" % mxtype) 

179 if not mxtype.structure == "unsymmetric": 

180 raise ValueError("Only unsymmetric matrices supported for " 

181 "now (detected %s)" % mxtype) 

182 if not mxtype.storage == "assembled": 

183 raise ValueError("Only assembled matrices supported for now") 

184 

185 if not line[3:14] == " " * 11: 

186 raise ValueError("Malformed data for third line: %s" % line) 

187 

188 nrows = _expect_int(line[14:28]) 

189 ncols = _expect_int(line[28:42]) 

190 nnon_zeros = _expect_int(line[42:56]) 

191 nelementals = _expect_int(line[56:70]) 

192 if not nelementals == 0: 

193 raise ValueError("Unexpected value %d for nltvl (last entry of line 3)" 

194 % nelementals) 

195 

196 # Fourth line 

197 line = fid.readline().strip("\n") 

198 

199 ct = line.split() 

200 if not len(ct) == 3: 

201 raise ValueError("Expected 3 formats, got %s" % ct) 

202 

203 return cls(title, key, 

204 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

205 mxtype, nrows, ncols, nnon_zeros, 

206 ct[0], ct[1], ct[2], 

207 rhs_nlines, nelementals) 

208 

209 def __init__(self, title, key, 

210 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

211 mxtype, nrows, ncols, nnon_zeros, 

212 pointer_format_str, indices_format_str, values_format_str, 

213 right_hand_sides_nlines=0, nelementals=0): 

214 """Do not use this directly, but the class ctrs (from_* functions).""" 

215 self.title = title 

216 self.key = key 

217 if title is None: 

218 title = "No Title" 

219 if len(title) > 72: 

220 raise ValueError("title cannot be > 72 characters") 

221 

222 if key is None: 

223 key = "|No Key" 

224 if len(key) > 8: 

225 warnings.warn("key is > 8 characters (key is %s)" % key, 

226 LineOverflow, stacklevel=3) 

227 

228 self.total_nlines = total_nlines 

229 self.pointer_nlines = pointer_nlines 

230 self.indices_nlines = indices_nlines 

231 self.values_nlines = values_nlines 

232 

233 parser = FortranFormatParser() 

234 pointer_format = parser.parse(pointer_format_str) 

235 if not isinstance(pointer_format, IntFormat): 

236 raise ValueError("Expected int format for pointer format, got %s" 

237 % pointer_format) 

238 

239 indices_format = parser.parse(indices_format_str) 

240 if not isinstance(indices_format, IntFormat): 

241 raise ValueError("Expected int format for indices format, got %s" % 

242 indices_format) 

243 

244 values_format = parser.parse(values_format_str) 

245 if isinstance(values_format, ExpFormat): 

246 if mxtype.value_type not in ["real", "complex"]: 

247 raise ValueError(f"Inconsistency between matrix type {mxtype} and " 

248 f"value type {values_format}") 

249 values_dtype = np.float64 

250 elif isinstance(values_format, IntFormat): 

251 if mxtype.value_type not in ["integer"]: 

252 raise ValueError(f"Inconsistency between matrix type {mxtype} and " 

253 f"value type {values_format}") 

254 # XXX: fortran int -> dtype association ? 

255 values_dtype = int 

256 else: 

257 raise ValueError(f"Unsupported format for values {values_format!r}") 

258 

259 self.pointer_format = pointer_format 

260 self.indices_format = indices_format 

261 self.values_format = values_format 

262 

263 self.pointer_dtype = np.int32 

264 self.indices_dtype = np.int32 

265 self.values_dtype = values_dtype 

266 

267 self.pointer_nlines = pointer_nlines 

268 self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines) 

269 

270 self.indices_nlines = indices_nlines 

271 self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines) 

272 

273 self.values_nlines = values_nlines 

274 self.values_nbytes_full = _nbytes_full(values_format, values_nlines) 

275 

276 self.nrows = nrows 

277 self.ncols = ncols 

278 self.nnon_zeros = nnon_zeros 

279 self.nelementals = nelementals 

280 self.mxtype = mxtype 

281 

282 def dump(self): 

283 """Gives the header corresponding to this instance as a string.""" 

284 header = [self.title.ljust(72) + self.key.ljust(8)] 

285 

286 header.append("%14d%14d%14d%14d" % 

287 (self.total_nlines, self.pointer_nlines, 

288 self.indices_nlines, self.values_nlines)) 

289 header.append("%14s%14d%14d%14d%14d" % 

290 (self.mxtype.fortran_format.ljust(14), self.nrows, 

291 self.ncols, self.nnon_zeros, 0)) 

292 

293 pffmt = self.pointer_format.fortran_format 

294 iffmt = self.indices_format.fortran_format 

295 vffmt = self.values_format.fortran_format 

296 header.append("%16s%16s%20s" % 

297 (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20))) 

298 return "\n".join(header) 

299 

300 

301def _expect_int(value, msg=None): 

302 try: 

303 return int(value) 

304 except ValueError as e: 

305 if msg is None: 

306 msg = "Expected an int, got %s" 

307 raise ValueError(msg % value) from e 

308 

309 

310def _read_hb_data(content, header): 

311 # XXX: look at a way to reduce memory here (big string creation) 

312 ptr_string = "".join([content.read(header.pointer_nbytes_full), 

313 content.readline()]) 

314 ptr = np.fromstring(ptr_string, 

315 dtype=int, sep=' ') 

316 

317 ind_string = "".join([content.read(header.indices_nbytes_full), 

318 content.readline()]) 

319 ind = np.fromstring(ind_string, 

320 dtype=int, sep=' ') 

321 

322 val_string = "".join([content.read(header.values_nbytes_full), 

323 content.readline()]) 

324 val = np.fromstring(val_string, 

325 dtype=header.values_dtype, sep=' ') 

326 

327 try: 

328 return csc_matrix((val, ind-1, ptr-1), 

329 shape=(header.nrows, header.ncols)) 

330 except ValueError as e: 

331 raise e 

332 

333 

334def _write_data(m, fid, header): 

335 m = m.tocsc(copy=False) 

336 

337 def write_array(f, ar, nlines, fmt): 

338 # ar_nlines is the number of full lines, n is the number of items per 

339 # line, ffmt the fortran format 

340 pyfmt = fmt.python_format 

341 pyfmt_full = pyfmt * fmt.repeat 

342 

343 # for each array to write, we first write the full lines, and special 

344 # case for partial line 

345 full = ar[:(nlines - 1) * fmt.repeat] 

346 for row in full.reshape((nlines-1, fmt.repeat)): 

347 f.write(pyfmt_full % tuple(row) + "\n") 

348 nremain = ar.size - full.size 

349 if nremain > 0: 

350 f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n") 

351 

352 fid.write(header.dump()) 

353 fid.write("\n") 

354 # +1 is for Fortran one-based indexing 

355 write_array(fid, m.indptr+1, header.pointer_nlines, 

356 header.pointer_format) 

357 write_array(fid, m.indices+1, header.indices_nlines, 

358 header.indices_format) 

359 write_array(fid, m.data, header.values_nlines, 

360 header.values_format) 

361 

362 

363class HBMatrixType: 

364 """Class to hold the matrix type.""" 

365 # q2f* translates qualified names to Fortran character 

366 _q2f_type = { 

367 "real": "R", 

368 "complex": "C", 

369 "pattern": "P", 

370 "integer": "I", 

371 } 

372 _q2f_structure = { 

373 "symmetric": "S", 

374 "unsymmetric": "U", 

375 "hermitian": "H", 

376 "skewsymmetric": "Z", 

377 "rectangular": "R" 

378 } 

379 _q2f_storage = { 

380 "assembled": "A", 

381 "elemental": "E", 

382 } 

383 

384 _f2q_type = {j: i for i, j in _q2f_type.items()} 

385 _f2q_structure = {j: i for i, j in _q2f_structure.items()} 

386 _f2q_storage = {j: i for i, j in _q2f_storage.items()} 

387 

388 @classmethod 

389 def from_fortran(cls, fmt): 

390 if not len(fmt) == 3: 

391 raise ValueError("Fortran format for matrix type should be 3 " 

392 "characters long") 

393 try: 

394 value_type = cls._f2q_type[fmt[0]] 

395 structure = cls._f2q_structure[fmt[1]] 

396 storage = cls._f2q_storage[fmt[2]] 

397 return cls(value_type, structure, storage) 

398 except KeyError as e: 

399 raise ValueError("Unrecognized format %s" % fmt) from e 

400 

401 def __init__(self, value_type, structure, storage="assembled"): 

402 self.value_type = value_type 

403 self.structure = structure 

404 self.storage = storage 

405 

406 if value_type not in self._q2f_type: 

407 raise ValueError("Unrecognized type %s" % value_type) 

408 if structure not in self._q2f_structure: 

409 raise ValueError("Unrecognized structure %s" % structure) 

410 if storage not in self._q2f_storage: 

411 raise ValueError("Unrecognized storage %s" % storage) 

412 

413 @property 

414 def fortran_format(self): 

415 return self._q2f_type[self.value_type] + \ 

416 self._q2f_structure[self.structure] + \ 

417 self._q2f_storage[self.storage] 

418 

419 def __repr__(self): 

420 return f"HBMatrixType({self.value_type}, {self.structure}, {self.storage})" 

421 

422 

423class HBFile: 

424 def __init__(self, file, hb_info=None): 

425 """Create a HBFile instance. 

426 

427 Parameters 

428 ---------- 

429 file : file-object 

430 StringIO work as well 

431 hb_info : HBInfo, optional 

432 Should be given as an argument for writing, in which case the file 

433 should be writable. 

434 """ 

435 self._fid = file 

436 if hb_info is None: 

437 self._hb_info = HBInfo.from_file(file) 

438 else: 

439 #raise OSError("file %s is not writable, and hb_info " 

440 # "was given." % file) 

441 self._hb_info = hb_info 

442 

443 @property 

444 def title(self): 

445 return self._hb_info.title 

446 

447 @property 

448 def key(self): 

449 return self._hb_info.key 

450 

451 @property 

452 def type(self): 

453 return self._hb_info.mxtype.value_type 

454 

455 @property 

456 def structure(self): 

457 return self._hb_info.mxtype.structure 

458 

459 @property 

460 def storage(self): 

461 return self._hb_info.mxtype.storage 

462 

463 def read_matrix(self): 

464 return _read_hb_data(self._fid, self._hb_info) 

465 

466 def write_matrix(self, m): 

467 return _write_data(m, self._fid, self._hb_info) 

468 

469 

470def hb_read(path_or_open_file): 

471 """Read HB-format file. 

472 

473 Parameters 

474 ---------- 

475 path_or_open_file : path-like or file-like 

476 If a file-like object, it is used as-is. Otherwise, it is opened 

477 before reading. 

478 

479 Returns 

480 ------- 

481 data : scipy.sparse.csc_matrix instance 

482 The data read from the HB file as a sparse matrix. 

483 

484 Notes 

485 ----- 

486 At the moment not the full Harwell-Boeing format is supported. Supported 

487 features are: 

488 

489 - assembled, non-symmetric, real matrices 

490 - integer for pointer/indices 

491 - exponential format for float values, and int format 

492 

493 Examples 

494 -------- 

495 We can read and write a harwell-boeing format file: 

496 

497 >>> from scipy.io import hb_read, hb_write 

498 >>> from scipy.sparse import csr_matrix, eye 

499 >>> data = csr_matrix(eye(3)) # create a sparse matrix 

500 >>> hb_write("data.hb", data) # write a hb file 

501 >>> print(hb_read("data.hb")) # read a hb file 

502 (0, 0) 1.0 

503 (1, 1) 1.0 

504 (2, 2) 1.0 

505 

506 """ 

507 def _get_matrix(fid): 

508 hb = HBFile(fid) 

509 return hb.read_matrix() 

510 

511 if hasattr(path_or_open_file, 'read'): 

512 return _get_matrix(path_or_open_file) 

513 else: 

514 with open(path_or_open_file) as f: 

515 return _get_matrix(f) 

516 

517 

518def hb_write(path_or_open_file, m, hb_info=None): 

519 """Write HB-format file. 

520 

521 Parameters 

522 ---------- 

523 path_or_open_file : path-like or file-like 

524 If a file-like object, it is used as-is. Otherwise, it is opened 

525 before writing. 

526 m : sparse-matrix 

527 the sparse matrix to write 

528 hb_info : HBInfo 

529 contains the meta-data for write 

530 

531 Returns 

532 ------- 

533 None 

534 

535 Notes 

536 ----- 

537 At the moment not the full Harwell-Boeing format is supported. Supported 

538 features are: 

539 

540 - assembled, non-symmetric, real matrices 

541 - integer for pointer/indices 

542 - exponential format for float values, and int format 

543 

544 Examples 

545 -------- 

546 We can read and write a harwell-boeing format file: 

547 

548 >>> from scipy.io import hb_read, hb_write 

549 >>> from scipy.sparse import csr_matrix, eye 

550 >>> data = csr_matrix(eye(3)) # create a sparse matrix 

551 >>> hb_write("data.hb", data) # write a hb file 

552 >>> print(hb_read("data.hb")) # read a hb file 

553 (0, 0) 1.0 

554 (1, 1) 1.0 

555 (2, 2) 1.0 

556 

557 """ 

558 m = m.tocsc(copy=False) 

559 

560 if hb_info is None: 

561 hb_info = HBInfo.from_data(m) 

562 

563 def _set_matrix(fid): 

564 hb = HBFile(fid, hb_info) 

565 return hb.write_matrix(m) 

566 

567 if hasattr(path_or_open_file, 'write'): 

568 return _set_matrix(path_or_open_file) 

569 else: 

570 with open(path_or_open_file, 'w') as f: 

571 return _set_matrix(f)