Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/hb.py: 19%

264 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1""" 

2Implementation of Harwell-Boeing read/write. 

3 

4At the moment not the full Harwell-Boeing format is supported. Supported 

5features are: 

6 

7 - assembled, non-symmetric, real matrices 

8 - integer for pointer/indices 

9 - exponential format for float values, and int format 

10 

11""" 

12# TODO: 

13# - Add more support (symmetric/complex matrices, non-assembled matrices ?) 

14 

15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but 

16# takes a lot of memory. Being faster would require compiled code. 

17# write is not efficient. Although not a terribly exciting task, 

18# having reusable facilities to efficiently read/write fortran-formatted files 

19# would be useful outside this module. 

20 

21import warnings 

22 

23import numpy as np 

24from scipy.sparse import csc_matrix 

25from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat 

26 

27__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile", 

28 "HBMatrixType"] 

29 

30 

31class MalformedHeader(Exception): 

32 pass 

33 

34 

35class LineOverflow(Warning): 

36 pass 

37 

38 

39def _nbytes_full(fmt, nlines): 

40 """Return the number of bytes to read to get every full lines for the 

41 given parsed fortran format.""" 

42 return (fmt.repeat * fmt.width + 1) * (nlines - 1) 

43 

44 

45class HBInfo: 

46 @classmethod 

47 def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None): 

48 """Create a HBInfo instance from an existing sparse matrix. 

49 

50 Parameters 

51 ---------- 

52 m : sparse matrix 

53 the HBInfo instance will derive its parameters from m 

54 title : str 

55 Title to put in the HB header 

56 key : str 

57 Key 

58 mxtype : HBMatrixType 

59 type of the input matrix 

60 fmt : dict 

61 not implemented 

62 

63 Returns 

64 ------- 

65 hb_info : HBInfo instance 

66 """ 

67 m = m.tocsc(copy=False) 

68 

69 pointer = m.indptr 

70 indices = m.indices 

71 values = m.data 

72 

73 nrows, ncols = m.shape 

74 nnon_zeros = m.nnz 

75 

76 if fmt is None: 

77 # +1 because HB use one-based indexing (Fortran), and we will write 

78 # the indices /pointer as such 

79 pointer_fmt = IntFormat.from_number(np.max(pointer+1)) 

80 indices_fmt = IntFormat.from_number(np.max(indices+1)) 

81 

82 if values.dtype.kind in np.typecodes["AllFloat"]: 

83 values_fmt = ExpFormat.from_number(-np.max(np.abs(values))) 

84 elif values.dtype.kind in np.typecodes["AllInteger"]: 

85 values_fmt = IntFormat.from_number(-np.max(np.abs(values))) 

86 else: 

87 raise NotImplementedError("type %s not implemented yet" % values.dtype.kind) 

88 else: 

89 raise NotImplementedError("fmt argument not supported yet.") 

90 

91 if mxtype is None: 

92 if not np.isrealobj(values): 

93 raise ValueError("Complex values not supported yet") 

94 if values.dtype.kind in np.typecodes["AllInteger"]: 

95 tp = "integer" 

96 elif values.dtype.kind in np.typecodes["AllFloat"]: 

97 tp = "real" 

98 else: 

99 raise NotImplementedError("type %s for values not implemented" 

100 % values.dtype) 

101 mxtype = HBMatrixType(tp, "unsymmetric", "assembled") 

102 else: 

103 raise ValueError("mxtype argument not handled yet.") 

104 

105 def _nlines(fmt, size): 

106 nlines = size // fmt.repeat 

107 if nlines * fmt.repeat != size: 

108 nlines += 1 

109 return nlines 

110 

111 pointer_nlines = _nlines(pointer_fmt, pointer.size) 

112 indices_nlines = _nlines(indices_fmt, indices.size) 

113 values_nlines = _nlines(values_fmt, values.size) 

114 

115 total_nlines = pointer_nlines + indices_nlines + values_nlines 

116 

117 return cls(title, key, 

118 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

119 mxtype, nrows, ncols, nnon_zeros, 

120 pointer_fmt.fortran_format, indices_fmt.fortran_format, 

121 values_fmt.fortran_format) 

122 

123 @classmethod 

124 def from_file(cls, fid): 

125 """Create a HBInfo instance from a file object containing a matrix in the 

126 HB format. 

127 

128 Parameters 

129 ---------- 

130 fid : file-like matrix 

131 File or file-like object containing a matrix in the HB format. 

132 

133 Returns 

134 ------- 

135 hb_info : HBInfo instance 

136 """ 

137 # First line 

138 line = fid.readline().strip("\n") 

139 if not len(line) > 72: 

140 raise ValueError("Expected at least 72 characters for first line, " 

141 "got: \n%s" % line) 

142 title = line[:72] 

143 key = line[72:] 

144 

145 # Second line 

146 line = fid.readline().strip("\n") 

147 if not len(line.rstrip()) >= 56: 

148 raise ValueError("Expected at least 56 characters for second line, " 

149 "got: \n%s" % line) 

150 total_nlines = _expect_int(line[:14]) 

151 pointer_nlines = _expect_int(line[14:28]) 

152 indices_nlines = _expect_int(line[28:42]) 

153 values_nlines = _expect_int(line[42:56]) 

154 

155 rhs_nlines = line[56:72].strip() 

156 if rhs_nlines == '': 

157 rhs_nlines = 0 

158 else: 

159 rhs_nlines = _expect_int(rhs_nlines) 

160 if not rhs_nlines == 0: 

161 raise ValueError("Only files without right hand side supported for " 

162 "now.") 

163 

164 # Third line 

165 line = fid.readline().strip("\n") 

166 if not len(line) >= 70: 

167 raise ValueError("Expected at least 72 character for third line, got:\n" 

168 "%s" % line) 

169 

170 mxtype_s = line[:3].upper() 

171 if not len(mxtype_s) == 3: 

172 raise ValueError("mxtype expected to be 3 characters long") 

173 

174 mxtype = HBMatrixType.from_fortran(mxtype_s) 

175 if mxtype.value_type not in ["real", "integer"]: 

176 raise ValueError("Only real or integer matrices supported for " 

177 "now (detected %s)" % mxtype) 

178 if not mxtype.structure == "unsymmetric": 

179 raise ValueError("Only unsymmetric matrices supported for " 

180 "now (detected %s)" % mxtype) 

181 if not mxtype.storage == "assembled": 

182 raise ValueError("Only assembled matrices supported for now") 

183 

184 if not line[3:14] == " " * 11: 

185 raise ValueError("Malformed data for third line: %s" % line) 

186 

187 nrows = _expect_int(line[14:28]) 

188 ncols = _expect_int(line[28:42]) 

189 nnon_zeros = _expect_int(line[42:56]) 

190 nelementals = _expect_int(line[56:70]) 

191 if not nelementals == 0: 

192 raise ValueError("Unexpected value %d for nltvl (last entry of line 3)" 

193 % nelementals) 

194 

195 # Fourth line 

196 line = fid.readline().strip("\n") 

197 

198 ct = line.split() 

199 if not len(ct) == 3: 

200 raise ValueError("Expected 3 formats, got %s" % ct) 

201 

202 return cls(title, key, 

203 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

204 mxtype, nrows, ncols, nnon_zeros, 

205 ct[0], ct[1], ct[2], 

206 rhs_nlines, nelementals) 

207 

208 def __init__(self, title, key, 

209 total_nlines, pointer_nlines, indices_nlines, values_nlines, 

210 mxtype, nrows, ncols, nnon_zeros, 

211 pointer_format_str, indices_format_str, values_format_str, 

212 right_hand_sides_nlines=0, nelementals=0): 

213 """Do not use this directly, but the class ctrs (from_* functions).""" 

214 self.title = title 

215 self.key = key 

216 if title is None: 

217 title = "No Title" 

218 if len(title) > 72: 

219 raise ValueError("title cannot be > 72 characters") 

220 

221 if key is None: 

222 key = "|No Key" 

223 if len(key) > 8: 

224 warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow) 

225 

226 self.total_nlines = total_nlines 

227 self.pointer_nlines = pointer_nlines 

228 self.indices_nlines = indices_nlines 

229 self.values_nlines = values_nlines 

230 

231 parser = FortranFormatParser() 

232 pointer_format = parser.parse(pointer_format_str) 

233 if not isinstance(pointer_format, IntFormat): 

234 raise ValueError("Expected int format for pointer format, got %s" 

235 % pointer_format) 

236 

237 indices_format = parser.parse(indices_format_str) 

238 if not isinstance(indices_format, IntFormat): 

239 raise ValueError("Expected int format for indices format, got %s" % 

240 indices_format) 

241 

242 values_format = parser.parse(values_format_str) 

243 if isinstance(values_format, ExpFormat): 

244 if mxtype.value_type not in ["real", "complex"]: 

245 raise ValueError("Inconsistency between matrix type {} and " 

246 "value type {}".format(mxtype, values_format)) 

247 values_dtype = np.float64 

248 elif isinstance(values_format, IntFormat): 

249 if mxtype.value_type not in ["integer"]: 

250 raise ValueError("Inconsistency between matrix type {} and " 

251 "value type {}".format(mxtype, values_format)) 

252 # XXX: fortran int -> dtype association ? 

253 values_dtype = int 

254 else: 

255 raise ValueError(f"Unsupported format for values {values_format!r}") 

256 

257 self.pointer_format = pointer_format 

258 self.indices_format = indices_format 

259 self.values_format = values_format 

260 

261 self.pointer_dtype = np.int32 

262 self.indices_dtype = np.int32 

263 self.values_dtype = values_dtype 

264 

265 self.pointer_nlines = pointer_nlines 

266 self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines) 

267 

268 self.indices_nlines = indices_nlines 

269 self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines) 

270 

271 self.values_nlines = values_nlines 

272 self.values_nbytes_full = _nbytes_full(values_format, values_nlines) 

273 

274 self.nrows = nrows 

275 self.ncols = ncols 

276 self.nnon_zeros = nnon_zeros 

277 self.nelementals = nelementals 

278 self.mxtype = mxtype 

279 

280 def dump(self): 

281 """Gives the header corresponding to this instance as a string.""" 

282 header = [self.title.ljust(72) + self.key.ljust(8)] 

283 

284 header.append("%14d%14d%14d%14d" % 

285 (self.total_nlines, self.pointer_nlines, 

286 self.indices_nlines, self.values_nlines)) 

287 header.append("%14s%14d%14d%14d%14d" % 

288 (self.mxtype.fortran_format.ljust(14), self.nrows, 

289 self.ncols, self.nnon_zeros, 0)) 

290 

291 pffmt = self.pointer_format.fortran_format 

292 iffmt = self.indices_format.fortran_format 

293 vffmt = self.values_format.fortran_format 

294 header.append("%16s%16s%20s" % 

295 (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20))) 

296 return "\n".join(header) 

297 

298 

299def _expect_int(value, msg=None): 

300 try: 

301 return int(value) 

302 except ValueError as e: 

303 if msg is None: 

304 msg = "Expected an int, got %s" 

305 raise ValueError(msg % value) from e 

306 

307 

308def _read_hb_data(content, header): 

309 # XXX: look at a way to reduce memory here (big string creation) 

310 ptr_string = "".join([content.read(header.pointer_nbytes_full), 

311 content.readline()]) 

312 ptr = np.fromstring(ptr_string, 

313 dtype=int, sep=' ') 

314 

315 ind_string = "".join([content.read(header.indices_nbytes_full), 

316 content.readline()]) 

317 ind = np.fromstring(ind_string, 

318 dtype=int, sep=' ') 

319 

320 val_string = "".join([content.read(header.values_nbytes_full), 

321 content.readline()]) 

322 val = np.fromstring(val_string, 

323 dtype=header.values_dtype, sep=' ') 

324 

325 try: 

326 return csc_matrix((val, ind-1, ptr-1), 

327 shape=(header.nrows, header.ncols)) 

328 except ValueError as e: 

329 raise e 

330 

331 

332def _write_data(m, fid, header): 

333 m = m.tocsc(copy=False) 

334 

335 def write_array(f, ar, nlines, fmt): 

336 # ar_nlines is the number of full lines, n is the number of items per 

337 # line, ffmt the fortran format 

338 pyfmt = fmt.python_format 

339 pyfmt_full = pyfmt * fmt.repeat 

340 

341 # for each array to write, we first write the full lines, and special 

342 # case for partial line 

343 full = ar[:(nlines - 1) * fmt.repeat] 

344 for row in full.reshape((nlines-1, fmt.repeat)): 

345 f.write(pyfmt_full % tuple(row) + "\n") 

346 nremain = ar.size - full.size 

347 if nremain > 0: 

348 f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n") 

349 

350 fid.write(header.dump()) 

351 fid.write("\n") 

352 # +1 is for Fortran one-based indexing 

353 write_array(fid, m.indptr+1, header.pointer_nlines, 

354 header.pointer_format) 

355 write_array(fid, m.indices+1, header.indices_nlines, 

356 header.indices_format) 

357 write_array(fid, m.data, header.values_nlines, 

358 header.values_format) 

359 

360 

361class HBMatrixType: 

362 """Class to hold the matrix type.""" 

363 # q2f* translates qualified names to Fortran character 

364 _q2f_type = { 

365 "real": "R", 

366 "complex": "C", 

367 "pattern": "P", 

368 "integer": "I", 

369 } 

370 _q2f_structure = { 

371 "symmetric": "S", 

372 "unsymmetric": "U", 

373 "hermitian": "H", 

374 "skewsymmetric": "Z", 

375 "rectangular": "R" 

376 } 

377 _q2f_storage = { 

378 "assembled": "A", 

379 "elemental": "E", 

380 } 

381 

382 _f2q_type = {j: i for i, j in _q2f_type.items()} 

383 _f2q_structure = {j: i for i, j in _q2f_structure.items()} 

384 _f2q_storage = {j: i for i, j in _q2f_storage.items()} 

385 

386 @classmethod 

387 def from_fortran(cls, fmt): 

388 if not len(fmt) == 3: 

389 raise ValueError("Fortran format for matrix type should be 3 " 

390 "characters long") 

391 try: 

392 value_type = cls._f2q_type[fmt[0]] 

393 structure = cls._f2q_structure[fmt[1]] 

394 storage = cls._f2q_storage[fmt[2]] 

395 return cls(value_type, structure, storage) 

396 except KeyError as e: 

397 raise ValueError("Unrecognized format %s" % fmt) from e 

398 

399 def __init__(self, value_type, structure, storage="assembled"): 

400 self.value_type = value_type 

401 self.structure = structure 

402 self.storage = storage 

403 

404 if value_type not in self._q2f_type: 

405 raise ValueError("Unrecognized type %s" % value_type) 

406 if structure not in self._q2f_structure: 

407 raise ValueError("Unrecognized structure %s" % structure) 

408 if storage not in self._q2f_storage: 

409 raise ValueError("Unrecognized storage %s" % storage) 

410 

411 @property 

412 def fortran_format(self): 

413 return self._q2f_type[self.value_type] + \ 

414 self._q2f_structure[self.structure] + \ 

415 self._q2f_storage[self.storage] 

416 

417 def __repr__(self): 

418 return "HBMatrixType(%s, %s, %s)" % \ 

419 (self.value_type, self.structure, self.storage) 

420 

421 

422class HBFile: 

423 def __init__(self, file, hb_info=None): 

424 """Create a HBFile instance. 

425 

426 Parameters 

427 ---------- 

428 file : file-object 

429 StringIO work as well 

430 hb_info : HBInfo, optional 

431 Should be given as an argument for writing, in which case the file 

432 should be writable. 

433 """ 

434 self._fid = file 

435 if hb_info is None: 

436 self._hb_info = HBInfo.from_file(file) 

437 else: 

438 #raise OSError("file %s is not writable, and hb_info " 

439 # "was given." % file) 

440 self._hb_info = hb_info 

441 

442 @property 

443 def title(self): 

444 return self._hb_info.title 

445 

446 @property 

447 def key(self): 

448 return self._hb_info.key 

449 

450 @property 

451 def type(self): 

452 return self._hb_info.mxtype.value_type 

453 

454 @property 

455 def structure(self): 

456 return self._hb_info.mxtype.structure 

457 

458 @property 

459 def storage(self): 

460 return self._hb_info.mxtype.storage 

461 

462 def read_matrix(self): 

463 return _read_hb_data(self._fid, self._hb_info) 

464 

465 def write_matrix(self, m): 

466 return _write_data(m, self._fid, self._hb_info) 

467 

468 

469def hb_read(path_or_open_file): 

470 """Read HB-format file. 

471 

472 Parameters 

473 ---------- 

474 path_or_open_file : path-like or file-like 

475 If a file-like object, it is used as-is. Otherwise, it is opened 

476 before reading. 

477 

478 Returns 

479 ------- 

480 data : scipy.sparse.csc_matrix instance 

481 The data read from the HB file as a sparse matrix. 

482 

483 Notes 

484 ----- 

485 At the moment not the full Harwell-Boeing format is supported. Supported 

486 features are: 

487 

488 - assembled, non-symmetric, real matrices 

489 - integer for pointer/indices 

490 - exponential format for float values, and int format 

491 

492 Examples 

493 -------- 

494 We can read and write a harwell-boeing format file: 

495 

496 >>> from scipy.io import hb_read, hb_write 

497 >>> from scipy.sparse import csr_matrix, eye 

498 >>> data = csr_matrix(eye(3)) # create a sparse matrix 

499 >>> hb_write("data.hb", data) # write a hb file 

500 >>> print(hb_read("data.hb")) # read a hb file 

501 (0, 0) 1.0 

502 (1, 1) 1.0 

503 (2, 2) 1.0 

504 

505 """ 

506 def _get_matrix(fid): 

507 hb = HBFile(fid) 

508 return hb.read_matrix() 

509 

510 if hasattr(path_or_open_file, 'read'): 

511 return _get_matrix(path_or_open_file) 

512 else: 

513 with open(path_or_open_file) as f: 

514 return _get_matrix(f) 

515 

516 

517def hb_write(path_or_open_file, m, hb_info=None): 

518 """Write HB-format file. 

519 

520 Parameters 

521 ---------- 

522 path_or_open_file : path-like or file-like 

523 If a file-like object, it is used as-is. Otherwise, it is opened 

524 before writing. 

525 m : sparse-matrix 

526 the sparse matrix to write 

527 hb_info : HBInfo 

528 contains the meta-data for write 

529 

530 Returns 

531 ------- 

532 None 

533 

534 Notes 

535 ----- 

536 At the moment not the full Harwell-Boeing format is supported. Supported 

537 features are: 

538 

539 - assembled, non-symmetric, real matrices 

540 - integer for pointer/indices 

541 - exponential format for float values, and int format 

542 

543 Examples 

544 -------- 

545 We can read and write a harwell-boeing format file: 

546 

547 >>> from scipy.io import hb_read, hb_write 

548 >>> from scipy.sparse import csr_matrix, eye 

549 >>> data = csr_matrix(eye(3)) # create a sparse matrix 

550 >>> hb_write("data.hb", data) # write a hb file 

551 >>> print(hb_read("data.hb")) # read a hb file 

552 (0, 0) 1.0 

553 (1, 1) 1.0 

554 (2, 2) 1.0 

555 

556 """ 

557 m = m.tocsc(copy=False) 

558 

559 if hb_info is None: 

560 hb_info = HBInfo.from_data(m) 

561 

562 def _set_matrix(fid): 

563 hb = HBFile(fid, hb_info) 

564 return hb.write_matrix(m) 

565 

566 if hasattr(path_or_open_file, 'write'): 

567 return _set_matrix(path_or_open_file) 

568 else: 

569 with open(path_or_open_file, 'w') as f: 

570 return _set_matrix(f)