Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/hb.py: 19%
264 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1"""
2Implementation of Harwell-Boeing read/write.
4At the moment not the full Harwell-Boeing format is supported. Supported
5features are:
7 - assembled, non-symmetric, real matrices
8 - integer for pointer/indices
9 - exponential format for float values, and int format
11"""
12# TODO:
13# - Add more support (symmetric/complex matrices, non-assembled matrices ?)
15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
16# takes a lot of memory. Being faster would require compiled code.
17# write is not efficient. Although not a terribly exciting task,
18# having reusable facilities to efficiently read/write fortran-formatted files
19# would be useful outside this module.
21import warnings
23import numpy as np
24from scipy.sparse import csc_matrix
25from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat
27__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
28 "HBMatrixType"]
31class MalformedHeader(Exception):
32 pass
35class LineOverflow(Warning):
36 pass
39def _nbytes_full(fmt, nlines):
40 """Return the number of bytes to read to get every full lines for the
41 given parsed fortran format."""
42 return (fmt.repeat * fmt.width + 1) * (nlines - 1)
45class HBInfo:
46 @classmethod
47 def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
48 """Create a HBInfo instance from an existing sparse matrix.
50 Parameters
51 ----------
52 m : sparse matrix
53 the HBInfo instance will derive its parameters from m
54 title : str
55 Title to put in the HB header
56 key : str
57 Key
58 mxtype : HBMatrixType
59 type of the input matrix
60 fmt : dict
61 not implemented
63 Returns
64 -------
65 hb_info : HBInfo instance
66 """
67 m = m.tocsc(copy=False)
69 pointer = m.indptr
70 indices = m.indices
71 values = m.data
73 nrows, ncols = m.shape
74 nnon_zeros = m.nnz
76 if fmt is None:
77 # +1 because HB use one-based indexing (Fortran), and we will write
78 # the indices /pointer as such
79 pointer_fmt = IntFormat.from_number(np.max(pointer+1))
80 indices_fmt = IntFormat.from_number(np.max(indices+1))
82 if values.dtype.kind in np.typecodes["AllFloat"]:
83 values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
84 elif values.dtype.kind in np.typecodes["AllInteger"]:
85 values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
86 else:
87 raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
88 else:
89 raise NotImplementedError("fmt argument not supported yet.")
91 if mxtype is None:
92 if not np.isrealobj(values):
93 raise ValueError("Complex values not supported yet")
94 if values.dtype.kind in np.typecodes["AllInteger"]:
95 tp = "integer"
96 elif values.dtype.kind in np.typecodes["AllFloat"]:
97 tp = "real"
98 else:
99 raise NotImplementedError("type %s for values not implemented"
100 % values.dtype)
101 mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
102 else:
103 raise ValueError("mxtype argument not handled yet.")
105 def _nlines(fmt, size):
106 nlines = size // fmt.repeat
107 if nlines * fmt.repeat != size:
108 nlines += 1
109 return nlines
111 pointer_nlines = _nlines(pointer_fmt, pointer.size)
112 indices_nlines = _nlines(indices_fmt, indices.size)
113 values_nlines = _nlines(values_fmt, values.size)
115 total_nlines = pointer_nlines + indices_nlines + values_nlines
117 return cls(title, key,
118 total_nlines, pointer_nlines, indices_nlines, values_nlines,
119 mxtype, nrows, ncols, nnon_zeros,
120 pointer_fmt.fortran_format, indices_fmt.fortran_format,
121 values_fmt.fortran_format)
123 @classmethod
124 def from_file(cls, fid):
125 """Create a HBInfo instance from a file object containing a matrix in the
126 HB format.
128 Parameters
129 ----------
130 fid : file-like matrix
131 File or file-like object containing a matrix in the HB format.
133 Returns
134 -------
135 hb_info : HBInfo instance
136 """
137 # First line
138 line = fid.readline().strip("\n")
139 if not len(line) > 72:
140 raise ValueError("Expected at least 72 characters for first line, "
141 "got: \n%s" % line)
142 title = line[:72]
143 key = line[72:]
145 # Second line
146 line = fid.readline().strip("\n")
147 if not len(line.rstrip()) >= 56:
148 raise ValueError("Expected at least 56 characters for second line, "
149 "got: \n%s" % line)
150 total_nlines = _expect_int(line[:14])
151 pointer_nlines = _expect_int(line[14:28])
152 indices_nlines = _expect_int(line[28:42])
153 values_nlines = _expect_int(line[42:56])
155 rhs_nlines = line[56:72].strip()
156 if rhs_nlines == '':
157 rhs_nlines = 0
158 else:
159 rhs_nlines = _expect_int(rhs_nlines)
160 if not rhs_nlines == 0:
161 raise ValueError("Only files without right hand side supported for "
162 "now.")
164 # Third line
165 line = fid.readline().strip("\n")
166 if not len(line) >= 70:
167 raise ValueError("Expected at least 72 character for third line, got:\n"
168 "%s" % line)
170 mxtype_s = line[:3].upper()
171 if not len(mxtype_s) == 3:
172 raise ValueError("mxtype expected to be 3 characters long")
174 mxtype = HBMatrixType.from_fortran(mxtype_s)
175 if mxtype.value_type not in ["real", "integer"]:
176 raise ValueError("Only real or integer matrices supported for "
177 "now (detected %s)" % mxtype)
178 if not mxtype.structure == "unsymmetric":
179 raise ValueError("Only unsymmetric matrices supported for "
180 "now (detected %s)" % mxtype)
181 if not mxtype.storage == "assembled":
182 raise ValueError("Only assembled matrices supported for now")
184 if not line[3:14] == " " * 11:
185 raise ValueError("Malformed data for third line: %s" % line)
187 nrows = _expect_int(line[14:28])
188 ncols = _expect_int(line[28:42])
189 nnon_zeros = _expect_int(line[42:56])
190 nelementals = _expect_int(line[56:70])
191 if not nelementals == 0:
192 raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
193 % nelementals)
195 # Fourth line
196 line = fid.readline().strip("\n")
198 ct = line.split()
199 if not len(ct) == 3:
200 raise ValueError("Expected 3 formats, got %s" % ct)
202 return cls(title, key,
203 total_nlines, pointer_nlines, indices_nlines, values_nlines,
204 mxtype, nrows, ncols, nnon_zeros,
205 ct[0], ct[1], ct[2],
206 rhs_nlines, nelementals)
208 def __init__(self, title, key,
209 total_nlines, pointer_nlines, indices_nlines, values_nlines,
210 mxtype, nrows, ncols, nnon_zeros,
211 pointer_format_str, indices_format_str, values_format_str,
212 right_hand_sides_nlines=0, nelementals=0):
213 """Do not use this directly, but the class ctrs (from_* functions)."""
214 self.title = title
215 self.key = key
216 if title is None:
217 title = "No Title"
218 if len(title) > 72:
219 raise ValueError("title cannot be > 72 characters")
221 if key is None:
222 key = "|No Key"
223 if len(key) > 8:
224 warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
226 self.total_nlines = total_nlines
227 self.pointer_nlines = pointer_nlines
228 self.indices_nlines = indices_nlines
229 self.values_nlines = values_nlines
231 parser = FortranFormatParser()
232 pointer_format = parser.parse(pointer_format_str)
233 if not isinstance(pointer_format, IntFormat):
234 raise ValueError("Expected int format for pointer format, got %s"
235 % pointer_format)
237 indices_format = parser.parse(indices_format_str)
238 if not isinstance(indices_format, IntFormat):
239 raise ValueError("Expected int format for indices format, got %s" %
240 indices_format)
242 values_format = parser.parse(values_format_str)
243 if isinstance(values_format, ExpFormat):
244 if mxtype.value_type not in ["real", "complex"]:
245 raise ValueError("Inconsistency between matrix type {} and "
246 "value type {}".format(mxtype, values_format))
247 values_dtype = np.float64
248 elif isinstance(values_format, IntFormat):
249 if mxtype.value_type not in ["integer"]:
250 raise ValueError("Inconsistency between matrix type {} and "
251 "value type {}".format(mxtype, values_format))
252 # XXX: fortran int -> dtype association ?
253 values_dtype = int
254 else:
255 raise ValueError(f"Unsupported format for values {values_format!r}")
257 self.pointer_format = pointer_format
258 self.indices_format = indices_format
259 self.values_format = values_format
261 self.pointer_dtype = np.int32
262 self.indices_dtype = np.int32
263 self.values_dtype = values_dtype
265 self.pointer_nlines = pointer_nlines
266 self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
268 self.indices_nlines = indices_nlines
269 self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
271 self.values_nlines = values_nlines
272 self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
274 self.nrows = nrows
275 self.ncols = ncols
276 self.nnon_zeros = nnon_zeros
277 self.nelementals = nelementals
278 self.mxtype = mxtype
280 def dump(self):
281 """Gives the header corresponding to this instance as a string."""
282 header = [self.title.ljust(72) + self.key.ljust(8)]
284 header.append("%14d%14d%14d%14d" %
285 (self.total_nlines, self.pointer_nlines,
286 self.indices_nlines, self.values_nlines))
287 header.append("%14s%14d%14d%14d%14d" %
288 (self.mxtype.fortran_format.ljust(14), self.nrows,
289 self.ncols, self.nnon_zeros, 0))
291 pffmt = self.pointer_format.fortran_format
292 iffmt = self.indices_format.fortran_format
293 vffmt = self.values_format.fortran_format
294 header.append("%16s%16s%20s" %
295 (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
296 return "\n".join(header)
299def _expect_int(value, msg=None):
300 try:
301 return int(value)
302 except ValueError as e:
303 if msg is None:
304 msg = "Expected an int, got %s"
305 raise ValueError(msg % value) from e
308def _read_hb_data(content, header):
309 # XXX: look at a way to reduce memory here (big string creation)
310 ptr_string = "".join([content.read(header.pointer_nbytes_full),
311 content.readline()])
312 ptr = np.fromstring(ptr_string,
313 dtype=int, sep=' ')
315 ind_string = "".join([content.read(header.indices_nbytes_full),
316 content.readline()])
317 ind = np.fromstring(ind_string,
318 dtype=int, sep=' ')
320 val_string = "".join([content.read(header.values_nbytes_full),
321 content.readline()])
322 val = np.fromstring(val_string,
323 dtype=header.values_dtype, sep=' ')
325 try:
326 return csc_matrix((val, ind-1, ptr-1),
327 shape=(header.nrows, header.ncols))
328 except ValueError as e:
329 raise e
332def _write_data(m, fid, header):
333 m = m.tocsc(copy=False)
335 def write_array(f, ar, nlines, fmt):
336 # ar_nlines is the number of full lines, n is the number of items per
337 # line, ffmt the fortran format
338 pyfmt = fmt.python_format
339 pyfmt_full = pyfmt * fmt.repeat
341 # for each array to write, we first write the full lines, and special
342 # case for partial line
343 full = ar[:(nlines - 1) * fmt.repeat]
344 for row in full.reshape((nlines-1, fmt.repeat)):
345 f.write(pyfmt_full % tuple(row) + "\n")
346 nremain = ar.size - full.size
347 if nremain > 0:
348 f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
350 fid.write(header.dump())
351 fid.write("\n")
352 # +1 is for Fortran one-based indexing
353 write_array(fid, m.indptr+1, header.pointer_nlines,
354 header.pointer_format)
355 write_array(fid, m.indices+1, header.indices_nlines,
356 header.indices_format)
357 write_array(fid, m.data, header.values_nlines,
358 header.values_format)
361class HBMatrixType:
362 """Class to hold the matrix type."""
363 # q2f* translates qualified names to Fortran character
364 _q2f_type = {
365 "real": "R",
366 "complex": "C",
367 "pattern": "P",
368 "integer": "I",
369 }
370 _q2f_structure = {
371 "symmetric": "S",
372 "unsymmetric": "U",
373 "hermitian": "H",
374 "skewsymmetric": "Z",
375 "rectangular": "R"
376 }
377 _q2f_storage = {
378 "assembled": "A",
379 "elemental": "E",
380 }
382 _f2q_type = {j: i for i, j in _q2f_type.items()}
383 _f2q_structure = {j: i for i, j in _q2f_structure.items()}
384 _f2q_storage = {j: i for i, j in _q2f_storage.items()}
386 @classmethod
387 def from_fortran(cls, fmt):
388 if not len(fmt) == 3:
389 raise ValueError("Fortran format for matrix type should be 3 "
390 "characters long")
391 try:
392 value_type = cls._f2q_type[fmt[0]]
393 structure = cls._f2q_structure[fmt[1]]
394 storage = cls._f2q_storage[fmt[2]]
395 return cls(value_type, structure, storage)
396 except KeyError as e:
397 raise ValueError("Unrecognized format %s" % fmt) from e
399 def __init__(self, value_type, structure, storage="assembled"):
400 self.value_type = value_type
401 self.structure = structure
402 self.storage = storage
404 if value_type not in self._q2f_type:
405 raise ValueError("Unrecognized type %s" % value_type)
406 if structure not in self._q2f_structure:
407 raise ValueError("Unrecognized structure %s" % structure)
408 if storage not in self._q2f_storage:
409 raise ValueError("Unrecognized storage %s" % storage)
411 @property
412 def fortran_format(self):
413 return self._q2f_type[self.value_type] + \
414 self._q2f_structure[self.structure] + \
415 self._q2f_storage[self.storage]
417 def __repr__(self):
418 return "HBMatrixType(%s, %s, %s)" % \
419 (self.value_type, self.structure, self.storage)
422class HBFile:
423 def __init__(self, file, hb_info=None):
424 """Create a HBFile instance.
426 Parameters
427 ----------
428 file : file-object
429 StringIO work as well
430 hb_info : HBInfo, optional
431 Should be given as an argument for writing, in which case the file
432 should be writable.
433 """
434 self._fid = file
435 if hb_info is None:
436 self._hb_info = HBInfo.from_file(file)
437 else:
438 #raise OSError("file %s is not writable, and hb_info "
439 # "was given." % file)
440 self._hb_info = hb_info
442 @property
443 def title(self):
444 return self._hb_info.title
446 @property
447 def key(self):
448 return self._hb_info.key
450 @property
451 def type(self):
452 return self._hb_info.mxtype.value_type
454 @property
455 def structure(self):
456 return self._hb_info.mxtype.structure
458 @property
459 def storage(self):
460 return self._hb_info.mxtype.storage
462 def read_matrix(self):
463 return _read_hb_data(self._fid, self._hb_info)
465 def write_matrix(self, m):
466 return _write_data(m, self._fid, self._hb_info)
469def hb_read(path_or_open_file):
470 """Read HB-format file.
472 Parameters
473 ----------
474 path_or_open_file : path-like or file-like
475 If a file-like object, it is used as-is. Otherwise, it is opened
476 before reading.
478 Returns
479 -------
480 data : scipy.sparse.csc_matrix instance
481 The data read from the HB file as a sparse matrix.
483 Notes
484 -----
485 At the moment not the full Harwell-Boeing format is supported. Supported
486 features are:
488 - assembled, non-symmetric, real matrices
489 - integer for pointer/indices
490 - exponential format for float values, and int format
492 Examples
493 --------
494 We can read and write a harwell-boeing format file:
496 >>> from scipy.io import hb_read, hb_write
497 >>> from scipy.sparse import csr_matrix, eye
498 >>> data = csr_matrix(eye(3)) # create a sparse matrix
499 >>> hb_write("data.hb", data) # write a hb file
500 >>> print(hb_read("data.hb")) # read a hb file
501 (0, 0) 1.0
502 (1, 1) 1.0
503 (2, 2) 1.0
505 """
506 def _get_matrix(fid):
507 hb = HBFile(fid)
508 return hb.read_matrix()
510 if hasattr(path_or_open_file, 'read'):
511 return _get_matrix(path_or_open_file)
512 else:
513 with open(path_or_open_file) as f:
514 return _get_matrix(f)
517def hb_write(path_or_open_file, m, hb_info=None):
518 """Write HB-format file.
520 Parameters
521 ----------
522 path_or_open_file : path-like or file-like
523 If a file-like object, it is used as-is. Otherwise, it is opened
524 before writing.
525 m : sparse-matrix
526 the sparse matrix to write
527 hb_info : HBInfo
528 contains the meta-data for write
530 Returns
531 -------
532 None
534 Notes
535 -----
536 At the moment not the full Harwell-Boeing format is supported. Supported
537 features are:
539 - assembled, non-symmetric, real matrices
540 - integer for pointer/indices
541 - exponential format for float values, and int format
543 Examples
544 --------
545 We can read and write a harwell-boeing format file:
547 >>> from scipy.io import hb_read, hb_write
548 >>> from scipy.sparse import csr_matrix, eye
549 >>> data = csr_matrix(eye(3)) # create a sparse matrix
550 >>> hb_write("data.hb", data) # write a hb file
551 >>> print(hb_read("data.hb")) # read a hb file
552 (0, 0) 1.0
553 (1, 1) 1.0
554 (2, 2) 1.0
556 """
557 m = m.tocsc(copy=False)
559 if hb_info is None:
560 hb_info = HBInfo.from_data(m)
562 def _set_matrix(fid):
563 hb = HBFile(fid, hb_info)
564 return hb.write_matrix(m)
566 if hasattr(path_or_open_file, 'write'):
567 return _set_matrix(path_or_open_file)
568 else:
569 with open(path_or_open_file, 'w') as f:
570 return _set_matrix(f)