Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/hb.py: 18%
265 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:37 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:37 +0000
1"""
2Implementation of Harwell-Boeing read/write.
4At the moment not the full Harwell-Boeing format is supported. Supported
5features are:
7 - assembled, non-symmetric, real matrices
8 - integer for pointer/indices
9 - exponential format for float values, and int format
11"""
12# TODO:
13# - Add more support (symmetric/complex matrices, non-assembled matrices ?)
15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
16# takes a lot of memory. Being faster would require compiled code.
17# write is not efficient. Although not a terribly exciting task,
18# having reusable facilities to efficiently read/write fortran-formatted files
19# would be useful outside this module.
21import warnings
23import numpy as np
24from scipy.sparse import csc_matrix
25from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat
27__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
28 "HBMatrixType"]
31class MalformedHeader(Exception):
32 pass
35class LineOverflow(Warning):
36 pass
39def _nbytes_full(fmt, nlines):
40 """Return the number of bytes to read to get every full lines for the
41 given parsed fortran format."""
42 return (fmt.repeat * fmt.width + 1) * (nlines - 1)
45class HBInfo:
46 @classmethod
47 def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
48 """Create a HBInfo instance from an existing sparse matrix.
50 Parameters
51 ----------
52 m : sparse matrix
53 the HBInfo instance will derive its parameters from m
54 title : str
55 Title to put in the HB header
56 key : str
57 Key
58 mxtype : HBMatrixType
59 type of the input matrix
60 fmt : dict
61 not implemented
63 Returns
64 -------
65 hb_info : HBInfo instance
66 """
67 m = m.tocsc(copy=False)
69 pointer = m.indptr
70 indices = m.indices
71 values = m.data
73 nrows, ncols = m.shape
74 nnon_zeros = m.nnz
76 if fmt is None:
77 # +1 because HB use one-based indexing (Fortran), and we will write
78 # the indices /pointer as such
79 pointer_fmt = IntFormat.from_number(np.max(pointer+1))
80 indices_fmt = IntFormat.from_number(np.max(indices+1))
82 if values.dtype.kind in np.typecodes["AllFloat"]:
83 values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
84 elif values.dtype.kind in np.typecodes["AllInteger"]:
85 values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
86 else:
87 message = f"type {values.dtype.kind} not implemented yet"
88 raise NotImplementedError(message)
89 else:
90 raise NotImplementedError("fmt argument not supported yet.")
92 if mxtype is None:
93 if not np.isrealobj(values):
94 raise ValueError("Complex values not supported yet")
95 if values.dtype.kind in np.typecodes["AllInteger"]:
96 tp = "integer"
97 elif values.dtype.kind in np.typecodes["AllFloat"]:
98 tp = "real"
99 else:
100 raise NotImplementedError("type %s for values not implemented"
101 % values.dtype)
102 mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
103 else:
104 raise ValueError("mxtype argument not handled yet.")
106 def _nlines(fmt, size):
107 nlines = size // fmt.repeat
108 if nlines * fmt.repeat != size:
109 nlines += 1
110 return nlines
112 pointer_nlines = _nlines(pointer_fmt, pointer.size)
113 indices_nlines = _nlines(indices_fmt, indices.size)
114 values_nlines = _nlines(values_fmt, values.size)
116 total_nlines = pointer_nlines + indices_nlines + values_nlines
118 return cls(title, key,
119 total_nlines, pointer_nlines, indices_nlines, values_nlines,
120 mxtype, nrows, ncols, nnon_zeros,
121 pointer_fmt.fortran_format, indices_fmt.fortran_format,
122 values_fmt.fortran_format)
124 @classmethod
125 def from_file(cls, fid):
126 """Create a HBInfo instance from a file object containing a matrix in the
127 HB format.
129 Parameters
130 ----------
131 fid : file-like matrix
132 File or file-like object containing a matrix in the HB format.
134 Returns
135 -------
136 hb_info : HBInfo instance
137 """
138 # First line
139 line = fid.readline().strip("\n")
140 if not len(line) > 72:
141 raise ValueError("Expected at least 72 characters for first line, "
142 "got: \n%s" % line)
143 title = line[:72]
144 key = line[72:]
146 # Second line
147 line = fid.readline().strip("\n")
148 if not len(line.rstrip()) >= 56:
149 raise ValueError("Expected at least 56 characters for second line, "
150 "got: \n%s" % line)
151 total_nlines = _expect_int(line[:14])
152 pointer_nlines = _expect_int(line[14:28])
153 indices_nlines = _expect_int(line[28:42])
154 values_nlines = _expect_int(line[42:56])
156 rhs_nlines = line[56:72].strip()
157 if rhs_nlines == '':
158 rhs_nlines = 0
159 else:
160 rhs_nlines = _expect_int(rhs_nlines)
161 if not rhs_nlines == 0:
162 raise ValueError("Only files without right hand side supported for "
163 "now.")
165 # Third line
166 line = fid.readline().strip("\n")
167 if not len(line) >= 70:
168 raise ValueError("Expected at least 72 character for third line, got:\n"
169 "%s" % line)
171 mxtype_s = line[:3].upper()
172 if not len(mxtype_s) == 3:
173 raise ValueError("mxtype expected to be 3 characters long")
175 mxtype = HBMatrixType.from_fortran(mxtype_s)
176 if mxtype.value_type not in ["real", "integer"]:
177 raise ValueError("Only real or integer matrices supported for "
178 "now (detected %s)" % mxtype)
179 if not mxtype.structure == "unsymmetric":
180 raise ValueError("Only unsymmetric matrices supported for "
181 "now (detected %s)" % mxtype)
182 if not mxtype.storage == "assembled":
183 raise ValueError("Only assembled matrices supported for now")
185 if not line[3:14] == " " * 11:
186 raise ValueError("Malformed data for third line: %s" % line)
188 nrows = _expect_int(line[14:28])
189 ncols = _expect_int(line[28:42])
190 nnon_zeros = _expect_int(line[42:56])
191 nelementals = _expect_int(line[56:70])
192 if not nelementals == 0:
193 raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
194 % nelementals)
196 # Fourth line
197 line = fid.readline().strip("\n")
199 ct = line.split()
200 if not len(ct) == 3:
201 raise ValueError("Expected 3 formats, got %s" % ct)
203 return cls(title, key,
204 total_nlines, pointer_nlines, indices_nlines, values_nlines,
205 mxtype, nrows, ncols, nnon_zeros,
206 ct[0], ct[1], ct[2],
207 rhs_nlines, nelementals)
209 def __init__(self, title, key,
210 total_nlines, pointer_nlines, indices_nlines, values_nlines,
211 mxtype, nrows, ncols, nnon_zeros,
212 pointer_format_str, indices_format_str, values_format_str,
213 right_hand_sides_nlines=0, nelementals=0):
214 """Do not use this directly, but the class ctrs (from_* functions)."""
215 self.title = title
216 self.key = key
217 if title is None:
218 title = "No Title"
219 if len(title) > 72:
220 raise ValueError("title cannot be > 72 characters")
222 if key is None:
223 key = "|No Key"
224 if len(key) > 8:
225 warnings.warn("key is > 8 characters (key is %s)" % key,
226 LineOverflow, stacklevel=3)
228 self.total_nlines = total_nlines
229 self.pointer_nlines = pointer_nlines
230 self.indices_nlines = indices_nlines
231 self.values_nlines = values_nlines
233 parser = FortranFormatParser()
234 pointer_format = parser.parse(pointer_format_str)
235 if not isinstance(pointer_format, IntFormat):
236 raise ValueError("Expected int format for pointer format, got %s"
237 % pointer_format)
239 indices_format = parser.parse(indices_format_str)
240 if not isinstance(indices_format, IntFormat):
241 raise ValueError("Expected int format for indices format, got %s" %
242 indices_format)
244 values_format = parser.parse(values_format_str)
245 if isinstance(values_format, ExpFormat):
246 if mxtype.value_type not in ["real", "complex"]:
247 raise ValueError(f"Inconsistency between matrix type {mxtype} and "
248 f"value type {values_format}")
249 values_dtype = np.float64
250 elif isinstance(values_format, IntFormat):
251 if mxtype.value_type not in ["integer"]:
252 raise ValueError(f"Inconsistency between matrix type {mxtype} and "
253 f"value type {values_format}")
254 # XXX: fortran int -> dtype association ?
255 values_dtype = int
256 else:
257 raise ValueError(f"Unsupported format for values {values_format!r}")
259 self.pointer_format = pointer_format
260 self.indices_format = indices_format
261 self.values_format = values_format
263 self.pointer_dtype = np.int32
264 self.indices_dtype = np.int32
265 self.values_dtype = values_dtype
267 self.pointer_nlines = pointer_nlines
268 self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
270 self.indices_nlines = indices_nlines
271 self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
273 self.values_nlines = values_nlines
274 self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
276 self.nrows = nrows
277 self.ncols = ncols
278 self.nnon_zeros = nnon_zeros
279 self.nelementals = nelementals
280 self.mxtype = mxtype
282 def dump(self):
283 """Gives the header corresponding to this instance as a string."""
284 header = [self.title.ljust(72) + self.key.ljust(8)]
286 header.append("%14d%14d%14d%14d" %
287 (self.total_nlines, self.pointer_nlines,
288 self.indices_nlines, self.values_nlines))
289 header.append("%14s%14d%14d%14d%14d" %
290 (self.mxtype.fortran_format.ljust(14), self.nrows,
291 self.ncols, self.nnon_zeros, 0))
293 pffmt = self.pointer_format.fortran_format
294 iffmt = self.indices_format.fortran_format
295 vffmt = self.values_format.fortran_format
296 header.append("%16s%16s%20s" %
297 (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
298 return "\n".join(header)
301def _expect_int(value, msg=None):
302 try:
303 return int(value)
304 except ValueError as e:
305 if msg is None:
306 msg = "Expected an int, got %s"
307 raise ValueError(msg % value) from e
310def _read_hb_data(content, header):
311 # XXX: look at a way to reduce memory here (big string creation)
312 ptr_string = "".join([content.read(header.pointer_nbytes_full),
313 content.readline()])
314 ptr = np.fromstring(ptr_string,
315 dtype=int, sep=' ')
317 ind_string = "".join([content.read(header.indices_nbytes_full),
318 content.readline()])
319 ind = np.fromstring(ind_string,
320 dtype=int, sep=' ')
322 val_string = "".join([content.read(header.values_nbytes_full),
323 content.readline()])
324 val = np.fromstring(val_string,
325 dtype=header.values_dtype, sep=' ')
327 try:
328 return csc_matrix((val, ind-1, ptr-1),
329 shape=(header.nrows, header.ncols))
330 except ValueError as e:
331 raise e
334def _write_data(m, fid, header):
335 m = m.tocsc(copy=False)
337 def write_array(f, ar, nlines, fmt):
338 # ar_nlines is the number of full lines, n is the number of items per
339 # line, ffmt the fortran format
340 pyfmt = fmt.python_format
341 pyfmt_full = pyfmt * fmt.repeat
343 # for each array to write, we first write the full lines, and special
344 # case for partial line
345 full = ar[:(nlines - 1) * fmt.repeat]
346 for row in full.reshape((nlines-1, fmt.repeat)):
347 f.write(pyfmt_full % tuple(row) + "\n")
348 nremain = ar.size - full.size
349 if nremain > 0:
350 f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
352 fid.write(header.dump())
353 fid.write("\n")
354 # +1 is for Fortran one-based indexing
355 write_array(fid, m.indptr+1, header.pointer_nlines,
356 header.pointer_format)
357 write_array(fid, m.indices+1, header.indices_nlines,
358 header.indices_format)
359 write_array(fid, m.data, header.values_nlines,
360 header.values_format)
363class HBMatrixType:
364 """Class to hold the matrix type."""
365 # q2f* translates qualified names to Fortran character
366 _q2f_type = {
367 "real": "R",
368 "complex": "C",
369 "pattern": "P",
370 "integer": "I",
371 }
372 _q2f_structure = {
373 "symmetric": "S",
374 "unsymmetric": "U",
375 "hermitian": "H",
376 "skewsymmetric": "Z",
377 "rectangular": "R"
378 }
379 _q2f_storage = {
380 "assembled": "A",
381 "elemental": "E",
382 }
384 _f2q_type = {j: i for i, j in _q2f_type.items()}
385 _f2q_structure = {j: i for i, j in _q2f_structure.items()}
386 _f2q_storage = {j: i for i, j in _q2f_storage.items()}
388 @classmethod
389 def from_fortran(cls, fmt):
390 if not len(fmt) == 3:
391 raise ValueError("Fortran format for matrix type should be 3 "
392 "characters long")
393 try:
394 value_type = cls._f2q_type[fmt[0]]
395 structure = cls._f2q_structure[fmt[1]]
396 storage = cls._f2q_storage[fmt[2]]
397 return cls(value_type, structure, storage)
398 except KeyError as e:
399 raise ValueError("Unrecognized format %s" % fmt) from e
401 def __init__(self, value_type, structure, storage="assembled"):
402 self.value_type = value_type
403 self.structure = structure
404 self.storage = storage
406 if value_type not in self._q2f_type:
407 raise ValueError("Unrecognized type %s" % value_type)
408 if structure not in self._q2f_structure:
409 raise ValueError("Unrecognized structure %s" % structure)
410 if storage not in self._q2f_storage:
411 raise ValueError("Unrecognized storage %s" % storage)
413 @property
414 def fortran_format(self):
415 return self._q2f_type[self.value_type] + \
416 self._q2f_structure[self.structure] + \
417 self._q2f_storage[self.storage]
419 def __repr__(self):
420 return f"HBMatrixType({self.value_type}, {self.structure}, {self.storage})"
423class HBFile:
424 def __init__(self, file, hb_info=None):
425 """Create a HBFile instance.
427 Parameters
428 ----------
429 file : file-object
430 StringIO work as well
431 hb_info : HBInfo, optional
432 Should be given as an argument for writing, in which case the file
433 should be writable.
434 """
435 self._fid = file
436 if hb_info is None:
437 self._hb_info = HBInfo.from_file(file)
438 else:
439 #raise OSError("file %s is not writable, and hb_info "
440 # "was given." % file)
441 self._hb_info = hb_info
443 @property
444 def title(self):
445 return self._hb_info.title
447 @property
448 def key(self):
449 return self._hb_info.key
451 @property
452 def type(self):
453 return self._hb_info.mxtype.value_type
455 @property
456 def structure(self):
457 return self._hb_info.mxtype.structure
459 @property
460 def storage(self):
461 return self._hb_info.mxtype.storage
463 def read_matrix(self):
464 return _read_hb_data(self._fid, self._hb_info)
466 def write_matrix(self, m):
467 return _write_data(m, self._fid, self._hb_info)
470def hb_read(path_or_open_file):
471 """Read HB-format file.
473 Parameters
474 ----------
475 path_or_open_file : path-like or file-like
476 If a file-like object, it is used as-is. Otherwise, it is opened
477 before reading.
479 Returns
480 -------
481 data : scipy.sparse.csc_matrix instance
482 The data read from the HB file as a sparse matrix.
484 Notes
485 -----
486 At the moment not the full Harwell-Boeing format is supported. Supported
487 features are:
489 - assembled, non-symmetric, real matrices
490 - integer for pointer/indices
491 - exponential format for float values, and int format
493 Examples
494 --------
495 We can read and write a harwell-boeing format file:
497 >>> from scipy.io import hb_read, hb_write
498 >>> from scipy.sparse import csr_matrix, eye
499 >>> data = csr_matrix(eye(3)) # create a sparse matrix
500 >>> hb_write("data.hb", data) # write a hb file
501 >>> print(hb_read("data.hb")) # read a hb file
502 (0, 0) 1.0
503 (1, 1) 1.0
504 (2, 2) 1.0
506 """
507 def _get_matrix(fid):
508 hb = HBFile(fid)
509 return hb.read_matrix()
511 if hasattr(path_or_open_file, 'read'):
512 return _get_matrix(path_or_open_file)
513 else:
514 with open(path_or_open_file) as f:
515 return _get_matrix(f)
518def hb_write(path_or_open_file, m, hb_info=None):
519 """Write HB-format file.
521 Parameters
522 ----------
523 path_or_open_file : path-like or file-like
524 If a file-like object, it is used as-is. Otherwise, it is opened
525 before writing.
526 m : sparse-matrix
527 the sparse matrix to write
528 hb_info : HBInfo
529 contains the meta-data for write
531 Returns
532 -------
533 None
535 Notes
536 -----
537 At the moment not the full Harwell-Boeing format is supported. Supported
538 features are:
540 - assembled, non-symmetric, real matrices
541 - integer for pointer/indices
542 - exponential format for float values, and int format
544 Examples
545 --------
546 We can read and write a harwell-boeing format file:
548 >>> from scipy.io import hb_read, hb_write
549 >>> from scipy.sparse import csr_matrix, eye
550 >>> data = csr_matrix(eye(3)) # create a sparse matrix
551 >>> hb_write("data.hb", data) # write a hb file
552 >>> print(hb_read("data.hb")) # read a hb file
553 (0, 0) 1.0
554 (1, 1) 1.0
555 (2, 2) 1.0
557 """
558 m = m.tocsc(copy=False)
560 if hb_info is None:
561 hb_info = HBInfo.from_data(m)
563 def _set_matrix(fid):
564 hb = HBFile(fid, hb_info)
565 return hb.write_matrix(m)
567 if hasattr(path_or_open_file, 'write'):
568 return _set_matrix(path_or_open_file)
569 else:
570 with open(path_or_open_file, 'w') as f:
571 return _set_matrix(f)