Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_fast_matrix_market/__init__.py: 17%
188 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
1# Copyright (C) 2022-2023 Adam Lugowski. All rights reserved.
2# Use of this source code is governed by the BSD 2-clause license found in
3# the LICENSE.txt file.
4# SPDX-License-Identifier: BSD-2-Clause
5"""
6Matrix Market I/O with a C++ backend.
7See http://math.nist.gov/MatrixMarket/formats.html
8for information about the Matrix Market format.
10.. versionadded:: 1.12.0
11"""
12import io
13import os
15import numpy as np
16import scipy.sparse
17from scipy.io import _mmio
19__all__ = ['mminfo', 'mmread', 'mmwrite']
# Module-wide thread-count setting, read whenever a cursor is opened without
# an explicit ``parallelism`` argument.
PARALLELISM = 0
"""
Number of threads that `mmread()` and `mmwrite()` use.
0 means number of CPUs in the system.
Use `threadpoolctl` to set this value.
"""

# Consulted by mmwrite() only when it is called with symmetry='AUTO'.
ALWAYS_FIND_SYMMETRY = False
"""
Whether mmwrite() with symmetry='AUTO' will always search for symmetry
inside the matrix. This is scipy.io._mmio.mmwrite()'s default behavior,
but has a significant performance cost on large matrices.
"""

# Matrix Market field name -> NumPy dtype name used for the value buffers.
# 'pattern' maps to float64 as well, so it shares the 'real' buffer type.
_field_to_dtype = {
    "integer": "int64",
    "unsigned-integer": "uint64",
    "real": "float64",
    "complex": "complex",
    "pattern": "float64",
}
def _fmm_version():
    """
    Return the ``__version__`` attribute of the ``_fmm_core`` extension module.
    """
    # Imported inside the function so the extension module is only loaded
    # when this is actually called.
    from . import _fmm_core as core
    return core.__version__
# Register with threadpoolctl, if available
try:
    import threadpoolctl

    class _FMMThreadPoolCtlController(threadpoolctl.LibController):
        """
        threadpoolctl controller that reads and writes the module-level
        ``PARALLELISM`` setting.
        """
        user_api = "scipy"
        internal_api = "scipy_mmio"

        filename_prefixes = ("_fmm_core",)

        def get_num_threads(self):
            """Return the current value of ``PARALLELISM``."""
            # Read-only access, so no ``global`` declaration is needed.
            return PARALLELISM

        def set_num_threads(self, num_threads):
            """Store `num_threads` in the module-level ``PARALLELISM``."""
            global PARALLELISM
            PARALLELISM = num_threads

        def get_version(self):
            """Return the version reported by ``_fmm_version()``."""
            # Call the helper: returning the function object itself would put
            # a callable, not a version value, in the controller's info.
            return _fmm_version()

        def set_additional_attributes(self):
            """No extra attributes are reported for this library."""
            pass

    threadpoolctl.register(_FMMThreadPoolCtlController)
except (ImportError, AttributeError):
    # threadpoolctl not installed or version too old
    pass
class _TextToBytesWrapper(io.BufferedReader):
    """
    Convert a TextIOBase string stream to a byte stream.

    Reads are forwarded to the wrapped text stream and the resulting ``str``
    is encoded to ``bytes``. ``size`` arguments are therefore passed to the
    text stream unchanged, i.e. they count characters, not bytes.

    Parameters
    ----------
    text_io_buffer : io.TextIOBase
        Text stream to wrap. It is never closed by this wrapper.
    encoding : str, optional
        Encoding of the produced bytes. Defaults to the stream's own
        ``encoding`` attribute, or ``'utf-8'`` if that is unset.
    errors : str, optional
        Encoding error handler. Defaults to the stream's own ``errors``
        attribute, or ``'strict'`` if that is unset.
    **kwargs
        Forwarded to ``io.BufferedReader.__init__``.
    """

    def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
        super().__init__(text_io_buffer, **kwargs)
        self.encoding = encoding or text_io_buffer.encoding or 'utf-8'
        self.errors = errors or text_io_buffer.errors or 'strict'

    def __del__(self):
        # do not close the wrapped stream
        try:
            self.detach()
        except ValueError:
            # Already detached (or never fully initialised): nothing is left
            # to release, and raising from a finalizer only produces noise.
            pass

    def _encode(self, text):
        """Encode `text` with this wrapper's encoding and error handler."""
        return text.encode(self.encoding, errors=self.errors)

    def _encoding_call(self, method_name, *args, **kwargs):
        """Call ``self.raw.<method_name>`` and return its result as bytes."""
        raw_method = getattr(self.raw, method_name)
        return self._encode(raw_method(*args, **kwargs))

    def read(self, size=-1):
        """Read up to `size` characters from the text stream, as bytes."""
        return self._encoding_call('read', size)

    def read1(self, size=-1):
        """Like `read`; uses the stream's own ``read1`` when it has one."""
        # Text streams such as io.StringIO have no ``read1``; a plain read
        # avoids the AttributeError the direct lookup would raise.
        if hasattr(self.raw, 'read1'):
            return self._encoding_call('read1', size)
        return self._encoding_call('read', size)

    def peek(self, size=-1):
        """
        Return upcoming data as bytes without advancing the text stream.

        If the wrapped stream has no ``peek`` method, the data is read and the
        stream is then rewound, which requires ``tell``/``seek`` support.
        A non-positive `size` peeks a single character.
        """
        raw = self.raw
        if hasattr(raw, 'peek'):
            return self._encoding_call('peek', size)

        position = raw.tell()
        try:
            text = raw.read(size if size > 0 else 1)
        finally:
            # Restore the position even if the read failed.
            raw.seek(position)
        return self._encode(text)

    def seek(self, offset, whence=0):
        """
        Seek to the start or the end of the stream; ignore any other request.
        """
        # Random seeks are not allowed because of non-trivial conversion
        # between byte and character offsets,
        # with the possibility of a byte offset landing within a character.
        if offset == 0 and whence in (0, 2):
            # seek to start or end is ok
            super().seek(offset, whence)
        else:
            # Drop any other seek
            # In this application this may happen when pystreambuf seeks during sync(),
            # which can happen when closing a partially-read stream.
            # Ex. when mminfo() only reads the header then exits.
            pass
def _read_body_array(cursor):
    """
    Read the body of a MatrixMarket ``array`` file into a new ndarray.

    The array is allocated with the shape from ``cursor.header`` and the
    dtype that ``_field_to_dtype`` associates with the header's field, then
    handed to ``_fmm_core.read_body_array`` to be filled in.
    """
    from . import _fmm_core

    value_dtype = _field_to_dtype.get(cursor.header.field)
    dense = np.zeros(cursor.header.shape, dtype=value_dtype)
    _fmm_core.read_body_array(cursor, dense)
    return dense
def _read_body_coo(cursor, generalize_symmetry=True):
    """
    Read the body of a MatrixMarket ``coordinate`` file.

    Parameters
    ----------
    cursor
        Read cursor whose ``header`` supplies ``nnz``, ``nrows``, ``ncols``,
        ``field``, ``symmetry`` and ``shape``.
    generalize_symmetry : bool, optional
        If True and the header's symmetry is not ``"general"``, the
        off-diagonal entries are mirrored across the diagonal (negated for
        ``"skew-symmetric"``, conjugated for ``"hermitian"``) and appended.

    Returns
    -------
    triplet : tuple
        ``(data, (rows, cols))``.
    shape : tuple
        ``cursor.header.shape``.
    """
    from . import _fmm_core

    header = cursor.header
    entry_count = header.nnz

    # Dimensions of 2**31 or more are too large to fit in int32
    needs_wide_index = header.nrows >= 2**31 or header.ncols >= 2**31
    index_dtype = "int64" if needs_wide_index else "int32"

    rows = np.zeros(entry_count, dtype=index_dtype)
    cols = np.zeros(entry_count, dtype=index_dtype)
    values = np.zeros(entry_count, dtype=_field_to_dtype.get(header.field))

    _fmm_core.read_body_coo(cursor, rows, cols, values)

    # Re-read the header after the body has been consumed.
    header = cursor.header
    if generalize_symmetry and header.symmetry != "general":
        symmetry = header.symmetry
        mirror = rows != cols

        # Boolean-mask indexing yields a copy, so the in-place negation
        # below does not touch ``values``.
        mirrored_values = values[mirror]
        if symmetry == "skew-symmetric":
            mirrored_values *= -1
        elif symmetry == "hermitian":
            mirrored_values = mirrored_values.conjugate()

        # The mirrored entries swap their row and column indices.
        rows, cols = (np.concatenate((rows, cols[mirror])),
                      np.concatenate((cols, rows[mirror])))
        values = np.concatenate((values, mirrored_values))

    return (values, (rows, cols)), header.shape
def _get_read_cursor(source, parallelism=None):
    """
    Open file for reading.

    Parameters
    ----------
    source : path-like or file-like
        A filesystem path (``str``, ``bytes`` or ``os.PathLike``) or an
        object with a ``read`` method. Paths ending in ``.gz`` or ``.bz2``
        are opened through `gzip` / `bz2` and read as streams.
    parallelism : int, optional
        Thread count passed to ``_fmm_core``. Defaults to the module-level
        ``PARALLELISM``.

    Returns
    -------
    cursor
        The cursor returned by ``_fmm_core``.
    stream_to_close
        The gzip/bz2 stream opened here, which the caller must close once it
        is done with the cursor, or None if nothing was opened.

    Raises
    ------
    TypeError
        If `source` is neither a path nor an object with a ``read`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM

    try:
        # fsdecode() accepts the same inputs as fspath() but always returns
        # str, so a bytes path is decoded instead of becoming "b'...'".
        path = os.fsdecode(source)
    except TypeError:
        # Not a path; handled as a stream object below.
        path = None

    stream_to_close = None
    if path is not None:
        if path.endswith('.gz'):
            import gzip
            source = stream_to_close = gzip.GzipFile(path, 'r')
        elif path.endswith('.bz2'):
            import bz2
            source = stream_to_close = bz2.BZ2File(path, 'rb')
        else:
            return _fmm_core.open_read_file(path, parallelism), None

    # Stream object.
    if not hasattr(source, "read"):
        raise TypeError("Unknown source type")

    if isinstance(source, io.TextIOBase):
        source = _TextToBytesWrapper(source)

    try:
        cursor = _fmm_core.open_read_stream(source, parallelism)
    except BaseException:
        # The caller never receives the stream opened above, so it has to be
        # released here before the error propagates.
        if stream_to_close is not None:
            stream_to_close.close()
        raise
    return cursor, stream_to_close
def _get_write_cursor(target, h=None, comment=None, parallelism=None,
                      symmetry="general", precision=None):
    """
    Open file for writing.

    Parameters
    ----------
    target : path-like or binary file-like
        A filesystem path (``str``, ``bytes`` or ``os.PathLike``) or an
        object with a ``write`` method opened in binary mode.
    h : optional
        Header object to use. If falsy, one is built with
        ``_fmm_core.header(comment=comment, symmetry=symmetry)``.
    comment : str, optional
        Header comment. None is replaced by ``''``.
    parallelism : int, optional
        Thread count passed to ``_fmm_core``. Defaults to the module-level
        ``PARALLELISM``.
    symmetry : str, optional
        Header symmetry. None is replaced by ``"general"``.
    precision : int, optional
        Passed to ``_fmm_core``. None is replaced by ``-1``.

    Returns
    -------
    cursor
        The write cursor returned by ``_fmm_core``.

    Raises
    ------
    TypeError
        If `target` is a text-mode stream, or is neither a path nor an
        object with a ``write`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM
    if comment is None:
        comment = ''
    if symmetry is None:
        symmetry = "general"
    if precision is None:
        precision = -1

    if not h:
        h = _fmm_core.header(comment=comment, symmetry=symmetry)

    # Only the path conversion is guarded. A TypeError raised while opening
    # the file must reach the caller instead of being reported as
    # "Unknown source object".
    try:
        # fsdecode() always returns str, so a bytes path is decoded rather
        # than turned into "b'...'".
        path = os.fsdecode(target)
    except TypeError:
        path = None

    if path is not None:
        # It's a file path
        return _fmm_core.open_write_file(path, h, parallelism, precision)

    if hasattr(target, "write"):
        # Stream object.
        if isinstance(target, io.TextIOBase):
            raise TypeError("target stream must be open in binary mode.")
        return _fmm_core.open_write_stream(target, h, parallelism, precision)

    raise TypeError("Unknown source object")
def _apply_field(data, field, no_pattern=False):
    """
    Make ``data.dtype`` agree with the requested MatrixMarket field type.

    Parameters
    ----------
    data : ndarray
        Input array.

    field : str
        Matrix Market field, such as 'real', 'complex', 'integer', 'pattern'.
        None leaves `data` untouched.

    no_pattern : bool, optional
        Only used when `field` is 'pattern'. If True, `data` is returned
        unchanged; if False, an empty array is returned in its place.

    Returns
    -------
    data : ndarray
        Input data if no conversion necessary, or a converted version

    Raises
    ------
    ValueError
        If `field` is not None and not a key of ``_field_to_dtype``.
    """
    if field is None:
        return data

    if field == "pattern":
        return data if no_pattern else np.zeros(0)

    target_dtype = _field_to_dtype.get(field)
    if target_dtype is not None:
        return np.asarray(data, dtype=target_dtype)

    raise ValueError("Invalid field.")
def _validate_symmetry(symmetry):
    """
    Normalize `symmetry` to one of the values that MatrixMarket allows.

    None maps to ``"general"``. Any other value is converted with ``str()``
    and lower-cased, and a ValueError is raised unless the result is one of
    'general', 'symmetric', 'skew-symmetric' or 'hermitian'.
    """
    if symmetry is None:
        return "general"

    symmetries = ["general", "symmetric", "skew-symmetric", "hermitian"]
    normalized = str(symmetry).lower()
    if normalized in symmetries:
        return normalized

    raise ValueError("Invalid symmetry. Must be one of: " + ", ".join(symmetries))
def mmread(source):
    """
    Reads the contents of a Matrix Market file-like 'source' into a matrix.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extensions .mtx, .mtx.gz, .mtx.bz2)
        or open file-like object.

    Returns
    -------
    a : ndarray or coo_matrix
        Dense or sparse matrix depending on the matrix format in the
        Matrix Market file.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mmread

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ... 5 5 7
    ... 2 3 1.0
    ... 3 4 2.0
    ... 3 5 3.0
    ... 4 1 4.0
    ... 4 2 5.0
    ... 4 3 6.0
    ... 4 4 7.0
    ... '''

    ``mmread(source)`` returns the data as sparse matrix in COO format.

    >>> m = mmread(StringIO(text))
    >>> m
    <5x5 sparse matrix of type '<class 'numpy.float64'>'
        with 7 stored elements in COOrdinate format>
    >>> m.toarray()
    array([[0., 0., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 2., 3.],
           [4., 5., 6., 7., 0.],
           [0., 0., 0., 0., 0.]])

    This method is threaded.
    The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     m = mmread(StringIO(text))

    """
    from scipy.sparse import coo_matrix

    cursor, stream_to_close = _get_read_cursor(source)

    # try/finally releases the gzip/bz2 stream opened for `source` even when
    # reading the body fails.
    try:
        if cursor.header.format == "array":
            return _read_body_array(cursor)

        triplet, shape = _read_body_coo(cursor, generalize_symmetry=True)
    finally:
        if stream_to_close:
            stream_to_close.close()

    return coo_matrix(triplet, shape=shape)
def mmwrite(target, a, comment=None, field=None, precision=None, symmetry="AUTO"):
    r"""
    Writes the sparse or dense array `a` to Matrix Market file-like `target`.

    Parameters
    ----------
    target : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object.
    a : array like
        Sparse or dense 2-D array.
    comment : str, optional
        Comments to be prepended to the Matrix Market file.
    field : None or str, optional
        Either 'real', 'complex', 'pattern', or 'integer'.
    precision : None or int, optional
        Number of digits to display for real or complex values.
    symmetry : None or str, optional
        Either 'AUTO', 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        If symmetry is None the symmetry type of 'a' is determined by its
        values. If symmetry is 'AUTO' the symmetry type of 'a' is either
        determined or set to 'general', at mmwrite's discretion.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `symmetry` is not an allowed value, or if `a` is neither
        convertible to an ndarray nor a scipy sparse matrix. In the latter
        case `target` is not opened.
    TypeError
        If `target` is a text-mode stream or not a path or writable object.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import BytesIO
    >>> import numpy as np
    >>> from scipy.sparse import coo_matrix
    >>> from scipy.io import mmwrite

    Write a small NumPy array to a matrix market file.  The file will be
    written in the ``'array'`` format.

    >>> a = np.array([[1.0, 0, 0, 0], [0, 2.5, 0, 6.25]])
    >>> target = BytesIO()
    >>> mmwrite(target, a)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    2 4
    1
    0
    0
    2.5
    0
    0
    0
    6.25

    Add a comment to the output file, and set the precision to 3.

    >>> target = BytesIO()
    >>> mmwrite(target, a, comment='\n Some test data.\n', precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    % Some test data.
    %
    2 4
    1.00e+00
    0.00e+00
    0.00e+00
    2.50e+00
    0.00e+00
    0.00e+00
    0.00e+00
    6.25e+00

    Convert to a sparse matrix before calling ``mmwrite``.  This will
    result in the output format being ``'coordinate'`` rather than
    ``'array'``.

    >>> target = BytesIO()
    >>> mmwrite(target, coo_matrix(a), precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix coordinate real general
    %
    2 4 3
    1 1 1.00e+00
    2 2 2.50e+00
    2 4 6.25e+00

    Write a complex Hermitian array to a matrix market file.  Note that
    only six values are actually written to the file; the other values
    are implied by the symmetry.

    >>> z = np.array([[3, 1+2j, 4-3j], [1-2j, 1, -5j], [4+3j, 5j, 2.5]])
    >>> z
    array([[ 3. +0.j,  1. +2.j,  4. -3.j],
           [ 1. -2.j,  1. +0.j, -0. -5.j],
           [ 4. +3.j,  0. +5.j,  2.5+0.j]])

    >>> target = BytesIO()
    >>> mmwrite(target, z, precision=2)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array complex hermitian
    %
    3 3
    3.0e+00 0.0e+00
    1.0e+00 -2.0e+00
    4.0e+00 3.0e+00
    1.0e+00 0.0e+00
    0.0e+00 5.0e+00
    2.5e+00 0.0e+00

    This method is threaded.
    The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> target = BytesIO()
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     mmwrite(target, a)

    """
    from . import _fmm_core

    if isinstance(a, (list, tuple)) or hasattr(a, "__array__"):
        a = np.asarray(a)

    if symmetry == "AUTO":
        # Searching for symmetry is only done automatically on small inputs
        # (largest dimension below 100) unless ALWAYS_FIND_SYMMETRY is set.
        if ALWAYS_FIND_SYMMETRY or (hasattr(a, "shape") and max(a.shape) < 100):
            symmetry = None
        else:
            symmetry = "general"

    if symmetry is None:
        symmetry = _mmio.MMFile()._get_symmetry(a)

    symmetry = _validate_symmetry(symmetry)

    # Reject unsupported inputs before the target is opened, so that a bad
    # `a` neither touches the target nor leaves a write cursor open.
    is_dense = isinstance(a, np.ndarray)
    if not is_dense and not scipy.sparse.issparse(a):
        raise ValueError("unknown matrix type: %s" % type(a))

    cursor = _get_write_cursor(target, comment=comment,
                               precision=precision, symmetry=symmetry)

    if is_dense:
        # Write dense numpy arrays
        a = _apply_field(a, field, no_pattern=True)
        _fmm_core.write_body_array(cursor, a)
        return

    # Write sparse scipy matrices
    a = a.tocoo()

    # `symmetry` is always a validated string at this point.
    if symmetry != "general":
        # A symmetric matrix only specifies the elements below the diagonal.
        # Ensure that the matrix satisfies this requirement.
        from scipy.sparse import coo_array
        lower_triangle_mask = a.row >= a.col
        a = coo_array((a.data[lower_triangle_mask],
                       (a.row[lower_triangle_mask],
                        a.col[lower_triangle_mask])), shape=a.shape)

    data = _apply_field(a.data, field)
    _fmm_core.write_body_coo(cursor, a.shape, a.row, a.col, data)
def mminfo(source):
    """
    Return size and storage parameters from Matrix Market file-like 'source'.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object

    Returns
    -------
    rows : int
        Number of matrix rows.
    cols : int
        Number of matrix columns.
    entries : int
        Number of non-zero entries of a sparse matrix
        or rows*cols for a dense matrix.
    format : str
        Either 'coordinate' or 'array'.
    field : str
        Either 'real', 'complex', 'pattern', or 'integer'.
    symmetry : str
        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mminfo

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ... 5 5 7
    ... 2 3 1.0
    ... 3 4 2.0
    ... 3 5 3.0
    ... 4 1 4.0
    ... 4 2 5.0
    ... 4 3 6.0
    ... 4 4 7.0
    ... '''


    ``mminfo(source)`` returns the number of rows, number of columns,
    number of entries, format, field type and symmetry attribute of the
    source file.

    >>> mminfo(StringIO(text))
    (5, 5, 7, 'coordinate', 'real', 'general')
    """
    # Only the header is needed, so a single thread is requested.
    cursor, stream_to_close = _get_read_cursor(source, 1)
    try:
        h = cursor.header
        cursor.close()
    finally:
        # Release the gzip/bz2 stream opened for `source` even if reading
        # the header or closing the cursor fails.
        if stream_to_close:
            stream_to_close.close()
    return h.nrows, h.ncols, h.nnz, h.format, h.field, h.symmetry