Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_fast_matrix_market/__init__.py: 17%
187 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1# Copyright (C) 2022-2023 Adam Lugowski. All rights reserved.
2# Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file.
3# SPDX-License-Identifier: BSD-2-Clause
4"""
5Matrix Market I/O with a C++ backend.
6See http://math.nist.gov/MatrixMarket/formats.html
7for information about the Matrix Market format.
9.. versionadded:: 1.12.0
10"""
11import io
12import os
14import numpy as np
15import scipy.sparse
__all__ = ['mminfo', 'mmread', 'mmwrite']

# Module-wide default thread count. The cursor helpers in this file fall back
# to it when no explicit parallelism is given, and the threadpoolctl controller
# defined in this file reads and rebinds it.
PARALLELISM = 0
"""
Number of threads that `mmread()` and `mmwrite()` use.
0 means number of CPUs in the system.
Use `threadpoolctl` to set this value.
"""

# Consulted by mmwrite() only when it is called with symmetry='AUTO'.
ALWAYS_FIND_SYMMETRY = False
"""
Whether mmwrite() with symmetry='AUTO' will always search for symmetry inside the matrix.
This is scipy.io._mmio.mmwrite()'s default behavior, but has a significant performance cost on large matrices.
"""

# Matrix Market field name -> NumPy dtype name. Used to allocate the value
# buffers when reading and to convert data when writing. "pattern" entries
# carry no values in the file; float64 is the dtype of the buffer allocated
# for them on read.
_field_to_dtype = {
    "integer": "int64",
    "unsigned-integer": "uint64",
    "real": "float64",
    "complex": "complex",
    "pattern": "float64",
}
def _fmm_version():
    """
    Return the ``__version__`` attribute of the compiled ``_fmm_core`` module.

    The backend is imported lazily so that importing this package does not
    load the extension module.
    """
    from . import _fmm_core as core
    return core.__version__
# Register with threadpoolctl, if available
try:
    import threadpoolctl

    class _FMMThreadPoolCtlController(threadpoolctl.LibController):
        """
        threadpoolctl controller that exposes the module-level ``PARALLELISM``
        setting, so that ``threadpoolctl.threadpool_limits`` can change the
        number of threads used by `mmread()` and `mmwrite()`.
        """
        user_api = "scipy"
        internal_api = "scipy_mmio"

        # Prefix of the compiled backend's file name; presumably what
        # threadpoolctl matches loaded libraries against -- see its docs.
        filename_prefixes = ("_fmm_core",)

        def get_num_threads(self):
            """Return the current value of ``PARALLELISM``."""
            return PARALLELISM

        def set_num_threads(self, num_threads):
            """Rebind the module-level ``PARALLELISM`` to `num_threads`."""
            global PARALLELISM
            PARALLELISM = num_threads

        def get_version(self):
            """
            Return the backend version string, or None if it is unavailable.
            """
            # Call the helper: returning the function object itself would
            # report a callable instead of a version string.
            try:
                return _fmm_version()
            except (ImportError, AttributeError):
                # Backend missing or without a __version__: report "unknown"
                # rather than breaking threadpoolctl's introspection.
                return None

        def set_additional_attributes(self):
            """No extra attributes are reported for this controller."""
            pass

    threadpoolctl.register(_FMMThreadPoolCtlController)
except (ImportError, AttributeError):
    # threadpoolctl not installed or version too old
    pass
76class _TextToBytesWrapper(io.BufferedReader):
77 """
78 Convert a TextIOBase string stream to a byte stream.
79 """
81 def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
82 super(_TextToBytesWrapper, self).__init__(text_io_buffer, **kwargs)
83 self.encoding = encoding or text_io_buffer.encoding or 'utf-8'
84 self.errors = errors or text_io_buffer.errors or 'strict'
86 def __del__(self):
87 # do not close the wrapped stream
88 self.detach()
90 def _encoding_call(self, method_name, *args, **kwargs):
91 raw_method = getattr(self.raw, method_name)
92 val = raw_method(*args, **kwargs)
93 return val.encode(self.encoding, errors=self.errors)
95 def read(self, size=-1):
96 return self._encoding_call('read', size)
98 def read1(self, size=-1):
99 return self._encoding_call('read1', size)
101 def peek(self, size=-1):
102 return self._encoding_call('peek', size)
104 def seek(self, offset, whence=0):
105 # Random seeks are not allowed because of non-trivial conversion between byte and character offsets,
106 # with the possibility of a byte offset landing within a character.
107 if offset == 0 and whence == 0 or \
108 offset == 0 and whence == 2:
109 # seek to start or end is ok
110 super(_TextToBytesWrapper, self).seek(offset, whence)
111 else:
112 # Drop any other seek
113 # In this application this may happen when pystreambuf seeks during sync(), which can happen when closing
114 # a partially-read stream. Ex. when mminfo() only reads the header then exits.
115 pass
def _read_body_array(cursor):
    """
    Read the body of a MatrixMarket ``array`` (dense) file.

    Allocates a zero-filled array of the header's shape, with the dtype
    mapped from the header's field, and lets the backend fill it in place.
    """
    from . import _fmm_core

    dense = np.zeros(
        cursor.header.shape,
        dtype=_field_to_dtype.get(cursor.header.field),
    )
    _fmm_core.read_body_array(cursor, dense)
    return dense
def _read_body_coo(cursor, generalize_symmetry=True):
    """
    Read the body of a MatrixMarket ``coordinate`` (sparse) file.

    Returns ``((data, (rows, cols)), shape)``. When `generalize_symmetry` is
    true and the header declares a non-general symmetry, the off-diagonal
    entries are mirrored across the diagonal (negated for skew-symmetric,
    conjugated for hermitian) and appended to the triplet.
    """
    from . import _fmm_core

    # int32 indices unless a dimension does not fit in int32.
    needs_wide_index = cursor.header.nrows >= 2**31 or cursor.header.ncols >= 2**31
    index_dtype = "int64" if needs_wide_index else "int32"

    rows = np.zeros(cursor.header.nnz, dtype=index_dtype)
    cols = np.zeros(cursor.header.nnz, dtype=index_dtype)
    values = np.zeros(cursor.header.nnz, dtype=_field_to_dtype.get(cursor.header.field))

    _fmm_core.read_body_coo(cursor, rows, cols, values)

    if not generalize_symmetry or cursor.header.symmetry == "general":
        return (values, (rows, cols)), cursor.header.shape

    # Boolean-mask indexing copies, so the in-place negation below does not
    # touch the entries that were read from the file.
    off_diagonal = (rows != cols)
    mirrored_cols = rows[off_diagonal]
    mirrored_rows = cols[off_diagonal]
    mirrored_values = values[off_diagonal]

    if cursor.header.symmetry == "skew-symmetric":
        mirrored_values *= -1
    elif cursor.header.symmetry == "hermitian":
        mirrored_values = mirrored_values.conjugate()

    rows = np.concatenate((rows, mirrored_rows))
    cols = np.concatenate((cols, mirrored_cols))
    values = np.concatenate((values, mirrored_values))

    return (values, (rows, cols)), cursor.header.shape
def _get_read_cursor(source, parallelism=None):
    """
    Open `source` for reading and return ``(cursor, stream_to_close)``.

    Parameters
    ----------
    source : str, bytes, os.PathLike or file-like
        A file path, or an object with a ``read`` method. Paths ending in
        ``.gz`` or ``.bz2`` are opened through `gzip` / `bz2`. Text streams
        are wrapped so that the backend receives bytes.
    parallelism : int, optional
        Thread count handed to the backend; defaults to ``PARALLELISM``.

    Returns
    -------
    cursor
        Backend read cursor.
    stream_to_close
        The compressed stream opened here, which the caller must close once
        it is done with the cursor, or None if nothing was opened here.

    Raises
    ------
    TypeError
        If `source` is neither a path nor an object with a ``read`` method.
    """
    from . import _fmm_core

    ret_stream_to_close = None
    if parallelism is None:
        parallelism = PARALLELISM

    try:
        # fsdecode() accepts str, bytes and os.PathLike and always yields a
        # str; str() on a bytes path would produce the literal "b'...'".
        path = os.fsdecode(source)
    except TypeError:
        path = None

    if path is not None:
        if path.endswith('.gz'):
            import gzip
            source = gzip.GzipFile(path, 'r')
            ret_stream_to_close = source
        elif path.endswith('.bz2'):
            import bz2
            source = bz2.BZ2File(path, 'rb')
            ret_stream_to_close = source
        else:
            return _fmm_core.open_read_file(path, parallelism), ret_stream_to_close

    # Stream object.
    if hasattr(source, "read"):
        if isinstance(source, io.TextIOBase):
            source = _TextToBytesWrapper(source)
        try:
            cursor = _fmm_core.open_read_stream(source, parallelism)
        except Exception:
            # The caller never receives the handle when opening fails, so a
            # stream that was opened here has to be released here.
            if ret_stream_to_close is not None:
                ret_stream_to_close.close()
            raise
        return cursor, ret_stream_to_close
    else:
        raise TypeError("Unknown source type")
def _get_write_cursor(target, h=None, comment=None, parallelism=None, symmetry="general", precision=None):
    """
    Open `target` for writing and return a backend write cursor.

    Parameters
    ----------
    target : str, bytes, os.PathLike or binary file-like
        A file path, or an object with a ``write`` method opened in binary
        mode.
    h : header, optional
        Backend header object. When falsy, one is built from `comment` and
        `symmetry`.
    comment : str, optional
        Header comment; None is treated as an empty string.
    parallelism : int, optional
        Thread count handed to the backend; defaults to ``PARALLELISM``.
    symmetry : str, optional
        Header symmetry; None is treated as ``"general"``.
    precision : int, optional
        Passed to the backend; None is passed as -1.

    Raises
    ------
    TypeError
        If `target` is a text stream, or is neither a path nor an object
        with a ``write`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM
    if comment is None:
        comment = ''
    if symmetry is None:
        symmetry = "general"
    if precision is None:
        precision = -1

    if not h:
        h = _fmm_core.header(comment=comment, symmetry=symmetry)

    # Only the path detection lives inside the try: a TypeError raised by
    # the backend while opening the file must reach the caller instead of
    # being mistaken for "target is not a path".
    try:
        # fsdecode() accepts str, bytes and os.PathLike and always yields a
        # str; str() on a bytes path would produce the literal "b'...'".
        path = os.fsdecode(target)
    except TypeError:
        path = None

    if path is not None:
        return _fmm_core.open_write_file(path, h, parallelism, precision)

    if hasattr(target, "write"):
        # Stream object.
        if isinstance(target, io.TextIOBase):
            raise TypeError("target stream must be open in binary mode.")
        return _fmm_core.open_write_stream(target, h, parallelism, precision)
    else:
        raise TypeError("Unknown source object")
237def _apply_field(data, field, no_pattern=False):
238 """
239 Ensure that a NumPy array has a dtype compatible with the specified MatrixMarket field type.
241 Parameters
242 ----------
243 data : ndarray
244 Input array.
246 field : str
247 Matrix Market field, such as 'real', 'complex', 'integer', 'pattern'.
249 no_pattern : bool, optional
250 Whether an empty array may be returned for a 'pattern' field.
252 Returns
253 -------
254 data : ndarray
255 Input data if no conversion necessary, or a converted version
256 """
258 if field is None:
259 return data
260 if field == "pattern":
261 if no_pattern:
262 return data
263 else:
264 return np.zeros(0)
266 dtype = _field_to_dtype.get(field, None)
267 if dtype is None:
268 raise ValueError("Invalid field.")
270 return np.asarray(data, dtype=dtype)
273def _validate_symmetry(symmetry):
274 """
275 Check that the symmetry parameter is one that MatrixMarket allows..
276 """
277 if symmetry is None:
278 return "general"
280 symmetry = str(symmetry).lower()
281 symmetries = ["general", "symmetric", "skew-symmetric", "hermitian"]
282 if symmetry not in symmetries:
283 raise ValueError("Invalid symmetry. Must be one of: " + ", ".join(symmetries))
285 return symmetry
def mmread(source):
    """
    Reads the contents of a Matrix Market file-like 'source' into a matrix.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extension .mtx; names ending in .gz or
        .bz2 are decompressed while reading) or open file-like object.

    Returns
    -------
    a : ndarray or coo_matrix
        Dense or sparse matrix depending on the matrix format in the
        Matrix Market file.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mmread

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ...  5 5 7
    ...  2 3 1.0
    ...  3 4 2.0
    ...  3 5 3.0
    ...  4 1 4.0
    ...  4 2 5.0
    ...  4 3 6.0
    ...  4 4 7.0
    ... '''

    ``mmread(source)`` returns the data as sparse matrix in COO format.

    >>> m = mmread(StringIO(text))
    >>> m
    <5x5 sparse matrix of type '<class 'numpy.float64'>'
        with 7 stored elements in COOrdinate format>
    >>> m.toarray()
    array([[0., 0., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 2., 3.],
           [4., 5., 6., 7., 0.],
           [0., 0., 0., 0., 0.]])

    This method is threaded. The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     m = mmread(StringIO(text))

    """
    from scipy.sparse import coo_matrix

    cursor, stream_to_close = _get_read_cursor(source)

    # try/finally so that a compressed stream opened on our behalf is
    # released even when parsing the body raises.
    try:
        if cursor.header.format == "array":
            return _read_body_array(cursor)
        triplet, shape = _read_body_coo(cursor, generalize_symmetry=True)
    finally:
        if stream_to_close is not None:
            stream_to_close.close()

    return coo_matrix(triplet, shape=shape)
def mmwrite(target, a, comment=None, field=None, precision=None, symmetry="AUTO"):
    r"""
    Writes the sparse or dense array `a` to Matrix Market file-like `target`.

    Parameters
    ----------
    target : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object.
    a : array like
        Sparse or dense 2-D array.
    comment : str, optional
        Comments to be prepended to the Matrix Market file.
    field : None or str, optional
        Either 'real', 'complex', 'pattern', 'integer', or
        'unsigned-integer'.
    precision : None or int, optional
        Number of digits to display for real or complex values.
    symmetry : None or str, optional
        Either 'AUTO', 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        If symmetry is None the symmetry type of 'a' is determined by its
        values. If symmetry is 'AUTO' the symmetry type of 'a' is either
        determined or set to 'general', at mmwrite's discretion.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `a` is neither array-like nor a SciPy sparse matrix, or if
        `symmetry` or `field` is not a recognized name. These checks run
        before `target` is opened.
    TypeError
        If `target` is a text stream, or is neither a path nor a writable
        file-like object.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import BytesIO
    >>> import numpy as np
    >>> from scipy.sparse import coo_matrix
    >>> from scipy.io import mmwrite

    Write a small NumPy array to a matrix market file.  The file will be
    written in the ``'array'`` format.

    >>> a = np.array([[1.0, 0, 0, 0], [0, 2.5, 0, 6.25]])
    >>> target = BytesIO()
    >>> mmwrite(target, a)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    2 4
    1
    0
    0
    2.5
    0
    0
    0
    6.25

    Add a comment to the output file, and set the precision to 3.

    >>> target = BytesIO()
    >>> mmwrite(target, a, comment='\n Some test data.\n', precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    % Some test data.
    %
    2 4
    1.00e+00
    0.00e+00
    0.00e+00
    2.50e+00
    0.00e+00
    0.00e+00
    0.00e+00
    6.25e+00

    Convert to a sparse matrix before calling ``mmwrite``.  This will
    result in the output format being ``'coordinate'`` rather than
    ``'array'``.

    >>> target = BytesIO()
    >>> mmwrite(target, coo_matrix(a), precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix coordinate real general
    %
    2 4 3
    1 1 1.00e+00
    2 2 2.50e+00
    2 4 6.25e+00

    Write a complex Hermitian array to a matrix market file.  Note that
    only six values are actually written to the file; the other values
    are implied by the symmetry.

    >>> z = np.array([[3, 1+2j, 4-3j], [1-2j, 1, -5j], [4+3j, 5j, 2.5]])
    >>> z
    array([[ 3. +0.j,  1. +2.j,  4. -3.j],
           [ 1. -2.j,  1. +0.j, -0. -5.j],
           [ 4. +3.j,  0. +5.j,  2.5+0.j]])

    >>> target = BytesIO()
    >>> mmwrite(target, z, precision=2)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array complex hermitian
    %
    3 3
    3.0e+00 0.0e+00
    1.0e+00 -2.0e+00
    4.0e+00 3.0e+00
    1.0e+00 0.0e+00
    0.0e+00 5.0e+00
    2.5e+00 0.0e+00

    This method is threaded. The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> target = BytesIO()
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     mmwrite(target, a)

    """
    if isinstance(a, (list, tuple)) or hasattr(a, "__array__"):
        a = np.asarray(a)

    # Reject unsupported inputs before anything touches `target`: opening a
    # path first could create or truncate the file and then fail.
    is_dense = isinstance(a, np.ndarray)
    if not is_dense and not scipy.sparse.issparse(a):
        raise ValueError("unknown matrix type: %s" % type(a))

    if symmetry == "AUTO":
        # Searching for symmetry is costly, so by default it is only done
        # for small matrices.
        if ALWAYS_FIND_SYMMETRY or (hasattr(a, "shape") and max(a.shape) < 100):
            symmetry = None
        else:
            symmetry = "general"

    if symmetry is None:
        # Import explicitly instead of relying on ``scipy.io._mmio`` having
        # been loaded as a side effect of some other import.
        from scipy.io._mmio import MMFile
        symmetry = MMFile()._get_symmetry(a)

    symmetry = _validate_symmetry(symmetry)

    # Prepare the payload first so that an invalid `field` is also reported
    # before the target is opened.
    if is_dense:
        a = _apply_field(a, field, no_pattern=True)
    else:
        a = a.tocoo()

        if symmetry != "general":
            # A symmetric matrix only specifies the elements below the diagonal.
            # Ensure that the matrix satisfies this requirement.
            from scipy.sparse import coo_array
            lower_triangle_mask = a.row >= a.col
            a = coo_array((a.data[lower_triangle_mask],
                           (a.row[lower_triangle_mask],
                            a.col[lower_triangle_mask])), shape=a.shape)

        data = _apply_field(a.data, field)

    from . import _fmm_core

    cursor = _get_write_cursor(target, comment=comment, precision=precision, symmetry=symmetry)

    if is_dense:
        # Write dense numpy arrays
        _fmm_core.write_body_array(cursor, a)
    else:
        # Write sparse scipy matrices
        _fmm_core.write_body_coo(cursor, a.shape, a.row, a.col, data)
def mminfo(source):
    """
    Return size and storage parameters from Matrix Market file-like 'source'.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object

    Returns
    -------
    rows : int
        Number of matrix rows.
    cols : int
        Number of matrix columns.
    entries : int
        Number of non-zero entries of a sparse matrix
        or rows*cols for a dense matrix.
    format : str
        Either 'coordinate' or 'array'.
    field : str
        Either 'real', 'complex', 'pattern', or 'integer'.
    symmetry : str
        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mminfo

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ...  5 5 7
    ...  2 3 1.0
    ...  3 4 2.0
    ...  3 5 3.0
    ...  4 1 4.0
    ...  4 2 5.0
    ...  4 3 6.0
    ...  4 4 7.0
    ... '''


    ``mminfo(source)`` returns the number of rows, number of columns,
    number of entries, format, field type and symmetry attribute of the
    source file.

    >>> mminfo(StringIO(text))
    (5, 5, 7, 'coordinate', 'real', 'general')
    """
    # A single thread is requested: only the header is consulted here.
    cursor, stream_to_close = _get_read_cursor(source, 1)

    # try/finally so that a compressed stream opened on our behalf is
    # released even if closing the cursor raises.
    try:
        h = cursor.header
        cursor.close()
    finally:
        if stream_to_close is not None:
            stream_to_close.close()

    return h.nrows, h.ncols, h.nnz, h.format, h.field, h.symmetry