Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/convert_matrix.py: 10%
248 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Functions to convert NetworkX graphs to and from common data containers
2like numpy arrays, scipy sparse arrays, and pandas DataFrames.
4The preferred way of converting data to a NetworkX graph is through the
5graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph`
6function which attempts to guess the input type and convert it automatically.
8Examples
9--------
10Create a 10 node random graph from a numpy array
12>>> import numpy as np
13>>> rng = np.random.default_rng()
14>>> a = rng.integers(low=0, high=2, size=(10, 10))
15>>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph)
17or equivalently:
19>>> DG = nx.DiGraph(a)
21which calls `from_numpy_array` internally based on the type of ``a``.
23See Also
24--------
25nx_agraph, nx_pydot
26"""
28import itertools
29from collections import defaultdict
31import networkx as nx
32from networkx.utils import not_implemented_for
34__all__ = [
35 "from_pandas_adjacency",
36 "to_pandas_adjacency",
37 "from_pandas_edgelist",
38 "to_pandas_edgelist",
39 "from_scipy_sparse_array",
40 "to_scipy_sparse_array",
41 "from_numpy_array",
42 "to_numpy_array",
43]
46@nx._dispatch(edge_attrs="weight")
47def to_pandas_adjacency(
48 G,
49 nodelist=None,
50 dtype=None,
51 order=None,
52 multigraph_weight=sum,
53 weight="weight",
54 nonedge=0.0,
55):
56 """Returns the graph adjacency matrix as a Pandas DataFrame.
58 Parameters
59 ----------
60 G : graph
61 The NetworkX graph used to construct the Pandas DataFrame.
63 nodelist : list, optional
64 The rows and columns are ordered according to the nodes in `nodelist`.
65 If `nodelist` is None, then the ordering is produced by G.nodes().
67 multigraph_weight : {sum, min, max}, optional
68 An operator that determines how weights in multigraphs are handled.
69 The default is to sum the weights of the multiple edges.
71 weight : string or None, optional
72 The edge attribute that holds the numerical value used for
73 the edge weight. If an edge does not have that attribute, then the
74 value 1 is used instead.
76 nonedge : float, optional
77 The matrix values corresponding to nonedges are typically set to zero.
78 However, this could be undesirable if there are matrix values
79 corresponding to actual edges that also have the value zero. If so,
80 one might prefer nonedges to have some other value, such as nan.
82 Returns
83 -------
84 df : Pandas DataFrame
85 Graph adjacency matrix
87 Notes
88 -----
89 For directed graphs, entry i,j corresponds to an edge from i to j.
91 The DataFrame entries are assigned to the weight edge attribute. When
92 an edge does not have a weight attribute, the value of the entry is set to
93 the number 1. For multiple (parallel) edges, the values of the entries
94 are determined by the 'multigraph_weight' parameter. The default is to
95 sum the weight attributes for each of the parallel edges.
97 When `nodelist` does not contain every node in `G`, the matrix is built
98 from the subgraph of `G` that is induced by the nodes in `nodelist`.
100 The convention used for self-loop edges in graphs is to assign the
101 diagonal matrix entry value to the weight attribute of the edge
102 (or the number 1 if the edge has no weight attribute). If the
103 alternate convention of doubling the edge weight is desired the
104 resulting Pandas DataFrame can be modified as follows:
106 >>> import pandas as pd
107 >>> pd.options.display.max_columns = 20
108 >>> import numpy as np
109 >>> G = nx.Graph([(1, 1)])
110 >>> df = nx.to_pandas_adjacency(G, dtype=int)
111 >>> df
112 1
113 1 1
114 >>> df.values[np.diag_indices_from(df)] *= 2
115 >>> df
116 1
117 1 2
119 Examples
120 --------
121 >>> G = nx.MultiDiGraph()
122 >>> G.add_edge(0, 1, weight=2)
123 0
124 >>> G.add_edge(1, 0)
125 0
126 >>> G.add_edge(2, 2, weight=3)
127 0
128 >>> G.add_edge(2, 2)
129 1
130 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
131 0 1 2
132 0 0 2 0
133 1 1 0 0
134 2 0 0 4
136 """
137 import pandas as pd
139 M = to_numpy_array(
140 G,
141 nodelist=nodelist,
142 dtype=dtype,
143 order=order,
144 multigraph_weight=multigraph_weight,
145 weight=weight,
146 nonedge=nonedge,
147 )
148 if nodelist is None:
149 nodelist = list(G)
150 return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
153@nx._dispatch(graphs=None)
154def from_pandas_adjacency(df, create_using=None):
155 r"""Returns a graph from Pandas DataFrame.
157 The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
159 Parameters
160 ----------
161 df : Pandas DataFrame
162 An adjacency matrix representation of a graph
164 create_using : NetworkX graph constructor, optional (default=nx.Graph)
165 Graph type to create. If graph instance, then cleared before populated.
167 Notes
168 -----
169 For directed graphs, explicitly mention create_using=nx.DiGraph,
170 and entry i,j of df corresponds to an edge from i to j.
172 If `df` has a single data type for each entry it will be converted to an
173 appropriate Python data type.
175 If `df` has a user-specified compound data type the names
176 of the data fields will be used as attribute keys in the resulting
177 NetworkX graph.
179 See Also
180 --------
181 to_pandas_adjacency
183 Examples
184 --------
185 Simple integer weights on edges:
187 >>> import pandas as pd
188 >>> pd.options.display.max_columns = 20
189 >>> df = pd.DataFrame([[1, 1], [2, 1]])
190 >>> df
191 0 1
192 0 1 1
193 1 2 1
194 >>> G = nx.from_pandas_adjacency(df)
195 >>> G.name = "Graph from pandas adjacency matrix"
196 >>> print(G)
197 Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges
198 """
200 try:
201 df = df[df.index]
202 except Exception as err:
203 missing = list(set(df.index).difference(set(df.columns)))
204 msg = f"{missing} not in columns"
205 raise nx.NetworkXError("Columns must match Indices.", msg) from err
207 A = df.values
208 G = from_numpy_array(A, create_using=create_using)
210 nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False)
211 return G
214@nx._dispatch(preserve_edge_attrs=True)
215def to_pandas_edgelist(
216 G,
217 source="source",
218 target="target",
219 nodelist=None,
220 dtype=None,
221 edge_key=None,
222):
223 """Returns the graph edge list as a Pandas DataFrame.
225 Parameters
226 ----------
227 G : graph
228 The NetworkX graph used to construct the Pandas DataFrame.
230 source : str or int, optional
231 A valid column name (string or integer) for the source nodes (for the
232 directed case).
234 target : str or int, optional
235 A valid column name (string or integer) for the target nodes (for the
236 directed case).
238 nodelist : list, optional
239 Use only nodes specified in nodelist
241 dtype : dtype, default None
242 Use to create the DataFrame. Data type to force.
243 Only a single dtype is allowed. If None, infer.
245 edge_key : str or int or None, optional (default=None)
246 A valid column name (string or integer) for the edge keys (for the
247 multigraph case). If None, edge keys are not stored in the DataFrame.
249 Returns
250 -------
251 df : Pandas DataFrame
252 Graph edge list
254 Examples
255 --------
256 >>> G = nx.Graph(
257 ... [
258 ... ("A", "B", {"cost": 1, "weight": 7}),
259 ... ("C", "E", {"cost": 9, "weight": 10}),
260 ... ]
261 ... )
262 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
263 >>> df[["source", "target", "cost", "weight"]]
264 source target cost weight
265 0 A B 1 7
266 1 C E 9 10
268 >>> G = nx.MultiGraph([('A', 'B', {'cost': 1}), ('A', 'B', {'cost': 9})])
269 >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'], edge_key='ekey')
270 >>> df[['source', 'target', 'cost', 'ekey']]
271 source target cost ekey
272 0 A B 1 0
273 1 A B 9 1
275 """
276 import pandas as pd
278 if nodelist is None:
279 edgelist = G.edges(data=True)
280 else:
281 edgelist = G.edges(nodelist, data=True)
282 source_nodes = [s for s, _, _ in edgelist]
283 target_nodes = [t for _, t, _ in edgelist]
285 all_attrs = set().union(*(d.keys() for _, _, d in edgelist))
286 if source in all_attrs:
287 raise nx.NetworkXError(f"Source name {source!r} is an edge attr name")
288 if target in all_attrs:
289 raise nx.NetworkXError(f"Target name {target!r} is an edge attr name")
291 nan = float("nan")
292 edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs}
294 if G.is_multigraph() and edge_key is not None:
295 if edge_key in all_attrs:
296 raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name")
297 edge_keys = [k for _, _, k in G.edges(keys=True)]
298 edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys}
299 else:
300 edgelistdict = {source: source_nodes, target: target_nodes}
302 edgelistdict.update(edge_attr)
303 return pd.DataFrame(edgelistdict, dtype=dtype)
306@nx._dispatch(graphs=None)
307def from_pandas_edgelist(
308 df,
309 source="source",
310 target="target",
311 edge_attr=None,
312 create_using=None,
313 edge_key=None,
314):
315 """Returns a graph from Pandas DataFrame containing an edge list.
317 The Pandas DataFrame should contain at least two columns of node names and
318 zero or more columns of edge attributes. Each row will be processed as one
319 edge instance.
321 Note: This function iterates over DataFrame.values, which is not
322 guaranteed to retain the data type across columns in the row. This is only
323 a problem if your row is entirely numeric and a mix of ints and floats. In
324 that case, all values will be returned as floats. See the
325 DataFrame.iterrows documentation for an example.
327 Parameters
328 ----------
329 df : Pandas DataFrame
330 An edge list representation of a graph
332 source : str or int
333 A valid column name (string or integer) for the source nodes (for the
334 directed case).
336 target : str or int
337 A valid column name (string or integer) for the target nodes (for the
338 directed case).
340 edge_attr : str or int, iterable, True, or None
341 A valid column name (str or int) or iterable of column names that are
342 used to retrieve items and add them to the graph as edge attributes.
343 If `True`, all of the remaining columns will be added.
344 If `None`, no edge attributes are added to the graph.
346 create_using : NetworkX graph constructor, optional (default=nx.Graph)
347 Graph type to create. If graph instance, then cleared before populated.
349 edge_key : str or None, optional (default=None)
350 A valid column name for the edge keys (for a MultiGraph). The values in
351 this column are used for the edge keys when adding edges if create_using
352 is a multigraph.
354 See Also
355 --------
356 to_pandas_edgelist
358 Examples
359 --------
360 Simple integer weights on edges:
362 >>> import pandas as pd
363 >>> pd.options.display.max_columns = 20
364 >>> import numpy as np
365 >>> rng = np.random.RandomState(seed=5)
366 >>> ints = rng.randint(1, 11, size=(3, 2))
367 >>> a = ["A", "B", "C"]
368 >>> b = ["D", "A", "E"]
369 >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
370 >>> df[0] = a
371 >>> df["b"] = b
372 >>> df[["weight", "cost", 0, "b"]]
373 weight cost 0 b
374 0 4 7 A D
375 1 7 1 B A
376 2 10 9 C E
377 >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
378 >>> G["E"]["C"]["weight"]
379 10
380 >>> G["E"]["C"]["cost"]
381 9
382 >>> edges = pd.DataFrame(
383 ... {
384 ... "source": [0, 1, 2],
385 ... "target": [2, 2, 3],
386 ... "weight": [3, 4, 5],
387 ... "color": ["red", "blue", "blue"],
388 ... }
389 ... )
390 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
391 >>> G[0][2]["color"]
392 'red'
394 Build multigraph with custom keys:
396 >>> edges = pd.DataFrame(
397 ... {
398 ... "source": [0, 1, 2, 0],
399 ... "target": [2, 2, 3, 2],
400 ... "my_edge_key": ["A", "B", "C", "D"],
401 ... "weight": [3, 4, 5, 6],
402 ... "color": ["red", "blue", "blue", "blue"],
403 ... }
404 ... )
405 >>> G = nx.from_pandas_edgelist(
406 ... edges,
407 ... edge_key="my_edge_key",
408 ... edge_attr=["weight", "color"],
409 ... create_using=nx.MultiGraph(),
410 ... )
411 >>> G[0][2]
412 AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
415 """
416 g = nx.empty_graph(0, create_using)
418 if edge_attr is None:
419 g.add_edges_from(zip(df[source], df[target]))
420 return g
422 reserved_columns = [source, target]
424 # Additional columns requested
425 attr_col_headings = []
426 attribute_data = []
427 if edge_attr is True:
428 attr_col_headings = [c for c in df.columns if c not in reserved_columns]
429 elif isinstance(edge_attr, (list, tuple)):
430 attr_col_headings = edge_attr
431 else:
432 attr_col_headings = [edge_attr]
433 if len(attr_col_headings) == 0:
434 raise nx.NetworkXError(
435 f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"
436 )
438 try:
439 attribute_data = zip(*[df[col] for col in attr_col_headings])
440 except (KeyError, TypeError) as err:
441 msg = f"Invalid edge_attr argument: {edge_attr}"
442 raise nx.NetworkXError(msg) from err
444 if g.is_multigraph():
445 # => append the edge keys from the df to the bundled data
446 if edge_key is not None:
447 try:
448 multigraph_edge_keys = df[edge_key]
449 attribute_data = zip(attribute_data, multigraph_edge_keys)
450 except (KeyError, TypeError) as err:
451 msg = f"Invalid edge_key argument: {edge_key}"
452 raise nx.NetworkXError(msg) from err
454 for s, t, attrs in zip(df[source], df[target], attribute_data):
455 if edge_key is not None:
456 attrs, multigraph_edge_key = attrs
457 key = g.add_edge(s, t, key=multigraph_edge_key)
458 else:
459 key = g.add_edge(s, t)
461 g[s][t][key].update(zip(attr_col_headings, attrs))
462 else:
463 for s, t, attrs in zip(df[source], df[target], attribute_data):
464 g.add_edge(s, t)
465 g[s][t].update(zip(attr_col_headings, attrs))
467 return g
470@nx._dispatch(edge_attrs="weight")
471def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"):
472 """Returns the graph adjacency matrix as a SciPy sparse array.
474 Parameters
475 ----------
476 G : graph
477 The NetworkX graph used to construct the sparse matrix.
479 nodelist : list, optional
480 The rows and columns are ordered according to the nodes in `nodelist`.
481 If `nodelist` is None, then the ordering is produced by G.nodes().
483 dtype : NumPy data-type, optional
484 A valid NumPy dtype used to initialize the array. If None, then the
485 NumPy default is used.
487 weight : string or None optional (default='weight')
488 The edge attribute that holds the numerical value used for
489 the edge weight. If None then all edge weights are 1.
491 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
492 The type of the matrix to be returned (default 'csr'). For
493 some algorithms different implementations of sparse matrices
494 can perform better. See [1]_ for details.
496 Returns
497 -------
498 A : SciPy sparse array
499 Graph adjacency matrix.
501 Notes
502 -----
503 For directed graphs, matrix entry i,j corresponds to an edge from i to j.
505 The matrix entries are populated using the edge attribute held in
506 parameter weight. When an edge does not have that attribute, the
507 value of the entry is 1.
509 For multiple edges the matrix values are the sums of the edge weights.
511 When `nodelist` does not contain every node in `G`, the adjacency matrix
512 is built from the subgraph of `G` that is induced by the nodes in
513 `nodelist`.
515 The convention used for self-loop edges in graphs is to assign the
516 diagonal matrix entry value to the weight attribute of the edge
517 (or the number 1 if the edge has no weight attribute). If the
518 alternate convention of doubling the edge weight is desired the
519 resulting SciPy sparse array can be modified as follows:
521 >>> G = nx.Graph([(1, 1)])
522 >>> A = nx.to_scipy_sparse_array(G)
523 >>> print(A.todense())
524 [[1]]
525 >>> A.setdiag(A.diagonal() * 2)
526 >>> print(A.toarray())
527 [[2]]
529 Examples
530 --------
531 >>> G = nx.MultiDiGraph()
532 >>> G.add_edge(0, 1, weight=2)
533 0
534 >>> G.add_edge(1, 0)
535 0
536 >>> G.add_edge(2, 2, weight=3)
537 0
538 >>> G.add_edge(2, 2)
539 1
540 >>> S = nx.to_scipy_sparse_array(G, nodelist=[0, 1, 2])
541 >>> print(S.toarray())
542 [[0 2 0]
543 [1 0 0]
544 [0 0 4]]
546 References
547 ----------
548 .. [1] Scipy Dev. References, "Sparse Matrices",
549 https://docs.scipy.org/doc/scipy/reference/sparse.html
550 """
551 import scipy as sp
553 if len(G) == 0:
554 raise nx.NetworkXError("Graph has no nodes or edges")
556 if nodelist is None:
557 nodelist = list(G)
558 nlen = len(G)
559 else:
560 nlen = len(nodelist)
561 if nlen == 0:
562 raise nx.NetworkXError("nodelist has no nodes")
563 nodeset = set(G.nbunch_iter(nodelist))
564 if nlen != len(nodeset):
565 for n in nodelist:
566 if n not in G:
567 raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
568 raise nx.NetworkXError("nodelist contains duplicates.")
569 if nlen < len(G):
570 G = G.subgraph(nodelist)
572 index = dict(zip(nodelist, range(nlen)))
573 coefficients = zip(
574 *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))
575 )
576 try:
577 row, col, data = coefficients
578 except ValueError:
579 # there is no edge in the subgraph
580 row, col, data = [], [], []
582 if G.is_directed():
583 A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
584 else:
585 # symmetrize matrix
586 d = data + data
587 r = row + col
588 c = col + row
589 # selfloop entries get double counted when symmetrizing
590 # so we subtract the data on the diagonal
591 selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
592 if selfloops:
593 diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))
594 d += diag_data
595 r += diag_index
596 c += diag_index
597 A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
598 try:
599 return A.asformat(format)
600 except ValueError as err:
601 raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err
604def _csr_gen_triples(A):
605 """Converts a SciPy sparse array in **Compressed Sparse Row** format to
606 an iterable of weighted edge triples.
608 """
609 nrows = A.shape[0]
610 data, indices, indptr = A.data, A.indices, A.indptr
611 for i in range(nrows):
612 for j in range(indptr[i], indptr[i + 1]):
613 yield i, int(indices[j]), data[j]
616def _csc_gen_triples(A):
617 """Converts a SciPy sparse array in **Compressed Sparse Column** format to
618 an iterable of weighted edge triples.
620 """
621 ncols = A.shape[1]
622 data, indices, indptr = A.data, A.indices, A.indptr
623 for i in range(ncols):
624 for j in range(indptr[i], indptr[i + 1]):
625 yield int(indices[j]), i, data[j]
628def _coo_gen_triples(A):
629 """Converts a SciPy sparse array in **Coordinate** format to an iterable
630 of weighted edge triples.
632 """
633 return ((int(i), int(j), d) for i, j, d in zip(A.row, A.col, A.data))
636def _dok_gen_triples(A):
637 """Converts a SciPy sparse array in **Dictionary of Keys** format to an
638 iterable of weighted edge triples.
640 """
641 for (r, c), v in A.items():
642 yield r, c, v
645def _generate_weighted_edges(A):
646 """Returns an iterable over (u, v, w) triples, where u and v are adjacent
647 vertices and w is the weight of the edge joining u and v.
649 `A` is a SciPy sparse array (in any format).
651 """
652 if A.format == "csr":
653 return _csr_gen_triples(A)
654 if A.format == "csc":
655 return _csc_gen_triples(A)
656 if A.format == "dok":
657 return _dok_gen_triples(A)
658 # If A is in any other format (including COO), convert it to COO format.
659 return _coo_gen_triples(A.tocoo())
662@nx._dispatch(graphs=None)
663def from_scipy_sparse_array(
664 A, parallel_edges=False, create_using=None, edge_attribute="weight"
665):
666 """Creates a new graph from an adjacency matrix given as a SciPy sparse
667 array.
669 Parameters
670 ----------
671 A: scipy.sparse array
672 An adjacency matrix representation of a graph
674 parallel_edges : Boolean
675 If this is True, `create_using` is a multigraph, and `A` is an
676 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
677 number of parallel edges joining vertices *i* and *j* in the graph.
678 If it is False, then the entries in the matrix are interpreted as
679 the weight of a single edge joining the vertices.
681 create_using : NetworkX graph constructor, optional (default=nx.Graph)
682 Graph type to create. If graph instance, then cleared before populated.
684 edge_attribute: string
685 Name of edge attribute to store matrix numeric value. The data will
686 have the same type as the matrix entry (int, float, (real,imag)).
688 Notes
689 -----
690 For directed graphs, explicitly mention create_using=nx.DiGraph,
691 and entry i,j of A corresponds to an edge from i to j.
693 If `create_using` is :class:`networkx.MultiGraph` or
694 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
695 entries of `A` are of type :class:`int`, then this function returns a
696 multigraph (constructed from `create_using`) with parallel edges.
697 In this case, `edge_attribute` will be ignored.
699 If `create_using` indicates an undirected multigraph, then only the edges
700 indicated by the upper triangle of the matrix `A` will be added to the
701 graph.
703 Examples
704 --------
705 >>> import scipy as sp
706 >>> A = sp.sparse.eye(2, 2, 1)
707 >>> G = nx.from_scipy_sparse_array(A)
709 If `create_using` indicates a multigraph and the matrix has only integer
710 entries and `parallel_edges` is False, then the entries will be treated
711 as weights for edges joining the nodes (without creating parallel edges):
713 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
714 >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph)
715 >>> G[1][1]
716 AtlasView({0: {'weight': 2}})
718 If `create_using` indicates a multigraph and the matrix has only integer
719 entries and `parallel_edges` is True, then the entries will be treated
720 as the number of parallel edges joining those two vertices:
722 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
723 >>> G = nx.from_scipy_sparse_array(
724 ... A, parallel_edges=True, create_using=nx.MultiGraph
725 ... )
726 >>> G[1][1]
727 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
729 """
730 G = nx.empty_graph(0, create_using)
731 n, m = A.shape
732 if n != m:
733 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
734 # Make sure we get even the isolated nodes of the graph.
735 G.add_nodes_from(range(n))
736 # Create an iterable over (u, v, w) triples and for each triple, add an
737 # edge from u to v with weight w.
738 triples = _generate_weighted_edges(A)
739 # If the entries in the adjacency matrix are integers, the graph is a
740 # multigraph, and parallel_edges is True, then create parallel edges, each
741 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
742 # one edge for each positive entry in the adjacency matrix and set the
743 # weight of that edge to be the entry in the matrix.
744 if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:
745 chain = itertools.chain.from_iterable
746 # The following line is equivalent to:
747 #
748 # for (u, v) in edges:
749 # for d in range(A[u, v]):
750 # G.add_edge(u, v, weight=1)
751 #
752 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
753 # If we are creating an undirected multigraph, only add the edges from the
754 # upper triangle of the matrix. Otherwise, add all the edges. This relies
755 # on the fact that the vertices created in the
756 # `_generated_weighted_edges()` function are actually the row/column
757 # indices for the matrix `A`.
758 #
759 # Without this check, we run into a problem where each edge is added twice
760 # when `G.add_weighted_edges_from()` is invoked below.
761 if G.is_multigraph() and not G.is_directed():
762 triples = ((u, v, d) for u, v, d in triples if u <= v)
763 G.add_weighted_edges_from(triples, weight=edge_attribute)
764 return G
767@nx._dispatch(edge_attrs="weight") # edge attrs may also be obtained from `dtype`
768def to_numpy_array(
769 G,
770 nodelist=None,
771 dtype=None,
772 order=None,
773 multigraph_weight=sum,
774 weight="weight",
775 nonedge=0.0,
776):
777 """Returns the graph adjacency matrix as a NumPy array.
779 Parameters
780 ----------
781 G : graph
782 The NetworkX graph used to construct the NumPy array.
784 nodelist : list, optional
785 The rows and columns are ordered according to the nodes in `nodelist`.
786 If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``.
788 dtype : NumPy data type, optional
789 A NumPy data type used to initialize the array. If None, then the NumPy
790 default is used. The dtype can be structured if `weight=None`, in which
791 case the dtype field names are used to look up edge attributes. The
792 result is a structured array where each named field in the dtype
793 corresponds to the adjacency for that edge attribute. See examples for
794 details.
796 order : {'C', 'F'}, optional
797 Whether to store multidimensional data in C- or Fortran-contiguous
798 (row- or column-wise) order in memory. If None, then the NumPy default
799 is used.
801 multigraph_weight : callable, optional
802 An function that determines how weights in multigraphs are handled.
803 The function should accept a sequence of weights and return a single
804 value. The default is to sum the weights of the multiple edges.
806 weight : string or None optional (default = 'weight')
807 The edge attribute that holds the numerical value used for
808 the edge weight. If an edge does not have that attribute, then the
809 value 1 is used instead. `weight` must be ``None`` if a structured
810 dtype is used.
812 nonedge : array_like (default = 0.0)
813 The value used to represent non-edges in the adjacency matrix.
814 The array values corresponding to nonedges are typically set to zero.
815 However, this could be undesirable if there are array values
816 corresponding to actual edges that also have the value zero. If so,
817 one might prefer nonedges to have some other value, such as ``nan``.
819 Returns
820 -------
821 A : NumPy ndarray
822 Graph adjacency matrix
824 Raises
825 ------
826 NetworkXError
827 If `dtype` is a structured dtype and `G` is a multigraph
828 ValueError
829 If `dtype` is a structured dtype and `weight` is not `None`
831 See Also
832 --------
833 from_numpy_array
835 Notes
836 -----
837 For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``.
839 Entries in the adjacency matrix are given by the `weight` edge attribute.
840 When an edge does not have a weight attribute, the value of the entry is
841 set to the number 1. For multiple (parallel) edges, the values of the
842 entries are determined by the `multigraph_weight` parameter. The default is
843 to sum the weight attributes for each of the parallel edges.
845 When `nodelist` does not contain every node in `G`, the adjacency matrix is
846 built from the subgraph of `G` that is induced by the nodes in `nodelist`.
848 The convention used for self-loop edges in graphs is to assign the
849 diagonal array entry value to the weight attribute of the edge
850 (or the number 1 if the edge has no weight attribute). If the
851 alternate convention of doubling the edge weight is desired the
852 resulting NumPy array can be modified as follows:
854 >>> import numpy as np
855 >>> G = nx.Graph([(1, 1)])
856 >>> A = nx.to_numpy_array(G)
857 >>> A
858 array([[1.]])
859 >>> A[np.diag_indices_from(A)] *= 2
860 >>> A
861 array([[2.]])
863 Examples
864 --------
865 >>> G = nx.MultiDiGraph()
866 >>> G.add_edge(0, 1, weight=2)
867 0
868 >>> G.add_edge(1, 0)
869 0
870 >>> G.add_edge(2, 2, weight=3)
871 0
872 >>> G.add_edge(2, 2)
873 1
874 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
875 array([[0., 2., 0.],
876 [1., 0., 0.],
877 [0., 0., 4.]])
879 When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist`
880 and their edges are not included in the adjacency matrix. Here is an example:
882 >>> G = nx.Graph()
883 >>> G.add_edge(3, 1)
884 >>> G.add_edge(2, 0)
885 >>> G.add_edge(2, 1)
886 >>> G.add_edge(3, 0)
887 >>> nx.to_numpy_array(G, nodelist=[1, 2, 3])
888 array([[0., 1., 1.],
889 [1., 0., 0.],
890 [1., 0., 0.]])
892 This function can also be used to create adjacency matrices for multiple
893 edge attributes with structured dtypes:
895 >>> G = nx.Graph()
896 >>> G.add_edge(0, 1, weight=10)
897 >>> G.add_edge(1, 2, cost=5)
898 >>> G.add_edge(2, 3, weight=3, cost=-4.0)
899 >>> dtype = np.dtype([("weight", int), ("cost", float)])
900 >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None)
901 >>> A["weight"]
902 array([[ 0, 10, 0, 0],
903 [10, 0, 1, 0],
904 [ 0, 1, 0, 3],
905 [ 0, 0, 3, 0]])
906 >>> A["cost"]
907 array([[ 0., 1., 0., 0.],
908 [ 1., 0., 5., 0.],
909 [ 0., 5., 0., -4.],
910 [ 0., 0., -4., 0.]])
912 As stated above, the argument "nonedge" is useful especially when there are
913 actually edges with weight 0 in the graph. Setting a nonedge value different than 0,
914 makes it much clearer to differentiate such 0-weighted edges and actual nonedge values.
916 >>> G = nx.Graph()
917 >>> G.add_edge(3, 1, weight=2)
918 >>> G.add_edge(2, 0, weight=0)
919 >>> G.add_edge(2, 1, weight=0)
920 >>> G.add_edge(3, 0, weight=1)
921 >>> nx.to_numpy_array(G, nonedge=-1.)
922 array([[-1., 2., -1., 1.],
923 [ 2., -1., 0., -1.],
924 [-1., 0., -1., 0.],
925 [ 1., -1., 0., -1.]])
926 """
927 import numpy as np
929 if nodelist is None:
930 nodelist = list(G)
931 nlen = len(nodelist)
933 # Input validation
934 nodeset = set(nodelist)
935 if nodeset - set(G):
936 raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G")
937 if len(nodeset) < nlen:
938 raise nx.NetworkXError("nodelist contains duplicates.")
940 A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order)
942 # Corner cases: empty nodelist or graph without any edges
943 if nlen == 0 or G.number_of_edges() == 0:
944 return A
946 # If dtype is structured and weight is None, use dtype field names as
947 # edge attributes
948 edge_attrs = None # Only single edge attribute by default
949 if A.dtype.names:
950 if weight is None:
951 edge_attrs = dtype.names
952 else:
953 raise ValueError(
954 "Specifying `weight` not supported for structured dtypes\n."
955 "To create adjacency matrices from structured dtypes, use `weight=None`."
956 )
958 # Map nodes to row/col in matrix
959 idx = dict(zip(nodelist, range(nlen)))
960 if len(nodelist) < len(G):
961 G = G.subgraph(nodelist).copy()
963 # Collect all edge weights and reduce with `multigraph_weights`
964 if G.is_multigraph():
965 if edge_attrs:
966 raise nx.NetworkXError(
967 "Structured arrays are not supported for MultiGraphs"
968 )
969 d = defaultdict(list)
970 for u, v, wt in G.edges(data=weight, default=1.0):
971 d[(idx[u], idx[v])].append(wt)
972 i, j = np.array(list(d.keys())).T # indices
973 wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights
974 else:
975 i, j, wts = [], [], []
977 # Special branch: multi-attr adjacency from structured dtypes
978 if edge_attrs:
979 # Extract edges with all data
980 for u, v, data in G.edges(data=True):
981 i.append(idx[u])
982 j.append(idx[v])
983 wts.append(data)
984 # Map each attribute to the appropriate named field in the
985 # structured dtype
986 for attr in edge_attrs:
987 attr_data = [wt.get(attr, 1.0) for wt in wts]
988 A[attr][i, j] = attr_data
989 if not G.is_directed():
990 A[attr][j, i] = attr_data
991 return A
993 for u, v, wt in G.edges(data=weight, default=1.0):
994 i.append(idx[u])
995 j.append(idx[v])
996 wts.append(wt)
998 # Set array values with advanced indexing
999 A[i, j] = wts
1000 if not G.is_directed():
1001 A[j, i] = wts
1003 return A
1006@nx._dispatch(graphs=None)
1007def from_numpy_array(A, parallel_edges=False, create_using=None, edge_attr="weight"):
1008 """Returns a graph from a 2D NumPy array.
1010 The 2D NumPy array is interpreted as an adjacency matrix for the graph.
1012 Parameters
1013 ----------
1014 A : a 2D numpy.ndarray
1015 An adjacency matrix representation of a graph
1017 parallel_edges : Boolean
1018 If this is True, `create_using` is a multigraph, and `A` is an
1019 integer array, then entry *(i, j)* in the array is interpreted as the
1020 number of parallel edges joining vertices *i* and *j* in the graph.
1021 If it is False, then the entries in the array are interpreted as
1022 the weight of a single edge joining the vertices.
1024 create_using : NetworkX graph constructor, optional (default=nx.Graph)
1025 Graph type to create. If graph instance, then cleared before populated.
1027 edge_attr : String, optional (default="weight")
1028 The attribute to which the array values are assigned on each edge. If
1029 it is None, edge attributes will not be assigned.
1031 Notes
1032 -----
1033 For directed graphs, explicitly mention create_using=nx.DiGraph,
1034 and entry i,j of A corresponds to an edge from i to j.
1036 If `create_using` is :class:`networkx.MultiGraph` or
1037 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1038 entries of `A` are of type :class:`int`, then this function returns a
1039 multigraph (of the same type as `create_using`) with parallel edges.
1041 If `create_using` indicates an undirected multigraph, then only the edges
1042 indicated by the upper triangle of the array `A` will be added to the
1043 graph.
1045 If `edge_attr` is Falsy (False or None), edge attributes will not be
1046 assigned, and the array data will be treated like a binary mask of
1047 edge presence or absence. Otherwise, the attributes will be assigned
1048 as follows:
1050 If the NumPy array has a single data type for each array entry it
1051 will be converted to an appropriate Python data type.
1053 If the NumPy array has a user-specified compound data type the names
1054 of the data fields will be used as attribute keys in the resulting
1055 NetworkX graph.
1057 See Also
1058 --------
1059 to_numpy_array
1061 Examples
1062 --------
1063 Simple integer weights on edges:
1065 >>> import numpy as np
1066 >>> A = np.array([[1, 1], [2, 1]])
1067 >>> G = nx.from_numpy_array(A)
1068 >>> G.edges(data=True)
1069 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
1071 If `create_using` indicates a multigraph and the array has only integer
1072 entries and `parallel_edges` is False, then the entries will be treated
1073 as weights for edges joining the nodes (without creating parallel edges):
1075 >>> A = np.array([[1, 1], [1, 2]])
1076 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
1077 >>> G[1][1]
1078 AtlasView({0: {'weight': 2}})
1080 If `create_using` indicates a multigraph and the array has only integer
1081 entries and `parallel_edges` is True, then the entries will be treated
1082 as the number of parallel edges joining those two vertices:
1084 >>> A = np.array([[1, 1], [1, 2]])
1085 >>> temp = nx.MultiGraph()
1086 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
1087 >>> G[1][1]
1088 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1090 User defined compound data type on edges:
1092 >>> dt = [("weight", float), ("cost", int)]
1093 >>> A = np.array([[(1.0, 2)]], dtype=dt)
1094 >>> G = nx.from_numpy_array(A)
1095 >>> G.edges()
1096 EdgeView([(0, 0)])
1097 >>> G[0][0]["cost"]
1098 2
1099 >>> G[0][0]["weight"]
1100 1.0
1102 """
1103 kind_to_python_type = {
1104 "f": float,
1105 "i": int,
1106 "u": int,
1107 "b": bool,
1108 "c": complex,
1109 "S": str,
1110 "U": str,
1111 "V": "void",
1112 }
1113 G = nx.empty_graph(0, create_using)
1114 if A.ndim != 2:
1115 raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}")
1116 n, m = A.shape
1117 if n != m:
1118 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
1119 dt = A.dtype
1120 try:
1121 python_type = kind_to_python_type[dt.kind]
1122 except Exception as err:
1123 raise TypeError(f"Unknown numpy data type: {dt}") from err
1125 # Make sure we get even the isolated nodes of the graph.
1126 G.add_nodes_from(range(n))
1127 # Get a list of all the entries in the array with nonzero entries. These
1128 # coordinates become edges in the graph. (convert to int from np.int64)
1129 edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero()))
1130 # handle numpy constructed data type
1131 if python_type == "void":
1132 # Sort the fields by their offset, then by dtype, then by name.
1133 fields = sorted(
1134 (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()
1135 )
1136 triples = (
1137 (
1138 u,
1139 v,
1140 {
1141 name: kind_to_python_type[dtype.kind](val)
1142 for (_, dtype, name), val in zip(fields, A[u, v])
1143 }
1144 if edge_attr
1145 else {},
1146 )
1147 for u, v in edges
1148 )
1149 # If the entries in the adjacency matrix are integers, the graph is a
1150 # multigraph, and parallel_edges is True, then create parallel edges, each
1151 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
1152 # one edge for each positive entry in the adjacency matrix and set the
1153 # weight of that edge to be the entry in the matrix.
1154 elif python_type is int and G.is_multigraph() and parallel_edges:
1155 chain = itertools.chain.from_iterable
1156 # The following line is equivalent to:
1157 #
1158 # for (u, v) in edges:
1159 # for d in range(A[u, v]):
1160 # G.add_edge(u, v, weight=1)
1161 #
1162 if edge_attr:
1163 triples = chain(
1164 ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges
1165 )
1166 else:
1167 triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges)
1168 else: # basic data type
1169 if edge_attr:
1170 triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges)
1171 else:
1172 triples = ((u, v, {}) for u, v in edges)
1173 # If we are creating an undirected multigraph, only add the edges from the
1174 # upper triangle of the matrix. Otherwise, add all the edges. This relies
1175 # on the fact that the vertices created in the
1176 # `_generated_weighted_edges()` function are actually the row/column
1177 # indices for the matrix `A`.
1178 #
1179 # Without this check, we run into a problem where each edge is added twice
1180 # when `G.add_edges_from()` is invoked below.
1181 if G.is_multigraph() and not G.is_directed():
1182 triples = ((u, v, d) for u, v, d in triples if u <= v)
1183 G.add_edges_from(triples)
1184 return G